PyTorch model
2022-06-28 13:23:00 【Gu_NN】
Basic model definition methods
PyTorch provides nn.Sequential(), nn.ModuleList(), and nn.ModuleDict() for combining multiple Modules into a model. Their similarities and differences are as follows:

| Sequential() | ModuleList()/ModuleDict() |
|---|---|
| Builds the network directly; the order of definition is the order in which the layers are connected | The order of the elements in the List/Dict does not determine their position in the network; the forward function must specify how the layers are connected |
| External inputs cannot be injected in the middle of the model | Convenient when a layer needs information from an earlier layer, e.g. the residual connections in ResNets |
Using nn.Sequential()
# Method 1: pass the layers in order
import torch.nn as nn
net = nn.Sequential(
nn.Linear(784, 256),
nn.ReLU(),
nn.Linear(256, 10),
)
# Method 2: name each layer with an OrderedDict
import collections
net2 = nn.Sequential(collections.OrderedDict([
('fc1', nn.Linear(784, 256)),
('relu1', nn.ReLU()),
('fc2', nn.Linear(256, 10))
]))
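Both definitions behave identically at the call site; a quick sanity check with a random batch (a sketch, not part of the original post):

import torch

x = torch.rand(4, 784)      # dummy batch of four flattened 28x28 images
print(net(x).shape)         # torch.Size([4, 10])
print(net2(x).shape)        # torch.Size([4, 10]); net2 additionally exposes named layers, e.g. net2.fc1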
Using nn.ModuleList()/nn.ModuleDict()
# List
class ModelList(nn.Module):
def __init__(self):
super().__init__()
self.modulelist = nn.ModuleList([nn.Linear(784, 256), nn.ReLU(),nn.Linear(256, 10)])
def forward(self, x):
for layer in self.modulelist:
x = layer(x)
return x
# Dict
class ModelDict(nn.Module):
def __init__(self):
super().__init__()
self.moduledict = nn.ModuleDict({
'linear': nn.Linear(784, 256),
'act': nn.ReLU(),
'output':nn.Linear(256, 10)
})
def forward(self, x):
        for layer in self.moduledict.values():  # iterating a ModuleDict yields its keys, so use .values() to get the layers
            x = layer(x)
return x
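Writing the forward pass by hand is what makes ModuleList/ModuleDict convenient for skip connections such as the residuals mentioned in the table above. A minimal sketch (not from the original post; the class name and sizes are illustrative):

import torch
import torch.nn as nn

class TinyResidualBlock(nn.Module):
    def __init__(self, dim=64):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(dim, dim), nn.ReLU(), nn.Linear(dim, dim)])

    def forward(self, x):
        identity = x                 # keep the block input
        for layer in self.layers:
            x = layer(x)
        return x + identity          # residual connection, as in ResNets

print(TinyResidualBlock()(torch.rand(2, 64)).shape)   # torch.Size([2, 64])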
Building complex models
For a large, complex model, it is easier to first define reusable blocks and then assemble the full model from them. Take the U-Net model as an example.
The U-Net architecture can be divided into the following four kinds of modules:
- Two successive convolutions inside each block (DoubleConv)
- Downsampling connections between the blocks on the left-hand path, i.e. max pooling
- Upsampling connections between the blocks on the right-hand path
- The output layer
Building the blocks
import torch
import torch.nn as nn
import torch.nn.functional as F
class DoubleConv(nn.Module):
"""(convolution => [BN] => ReLU) * 2"""
def __init__(self, in_channels, out_channels, mid_channels=None):
super().__init__()
if not mid_channels:
mid_channels = out_channels
self.double_conv = nn.Sequential(
nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(mid_channels),
nn.ReLU(inplace=True),
nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
def forward(self, x):
return self.double_conv(x)
class Down(nn.Module):
"""Downscaling with maxpool then double conv"""
def __init__(self, in_channels, out_channels):
super().__init__()
self.maxpool_conv = nn.Sequential(
nn.MaxPool2d(2),
DoubleConv(in_channels, out_channels)
)
def forward(self, x):
return self.maxpool_conv(x)
class Up(nn.Module):
"""Upscaling then double conv"""
def __init__(self, in_channels, out_channels, bilinear=True):
super().__init__()
# if bilinear, use the normal convolutions to reduce the number of channels
if bilinear:
self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
else:
self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
self.conv = DoubleConv(in_channels, out_channels)
def forward(self, x1, x2):
x1 = self.up(x1)
# input is CHW
diffY = x2.size()[2] - x1.size()[2]
diffX = x2.size()[3] - x1.size()[3]
x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
diffY // 2, diffY - diffY // 2])
x = torch.cat([x2, x1], dim=1)
return self.conv(x)
class OutConv(nn.Module):
def __init__(self, in_channels, out_channels):
super(OutConv, self).__init__()
self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
def forward(self, x):
return self.conv(x)
Model assembly
class UNet(nn.Module):
def __init__(self, n_channels, n_classes, bilinear=True):
super(UNet, self).__init__()
self.n_channels = n_channels
self.n_classes = n_classes
self.bilinear = bilinear
self.inc = DoubleConv(n_channels, 64)
self.down1 = Down(64, 128)
self.down2 = Down(128, 256)
self.down3 = Down(256, 512)
factor = 2 if bilinear else 1
self.down4 = Down(512, 1024 // factor)
self.up1 = Up(1024, 512 // factor, bilinear)
self.up2 = Up(512, 256 // factor, bilinear)
self.up3 = Up(256, 128 // factor, bilinear)
self.up4 = Up(128, 64, bilinear)
self.outc = OutConv(64, n_classes)
def forward(self, x):
x1 = self.inc(x)
x2 = self.down1(x1)
x3 = self.down2(x2)
x4 = self.down3(x3)
x5 = self.down4(x4)
x = self.up1(x5, x4)
x = self.up2(x, x3)
x = self.up3(x, x2)
x = self.up4(x, x1)
logits = self.outc(x)
return logits
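As a quick sanity check of the assembled network, a minimal sketch (not from the original post) that runs a dummy batch through it, assuming a 3-channel input and 2 output classes:

unet = UNet(n_channels=3, n_classes=2)
x = torch.randn(1, 3, 256, 256)    # dummy 256x256 RGB image
logits = unet(x)
print(logits.shape)                # torch.Size([1, 2, 256, 256]): per-pixel class scores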
Modifying existing models
Replacing a layer
import torchvision.models as models
net = models.resnet50()
print(net)
# Replace the fc layer with a new classifier head
from collections import OrderedDict
classifier = nn.Sequential(OrderedDict([('fc1', nn.Linear(2048, 128)),
('relu1', nn.ReLU()),
('dropout1',nn.Dropout(0.5)),
('fc2', nn.Linear(128, 10)),
('output', nn.Softmax(dim=1))
]))
# Replace the original fc layer with the classifier defined above
net.fc = classifier
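A quick check (a sketch with a dummy batch, not from the original post) that the modified network now ends in the 10-way softmax head:

import torch

x = torch.randn(2, 3, 224, 224)   # dummy batch of two RGB images
out = net(x)
print(out.shape)                  # torch.Size([2, 10])
print(out.sum(dim=1))             # each row sums to 1 because of the final Softmax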
Adding an extra input variable
# Define the modified model
class Model(nn.Module):
def __init__(self, net):
super(Model, self).__init__()
# Original network structure
self.net = net
        # pass the 1000-dimensional backbone output through an activation layer
self.relu = nn.ReLU()
# dropout layer
self.dropout = nn.Dropout(0.5)
        # fully connected layer mapping to the output dimension (10); its input is 1001 = 1000 backbone features + 1 extra variable
self.fc_add = nn.Linear(1001, 10, bias=True)
self.output = nn.Softmax(dim=1)
def forward(self, x, add_variable):
x = self.net(x)
        # after the activation and dropout layers, concatenate with the external input variable
        x = torch.cat((self.dropout(self.relu(x)), add_variable.unsqueeze(1)), 1)  # unsqueeze keeps add_variable's dimensions consistent with the network output; typically add_variable is a single scalar per sample
x = self.fc_add(x)
x = self.output(x)
return x
# Instantiation
model = Model(net).cuda()
# Training
outputs = model(inputs, add_var)
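A minimal dry run on CPU (a sketch, not from the original post, that uses a fresh, unmodified torchvision resnet50 with its 1000-dimensional output as the backbone):

import torch
import torchvision.models as models

backbone = models.resnet50()
model = Model(backbone)               # CPU is fine for a quick shape check
inputs = torch.randn(2, 3, 224, 224)  # dummy image batch
add_var = torch.randn(2)              # one extra scalar per sample
print(model(inputs, add_var).shape)   # torch.Size([2, 10])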
Adding an extra output variable
class Model(nn.Module):
def __init__(self, net):
super(Model, self).__init__()
self.net = net
self.relu = nn.ReLU()
self.dropout = nn.Dropout(0.5)
self.fc1 = nn.Linear(1000, 10, bias=True)
self.output = nn.Softmax(dim=1)
def forward(self, x, add_variable):
x1000 = self.net(x)
x10 = self.dropout(self.relu(x1000))
x10 = self.fc1(x10)
x10 = self.output(x10)
        return x10, x1000  # also return the 1000-dimensional features as an extra output
model = Model(net).cuda()
out10, out1000 = model(inputs, add_var)
Model saving and loading
PyTorch models are usually saved with a pkl, pt, or pth extension.
A PyTorch model consists of two parts: the model structure and the weights.
- Model structure: an nn.Module subclass
- Weights: a dictionary (keys are parameter names, values are weight tensors), i.e. the state_dict
There are correspondingly two ways to save a model:
- Save the model structure together with the weights
- Save only the weights
import torch
from torchvision import models

model = models.resnet152(pretrained=True)
# Save the entire model (structure + weights); save_dir is the target file path
torch.save(model, save_dir)
# Save only the model weights (note the parentheses: state_dict is a method)
torch.save(model.state_dict(), save_dir)
When multiple GPUs are available, both saving and loading can be done on a single card or on multiple cards. A checkpoint saved from a multi-card (nn.DataParallel) model has an extra "module." prefix on every parameter name compared with a single-card checkpoint, so loading such a checkpoint takes a bit more care.
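The naming difference can be seen with a toy layer (a minimal sketch, not from the original post):

import torch.nn as nn

layer = nn.Linear(4, 2)
print(list(layer.state_dict().keys()))                   # ['weight', 'bias']
print(list(nn.DataParallel(layer).state_dict().keys()))  # ['module.weight', 'module.bias']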
Saving
Saving on a single GPU
import os
import torch
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # set this to the GPU id you want to use
model = models.resnet152(pretrained=True)
model.cuda()
# Save the entire model
torch.save(model, save_dir)
# Save model weights
torch.save(model.state_dict(), save_dir)
Saving on multiple GPUs
Use nn.DataParallel to run data-parallel training across the visible GPUs:
import os
import torch
import torch.nn as nn
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2' # set these to the GPU ids you want to use
model = models.resnet152(pretrained=True)
model = nn.DataParallel(model).cuda()
# Save the entire model
torch.save(model, save_dir)
# Save model weights
torch.save(model.state_dict(), save_dir)
Loading
Loading on a single GPU
- Checkpoint saved on a single GPU
import os
import torch
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # set this to the GPU id you want to use
# Load the entire model
loaded_model = torch.load(save_dir)
loaded_model.cuda()
# Load only the model weights
loaded_dict = torch.load(save_dir)
loaded_model = models.resnet152()   # the model structure must be defined first
loaded_model.load_state_dict(loaded_dict)
loaded_model.cuda()
- Checkpoint saved on multiple GPUs
import os
import torch
import torch.nn as nn
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # set this to the GPU id you want to use
# Load the entire model
loaded_model = torch.load(save_dir)
loaded_model = loaded_model.module   # difference: unwrap the DataParallel container to get the bare model
# Load only the model weights (recommended)
loaded_dict = torch.load(save_dir)
loaded_model = models.resnet152()   # the model structure must be defined first
loaded_model = nn.DataParallel(loaded_model).cuda()   # difference: wrap with DataParallel so the 'module.' prefix in the checkpoint keys matches
loaded_model.load_state_dict(loaded_dict)
# Load only the model weights (alternative 1)
from collections import OrderedDict
loaded_dict = torch.load(save_dir)
# Strip the 'module.' prefix from every key
new_state_dict = OrderedDict()
for k, v in loaded_dict.items():
    name = k[7:]              # 'module.' is 7 characters long, so drop the first 7 characters of each key
    new_state_dict[name] = v  # keep the same weight tensor under the cleaned key
# The rest is the same as loading a single-GPU checkpoint
loaded_model = models.resnet152()
loaded_model.load_state_dict(new_state_dict)
loaded_model = loaded_model.cuda()
# Load only the model weights (alternative 2): strip the 'module.' prefix inline
loaded_model = models.resnet152()
loaded_dict = torch.load(save_dir)
loaded_model.load_state_dict({
k.replace('module.', ''): v for k, v in loaded_dict.items()})
loaded_model = loaded_model.cuda()
Loading on multiple GPUs
- Checkpoint saved on a single GPU
Use nn.DataParallel to set up data-parallel execution across the visible GPUs:
import os
import torch
import torch.nn as nn
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '1,2' # set these to the GPU ids you want to use
# Load the entire model
loaded_model = torch.load(save_dir)
loaded_model = nn.DataParallel(loaded_model).cuda()   # difference: wrap the loaded model with DataParallel
# Load only the model weights
loaded_dict = torch.load(save_dir)
loaded_model = models.resnet152()   # the model structure must be defined first
loaded_model.load_state_dict(loaded_dict)
loaded_model = nn.DataParallel(loaded_model).cuda()   # difference: wrap with DataParallel after loading
- Checkpoint saved on multiple GPUs
Saving only the weights is recommended; loading them is then the same as in the single-GPU case above. If the entire model was saved instead, the following code is needed:
import os
import torch
import torch.nn as nn
from torchvision import models

os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2'   # set these to the GPU ids you want to use
loaded_whole_model = torch.load(save_dir)      # this is the saved nn.DataParallel wrapper
loaded_model = models.resnet152()              # the model structure must be defined first
loaded_model.load_state_dict(loaded_whole_model.module.state_dict())   # copy the weights out of the wrapper
loaded_model = nn.DataParallel(loaded_model).cuda()
References
Datawhale: 深入浅出PyTorch (thorough-pytorch)