PyTorch model
2022-06-28 13:23:00 【Gu_NN】
Basic model definition methods
PyTorch provides nn.Sequential(), nn.ModuleList(), and nn.ModuleDict() for combining multiple Modules into a model. Their similarities and differences are as follows:

| Sequential() | ModuleList()/ModuleDict() |
|---|---|
| Builds the network directly; the order of definition is the order in which the layers are connected | The order of the elements in the List/Dict does not determine their position in the network; the forward function must specify how the layers are connected |
| External inputs cannot be injected in the middle of the model | Convenient when a layer needs information from an earlier layer, e.g. the residual connections in ResNets |
Using nn.Sequential()
# Method 1: pass the layers in order
import torch.nn as nn
net = nn.Sequential(
nn.Linear(784, 256),
nn.ReLU(),
nn.Linear(256, 10),
)
# Method 2: name each layer with an OrderedDict
import collections
net2 = nn.Sequential(collections.OrderedDict([
('fc1', nn.Linear(784, 256)),
('relu1', nn.ReLU()),
('fc2', nn.Linear(256, 10))
]))
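Both definitions behave identically at the call site; a quick sanity check with a random batch (a sketch, not part of the original post):

import torch

x = torch.rand(4, 784)      # dummy batch of four flattened 28x28 images
print(net(x).shape)         # torch.Size([4, 10])
print(net2(x).shape)        # torch.Size([4, 10]); net2 additionally exposes named layers, e.g. net2.fc1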
Using nn.ModuleList()/nn.ModuleDict()
# List
class ModelList(nn.Module):
def __init__(self):
super().__init__()
self.modulelist = nn.ModuleList([nn.Linear(784, 256), nn.ReLU(),nn.Linear(256, 10)])
def forward(self, x):
for layer in self.modulelist:
x = layer(x)
return x
# Dict
class ModelDict(nn.Module):
def __init__(self):
super().__init__()
self.moduledict = nn.ModuleDict({
'linear': nn.Linear(784, 256),
'act': nn.ReLU(),
'output':nn.Linear(256, 10)
})
def forward(self, x):
        for layer in self.moduledict.values():  # iterating a ModuleDict yields its keys, so use .values() to get the layers
            x = layer(x)
return x
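Writing the forward pass by hand is what makes ModuleList/ModuleDict convenient for skip connections such as the residuals mentioned in the table above. A minimal sketch (not from the original post; the class name and sizes are illustrative):

import torch
import torch.nn as nn

class TinyResidualBlock(nn.Module):
    def __init__(self, dim=64):
        super().__init__()
        self.layers = nn.ModuleList([nn.Linear(dim, dim), nn.ReLU(), nn.Linear(dim, dim)])

    def forward(self, x):
        identity = x                 # keep the block input
        for layer in self.layers:
            x = layer(x)
        return x + identity          # residual connection, as in ResNets

print(TinyResidualBlock()(torch.rand(2, 64)).shape)   # torch.Size([2, 64])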
Building complex models
For a large, complex model, it is easier to first define reusable blocks and then assemble the full model from them. Take the U-Net model as an example.
The U-Net architecture can be divided into the following four kinds of modules:
- Two successive convolutions inside each block (DoubleConv)
- Downsampling connections between the blocks on the left-hand path, i.e. max pooling
- Upsampling connections between the blocks on the right-hand path
- The output layer
Building the blocks
import torch
import torch.nn as nn
import torch.nn.functional as F
class DoubleConv(nn.Module):
"""(convolution => [BN] => ReLU) * 2"""
def __init__(self, in_channels, out_channels, mid_channels=None):
super().__init__()
if not mid_channels:
mid_channels = out_channels
self.double_conv = nn.Sequential(
nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(mid_channels),
nn.ReLU(inplace=True),
nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(out_channels),
nn.ReLU(inplace=True)
)
def forward(self, x):
return self.double_conv(x)
class Down(nn.Module):
"""Downscaling with maxpool then double conv"""
def __init__(self, in_channels, out_channels):
super().__init__()
self.maxpool_conv = nn.Sequential(
nn.MaxPool2d(2),
DoubleConv(in_channels, out_channels)
)
def forward(self, x):
return self.maxpool_conv(x)
class Up(nn.Module):
"""Upscaling then double conv"""
def __init__(self, in_channels, out_channels, bilinear=True):
super().__init__()
# if bilinear, use the normal convolutions to reduce the number of channels
if bilinear:
self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
self.conv = DoubleConv(in_channels, out_channels, in_channels // 2)
else:
self.up = nn.ConvTranspose2d(in_channels, in_channels // 2, kernel_size=2, stride=2)
self.conv = DoubleConv(in_channels, out_channels)
def forward(self, x1, x2):
x1 = self.up(x1)
# input is CHW
diffY = x2.size()[2] - x1.size()[2]
diffX = x2.size()[3] - x1.size()[3]
x1 = F.pad(x1, [diffX // 2, diffX - diffX // 2,
diffY // 2, diffY - diffY // 2])
x = torch.cat([x2, x1], dim=1)
return self.conv(x)
class OutConv(nn.Module):
def __init__(self, in_channels, out_channels):
super(OutConv, self).__init__()
self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
def forward(self, x):
return self.conv(x)
Model assembly
class UNet(nn.Module):
def __init__(self, n_channels, n_classes, bilinear=True):
super(UNet, self).__init__()
self.n_channels = n_channels
self.n_classes = n_classes
self.bilinear = bilinear
self.inc = DoubleConv(n_channels, 64)
self.down1 = Down(64, 128)
self.down2 = Down(128, 256)
self.down3 = Down(256, 512)
factor = 2 if bilinear else 1
self.down4 = Down(512, 1024 // factor)
self.up1 = Up(1024, 512 // factor, bilinear)
self.up2 = Up(512, 256 // factor, bilinear)
self.up3 = Up(256, 128 // factor, bilinear)
self.up4 = Up(128, 64, bilinear)
self.outc = OutConv(64, n_classes)
def forward(self, x):
x1 = self.inc(x)
x2 = self.down1(x1)
x3 = self.down2(x2)
x4 = self.down3(x3)
x5 = self.down4(x4)
x = self.up1(x5, x4)
x = self.up2(x, x3)
x = self.up3(x, x2)
x = self.up4(x, x1)
logits = self.outc(x)
return logits
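As a quick sanity check of the assembled network, a minimal sketch (not from the original post) that runs a dummy batch through it, assuming a 3-channel input and 2 output classes:

unet = UNet(n_channels=3, n_classes=2)
x = torch.randn(1, 3, 256, 256)    # dummy 256x256 RGB image
logits = unet(x)
print(logits.shape)                # torch.Size([1, 2, 256, 256]): per-pixel class scores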
Modifying existing models
Replacing a layer
import torchvision.models as models
net = models.resnet50()
print(net)
# Replace the fc layer with a new classifier head
from collections import OrderedDict
classifier = nn.Sequential(OrderedDict([('fc1', nn.Linear(2048, 128)),
('relu1', nn.ReLU()),
('dropout1',nn.Dropout(0.5)),
('fc2', nn.Linear(128, 10)),
('output', nn.Softmax(dim=1))
]))
# Replace the original fc layer with the classifier defined above
net.fc = classifier
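A quick check (a sketch with a dummy batch, not from the original post) that the modified network now ends in the 10-way softmax head:

import torch

x = torch.randn(2, 3, 224, 224)   # dummy batch of two RGB images
out = net(x)
print(out.shape)                  # torch.Size([2, 10])
print(out.sum(dim=1))             # each row sums to 1 because of the final Softmax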
Adding an extra input variable
# Define the modified model
class Model(nn.Module):
def __init__(self, net):
super(Model, self).__init__()
# Original network structure
self.net = net
        # pass the 1000-dimensional backbone output through an activation layer
self.relu = nn.ReLU()
# dropout layer
self.dropout = nn.Dropout(0.5)
        # fully connected layer mapping to the output dimension (10); its input is 1001 = 1000 backbone features + 1 extra variable
self.fc_add = nn.Linear(1001, 10, bias=True)
self.output = nn.Softmax(dim=1)
def forward(self, x, add_variable):
x = self.net(x)
        # after the activation and dropout layers, concatenate with the external input variable
        x = torch.cat((self.dropout(self.relu(x)), add_variable.unsqueeze(1)), 1)  # unsqueeze keeps add_variable's dimensions consistent with the network output; typically add_variable is a single scalar per sample
x = self.fc_add(x)
x = self.output(x)
return x
# Instantiation
model = Model(net).cuda()
# Training
outputs = model(inputs, add_var)
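A minimal dry run on CPU (a sketch, not from the original post, that uses a fresh, unmodified torchvision resnet50 with its 1000-dimensional output as the backbone):

import torch
import torchvision.models as models

backbone = models.resnet50()
model = Model(backbone)               # CPU is fine for a quick shape check
inputs = torch.randn(2, 3, 224, 224)  # dummy image batch
add_var = torch.randn(2)              # one extra scalar per sample
print(model(inputs, add_var).shape)   # torch.Size([2, 10])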
Adding an extra output variable
class Model(nn.Module):
def __init__(self, net):
super(Model, self).__init__()
self.net = net
self.relu = nn.ReLU()
self.dropout = nn.Dropout(0.5)
self.fc1 = nn.Linear(1000, 10, bias=True)
self.output = nn.Softmax(dim=1)
def forward(self, x, add_variable):
x1000 = self.net(x)
x10 = self.dropout(self.relu(x1000))
x10 = self.fc1(x10)
x10 = self.output(x10)
        return x10, x1000  # also return the 1000-dimensional features as an extra output
model = Model(net).cuda()
out10, out1000 = model(inputs, add_var)
Model saving and loading
PyTorch models are usually saved with a pkl, pt, or pth extension.
A PyTorch model consists of two parts: the model structure and the weights.
- Model structure: an nn.Module subclass
- Weights: a dictionary (keys are parameter names, values are weight tensors), i.e. the state_dict
There are correspondingly two ways to save a model:
- Save the model structure together with the weights
- Save only the weights
import torch
from torchvision import models

model = models.resnet152(pretrained=True)
# Save the entire model (structure + weights); save_dir is the target file path
torch.save(model, save_dir)
# Save only the model weights (note the parentheses: state_dict is a method)
torch.save(model.state_dict(), save_dir)
When multiple GPUs are available, both saving and loading can be done on a single card or on multiple cards. A checkpoint saved from a multi-card (nn.DataParallel) model has an extra "module." prefix on every parameter name compared with a single-card checkpoint, so loading such a checkpoint takes a bit more care.
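The naming difference can be seen with a toy layer (a minimal sketch, not from the original post):

import torch.nn as nn

layer = nn.Linear(4, 2)
print(list(layer.state_dict().keys()))                   # ['weight', 'bias']
print(list(nn.DataParallel(layer).state_dict().keys()))  # ['module.weight', 'module.bias']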
Saving
Saving on a single GPU
import os
import torch
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # set this to the GPU id you want to use
model = models.resnet152(pretrained=True)
model.cuda()
# Save the entire model
torch.save(model, save_dir)
# Save model weights
torch.save(model.state_dict(), save_dir)
Saving on multiple GPUs
Use nn.DataParallel to run data-parallel training across the visible GPUs:
import os
import torch
import torch.nn as nn
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2' # set these to the GPU ids you want to use
model = models.resnet152(pretrained=True)
model = nn.DataParallel(model).cuda()
# Save the entire model
torch.save(model, save_dir)
# Save model weights
torch.save(model.state_dict(), save_dir)
Loading
Loading on a single GPU
- Checkpoint saved on a single GPU
import os
import torch
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # set this to the GPU id you want to use
# Load the entire model
loaded_model = torch.load(save_dir)
loaded_model.cuda()
# Load only the model weights
loaded_dict = torch.load(save_dir)
loaded_model = models.resnet152()   # the model structure must be defined first
loaded_model.load_state_dict(loaded_dict)
loaded_model.cuda()
- Checkpoint saved on multiple GPUs
import os
import torch
import torch.nn as nn
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '0' # set this to the GPU id you want to use
# Load the entire model
loaded_model = torch.load(save_dir)
loaded_model = loaded_model.module   # difference: unwrap the DataParallel container to get the bare model
# Load only the model weights (recommended)
loaded_dict = torch.load(save_dir)
loaded_model = models.resnet152()   # the model structure must be defined first
loaded_model = nn.DataParallel(loaded_model).cuda()   # difference: wrap with DataParallel so the 'module.' prefix in the checkpoint keys matches
loaded_model.load_state_dict(loaded_dict)
# Load only the model weights (alternative 1)
from collections import OrderedDict
loaded_dict = torch.load(save_dir)
# Strip the 'module.' prefix from every key
new_state_dict = OrderedDict()
for k, v in loaded_dict.items():
    name = k[7:]              # 'module.' is 7 characters long, so drop the first 7 characters of each key
    new_state_dict[name] = v  # keep the same weight tensor under the cleaned key
# The rest is the same as loading a single-GPU checkpoint
loaded_model = models.resnet152()
loaded_model.load_state_dict(new_state_dict)
loaded_model = loaded_model.cuda()
# Load only the model weights (alternative 2): strip the 'module.' prefix inline
loaded_model = models.resnet152()
loaded_dict = torch.load(save_dir)
loaded_model.load_state_dict({
k.replace('module.', ''): v for k, v in loaded_dict.items()})
loaded_model = loaded_model.cuda()
Loading on multiple GPUs
- Checkpoint saved on a single GPU
Use nn.DataParallel to set up data-parallel execution across the visible GPUs:
import os
import torch
import torch.nn as nn
from torchvision import models
os.environ['CUDA_VISIBLE_DEVICES'] = '1,2' # set these to the GPU ids you want to use
# Load the entire model
loaded_model = torch.load(save_dir)
loaded_model = nn.DataParallel(loaded_model).cuda()   # difference: wrap the loaded model with DataParallel
# Load only the model weights
loaded_dict = torch.load(save_dir)
loaded_model = models.resnet152()   # the model structure must be defined first
loaded_model.load_state_dict(loaded_dict)
loaded_model = nn.DataParallel(loaded_model).cuda()   # difference: wrap with DataParallel after loading
- Checkpoint saved on multiple GPUs
Saving only the weights is recommended; loading them is then the same as in the single-GPU case above. If the entire model was saved instead, the following code is needed:
import os
import torch
import torch.nn as nn
from torchvision import models

os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2'   # set these to the GPU ids you want to use
loaded_whole_model = torch.load(save_dir)      # this is the saved nn.DataParallel wrapper
loaded_model = models.resnet152()              # the model structure must be defined first
loaded_model.load_state_dict(loaded_whole_model.module.state_dict())   # copy the weights out of the wrapper
loaded_model = nn.DataParallel(loaded_model).cuda()
References
Datawhale: 深入浅出PyTorch (thorough-pytorch)