FNN (Feedforward Neural Network)
Network Design
Code Implementation
All images in MNIST are 28*28*1; the 1 is the number of channels, meaning they are grayscale images.
import torch  # Import PyTorch
from torch import nn  # Import the neural network module from PyTorch
from torchinfo import summary

# Define the neural network class, inheriting from nn.Module
class PreNetwork(nn.Module):
    def __init__(self):
        super().__init__()  # Call the initializer of the parent class nn.Module
        self.layer1 = nn.Linear(784, 256)  # First linear layer (input size 784, output size 256)
        self.layer2 = nn.Linear(256, 10)   # Second linear layer (input size 256, output size 10)

    def forward(self, x):
        x = x.view(-1, 28*28)  # Flatten each image to a vector of length 28*28 = 784 (batch dimension preserved)
        x = self.layer1(x)     # Pass the input through the first linear layer
        x = torch.relu(x)      # Apply the ReLU activation function
        return self.layer2(x)  # Pass the result through the second linear layer and return it
Verify that the model works correctly
net = PreNetwork()
summary(net, input_size=(1,1, 28, 28))
Test output
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
PreNetwork [1, 10] --
├─Linear: 1-1 [1, 256] 200,960
├─Linear: 1-2 [1, 10] 2,570
==========================================================================================
Total params: 203,530
Trainable params: 203,530
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.20
==========================================================================================
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.81
Estimated Total Size (MB): 0.82
==========================================================================================
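The code above only defines and sanity-checks the model. As a minimal training sketch (assuming torchvision is available for loading MNIST; the batch size, learning rate, and epoch count below are illustrative choices, not values from this post), training PreNetwork could look like this:

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Illustrative hyperparameters; adjust as needed.
train_set = datasets.MNIST(root="./data", train=True, download=True,
                           transform=transforms.ToTensor())
train_loader = DataLoader(train_set, batch_size=64, shuffle=True)

net = PreNetwork()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

for epoch in range(3):  # a few epochs are enough for a smoke test
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = net(images)              # forward pass; the model flattens internally
        loss = criterion(outputs, labels)
        loss.backward()                    # backpropagation
        optimizer.step()
    print(f"epoch {epoch}: loss {loss.item():.4f}")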
LeNet Convolutional Neural Network
Network Design
Code Design
As the design diagram above shows, the expected input image is 32*32*1, but MNIST images are 28*28*1, so writing the code exactly as in the design will inevitably cause a problem.
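To see why, it helps to trace the feature-map sizes. The small sketch below is not part of the original design; it just applies the standard formula output = (input - kernel) // stride + 1 to the unpadded 5x5 convolutions and halves the size at each 2x2 max-pool:

def conv_out(size, kernel, stride=1):
    return (size - kernel) // stride + 1

def pool_out(size, kernel=2):
    return size // kernel

for name, s in [("LeNet design (32*32)", 32), ("MNIST (28*28)", 28)]:
    s = pool_out(conv_out(s, 5))  # first Conv2d(kernel_size=5) + MaxPool2d(2)
    s = pool_out(conv_out(s, 5))  # second Conv2d(kernel_size=5) + MaxPool2d(2)
    print(name, "-> final feature map: 16*%d*%d" % (s, s))
# A 32*32 input ends at 16*5*5, while a 28*28 input ends at 16*4*4.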
import torch.nn as nn
import torch.nn.functional as func
from torchinfo import summary

class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        # Feature extractor: two conv + ReLU + max-pool stages
        self.features = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
        )
        # Classifier: three fully connected layers
        self.classifier = nn.Sequential(
            nn.Linear(16*5*5, 120),
            nn.Linear(120, 84),
            nn.Linear(84, 10),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)  # Flatten the feature maps to (batch, channels*height*width)
        x = self.classifier(x)
        return x
Test
net = LeNet5()
summary(net, input_size=(1,1, 28, 28))
Test output
The test output shows that an error occurs.
As the design diagram above shows, the data passes through 2 convolutional layers and 2 pooling layers, followed by 3 fully connected layers.
How to troubleshoot
1. Make sure the channel count of the input matches the first layer. For example, MNIST images have 1 channel, so confirm that the input layer expects 1 channel:
nn.Conv2d(1, 6, kernel_size=5)
2. The intermediate convolution and pooling layers are unlikely to be the problem; the place to check is the transition from the last pooling layer to the fully connected layers.
To narrow it down, first comment out x = self.classifier(x):
import torch.nn as nn
import torch.nn.functional as func
from torchinfo import summary

class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(16*5*5, 120),
            nn.Linear(120, 84),
            nn.Linear(84, 10),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        # x = self.classifier(x)
        return x
Run the test again
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
LeNet5 [1, 256] 41,854
├─Sequential: 1-1 [1, 16, 4, 4] --
│ └─Conv2d: 2-1 [1, 6, 24, 24] 156
│ └─ReLU: 2-2 [1, 6, 24, 24] --
│ └─MaxPool2d: 2-3 [1, 6, 12, 12] --
│ └─Conv2d: 2-4 [1, 16, 8, 8] 2,416
│ └─ReLU: 2-5 [1, 16, 8, 8] --
│ └─MaxPool2d: 2-6 [1, 16, 4, 4] --
==========================================================================================
Total params: 44,426
Trainable params: 44,426
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.24
==========================================================================================
Input size (MB): 0.00
Forward/backward pass size (MB): 0.04
Params size (MB): 0.01
Estimated Total Size (MB): 0.05
==========================================================================================
As the output shows, after two convolutions and two poolings the feature map is 16*4*4, but the code above uses 16*5*5, so we change it to 16*4*4. After the change, run the test again.
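For reference, the corrected model is the same code as above with nn.Linear(16*5*5, 120) changed to nn.Linear(16*4*4, 120) and the classifier call restored in forward:

import torch.nn as nn

class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(16*4*4, 120),  # changed from 16*5*5 to match the 28*28 MNIST input
            nn.Linear(120, 84),
            nn.Linear(84, 10),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)  # restored after debugging
        return x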
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
LeNet5 [1, 10] --
├─Sequential: 1-1 [1, 16, 4, 4] --
│ └─Conv2d: 2-1 [1, 6, 24, 24] 156
│ └─ReLU: 2-2 [1, 6, 24, 24] --
│ └─MaxPool2d: 2-3 [1, 6, 12, 12] --
│ └─Conv2d: 2-4 [1, 16, 8, 8] 2,416
│ └─ReLU: 2-5 [1, 16, 8, 8] --
│ └─MaxPool2d: 2-6 [1, 16, 4, 4] --
├─Sequential: 1-2 [1, 10] --
│ └─Linear: 2-7 [1, 120] 30,840
│ └─Linear: 2-8 [1, 84] 10,164
│ └─Linear: 2-9 [1, 10] 850
==========================================================================================
Total params: 44,426
Trainable params: 44,426
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 0.29
==========================================================================================
Input size (MB): 0.00
Forward/backward pass size (MB): 0.04
Params size (MB): 0.18
Estimated Total Size (MB): 0.22
==========================================================================================
AlexNet Neural Network
Network Design
Code Design
import torch.nn as nn
from torchinfo import summary

'''
modified to fit dataset size
'''
NUM_CLASSES = 10

class AlexNet(nn.Module):
    def __init__(self, num_classes=NUM_CLASSES):
        super(AlexNet, self).__init__()
        # Feature extractor: five conv layers and three max-pool layers
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(64, 192, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
        )
        # Classifier: three fully connected layers with dropout
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 1 * 1, 4096),  # for a 28*28 input the feature maps end at 256*1*1
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
Test the model
net = AlexNet()
summary(net, input_size=(20,1, 28, 28))
Test output
From the output below it is easy to see that the model has 5 convolutional layers, 3 pooling layers, and 3 fully connected layers, with dropout added as well.
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
AlexNet [20, 10] --
├─Sequential: 1-1 [20, 256, 1, 1] --
│ └─Conv2d: 2-1 [20, 64, 14, 14] 640
│ └─ReLU: 2-2 [20, 64, 14, 14] --
│ └─MaxPool2d: 2-3 [20, 64, 7, 7] --
│ └─Conv2d: 2-4 [20, 192, 7, 7] 110,784
│ └─ReLU: 2-5 [20, 192, 7, 7] --
│ └─MaxPool2d: 2-6 [20, 192, 3, 3] --
│ └─Conv2d: 2-7 [20, 384, 3, 3] 663,936
│ └─ReLU: 2-8 [20, 384, 3, 3] --
│ └─Conv2d: 2-9 [20, 256, 3, 3] 884,992
│ └─ReLU: 2-10 [20, 256, 3, 3] --
│ └─Conv2d: 2-11 [20, 256, 3, 3] 590,080
│ └─ReLU: 2-12 [20, 256, 3, 3] --
│ └─MaxPool2d: 2-13 [20, 256, 1, 1] --
├─Sequential: 1-2 [20, 10] --
│ └─Dropout: 2-14 [20, 256] --
│ └─Linear: 2-15 [20, 4096] 1,052,672
│ └─ReLU: 2-16 [20, 4096] --
│ └─Dropout: 2-17 [20, 4096] --
│ └─Linear: 2-18 [20, 4096] 16,781,312
│ └─ReLU: 2-19 [20, 4096] --
...
Input size (MB): 0.06
Forward/backward pass size (MB): 6.11
Params size (MB): 80.50
Estimated Total Size (MB): 86.68
==========================================================================================
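A more general way to avoid this kind of shape bug, sketched below as an illustration rather than something from the original post, is to compute the flattened feature size from a dummy input instead of hard-coding 16*5*5 or 256*1*1:

import torch
import torch.nn as nn

def flattened_size(features: nn.Module, input_shape=(1, 28, 28)):
    """Run a dummy batch through the feature extractor and return the flattened size."""
    with torch.no_grad():
        out = features(torch.zeros(1, *input_shape))
    return out.view(1, -1).size(1)

# Example: verify the sizes used above.
print(flattened_size(LeNet5().features))   # 256, i.e. 16*4*4
print(flattened_size(AlexNet().features))  # 256, i.e. 256*1*1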