GoogLeNet

The core approach to improving the performance of deep neural networks is increasing their size:

1. Increase the model's depth: the number of layers.
2. Increase the model's width: the number of neurons per layer.

These two changes bring problems:
1. A larger number of parameters, which makes overfitting more likely and thus demands improvements to the training samples (quantity and quality);
2. Increased computational resources.

Proposed remedy: the authors suggest moving from fully connected to sparsely connected architectures, motivated by the following theoretical result, quoted from the paper:

> Their main result states that if the probability distribution of the data-set is representable by a large, very sparse deep neural network, then the optimal network topology can be constructed layer by layer by analyzing the correlation statistics of the activations of the last layer and clustering neurons with highly correlated outputs.

Model structure
As the authors note about increasing the size, the modification is made to the model's "width" (parallel branches) rather than only its "depth" (stacking more convolutional or fully connected layers):

[Figure: the naive Inception module (left) and the improved module with 1×1 dimension-reduction convolutions (right)]

The left side is the original (naive) module; the right side is the improved module proposed on top of it. In module (a), the $3\times 3$ and $5\times 5$ convolutions clearly increase the computational cost (problem 2 above). The authors' idea is to add a $1\times 1$ convolution before these convolutional layers, thereby reducing the computation.

Reducing the computation

[Figure: a $5\times 5$ convolution applied directly to a $28\times 28\times 192$ input, producing a $28\times 28\times 32$ output]
The number of multiplications is then: $(28\times 28\times 32)\times(5\times 5\times 192) = 120{,}422{,}400$.
Adding a $1\times 1$ convolutional layer that first reduces the input to 16 channels:

[Figure: the same convolution with a $1\times 1$ bottleneck that first reduces the input to 16 channels]
The number of multiplications becomes: $(28\times 28\times 16\times 1\times 1\times 192)+(28\times 28\times 32\times 5\times 5\times 16) = 12{,}443{,}648$, roughly a tenfold reduction.
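As a quick sanity check, both counts can be reproduced with a few lines of plain Python arithmetic (the shapes are the ones from the example above):

```python
# Multiply counts for a 28x28x192 input producing a 28x28x32 output.
H, W, C_in, C_mid, C_out = 28, 28, 192, 16, 32

# Direct 5x5 convolution: each output value costs 5*5*C_in multiplications.
direct = (H * W * C_out) * (5 * 5 * C_in)

# 1x1 reduction to C_mid channels, then the 5x5 convolution on the reduced input.
reduced = (H * W * C_mid) * (1 * 1 * C_in) + (H * W * C_out) * (5 * 5 * C_mid)

print(f"{direct:,} vs {reduced:,}")  # 120,422,400 vs 12,443,648
```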

Following this idea, the authors propose the overall model architecture:

[Figure: GoogLeNet architecture diagram]

Overall, the main innovations of GoogLeNet are:
1. Increasing the overall network size is an effective way to improve accuracy in convolutional networks, but it brings the problems discussed above; GoogLeNet counters them with $1\times 1$ convolutions that reduce the number of parameters (see the sketch after this list);
2. Rather than only increasing the network's depth, the authors also increase its width through the parallel branches of the Inception module.
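The saving shows up in parameter counts as well as in multiplications. A minimal sketch using the same channel numbers as the computation example above (these are illustrative, not tied to a specific layer of the final network):

```python
import torch.nn as nn

def n_params(module: nn.Module) -> int:
    return sum(p.numel() for p in module.parameters())

direct = nn.Conv2d(192, 32, kernel_size=5, padding=2)
bottleneck = nn.Sequential(
    nn.Conv2d(192, 16, kernel_size=1),            # 1x1 reduction
    nn.Conv2d(16, 32, kernel_size=5, padding=2),  # 5x5 on the reduced input
)
print(n_params(direct), n_params(bottleneck))  # 153632 vs 15920
```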

Paper (Going Deeper with Convolutions): https://arxiv.org/abs/1409.4842

In [1]:
import torch
import torch.nn as nn
from typing import Optional, Tuple, Any

device = ('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
cuda
In [2]:
class InceptionModule(nn.Module):
    """
    in_channel: 初始输入模型通道数量
    out1: 第一层1x1卷积输出
    pre_in3: 3x3卷积输入 
    com_out3: 3x3卷积输出
    pre_in5: 5x5卷积输入 
    com_out5: 5x5卷积输出
    pool_out: 池化层输出
    """
    def __init__(self, in_channel, out1, pre_in3, com_out3, pre_in5, com_out5, pool_out):
        super(InceptionModule, self).__init__()
        self.con1 = nn.Conv2d(in_channel, out1, kernel_size=1)  # 1x1 convolution branch

        self.con3 = nn.Sequential(
            nn.Conv2d(in_channel, pre_in3, kernel_size=1),
            nn.Conv2d(pre_in3, com_out3, kernel_size=3, padding=1),
        )  # 1x1 reduction + 3x3 convolution

        self.con5 = nn.Sequential(
            nn.Conv2d(in_channel, pre_in5, kernel_size=1),
            nn.Conv2d(pre_in5, com_out5, kernel_size=5, padding=2),
        )  # 1x1 reduction + 5x5 convolution

        self.pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            nn.Conv2d(in_channel, pool_out, kernel_size=1, stride=1),
        )  # max pooling + 1x1 convolution

    def forward(self, x):
        x1 = self.con1(x)
        x3 = self.con3(x)
        x5 = self.con5(x)
        max_x = self.pool(x)
        # Every branch preserves the spatial size, so the outputs can be
        # concatenated along the channel dimension.
        return torch.cat([x1, x3, x5, max_x], dim=1)

class ConvBlock(nn.Module):
    """
    定义一个卷积网络类型,简化后续卷积操作
    """
    def __init__(self, in_channels: int, out_channels: int, **kwargs: Any) -> None:
        super(ConvBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.batch_norm = nn.BatchNorm2d(out_channels, eps= 0.001)
        self.relu = nn.ReLU(True)

    def forward(self, x):
        out = self.conv(x)
        out = self.batch_norm(out)
        out = self.relu(out)
        return out

class InceptionAux(nn.Module):
    """
    在GoogLeNet中存在分支判断函数,因此补充分支判断函数定义
    """
    def __init__(
            self, 
            input_size: int, 
            num_classes: int= 1000, 
            dropout: float=0.7) -> None:
        super(InceptionAux, self).__init__()
        self.avg_pool = nn.AvgPool2d(kernel_size=5, stride=3, ceil_mode=True)
        self.conv = ConvBlock(input_size, 128, kernel_size=1, stride=1)
        self.fc1 = nn.Linear(2048, 1024)
        self.fc2 = nn.Linear(1024, num_classes)
        self.relu = nn.ReLU(True)
        self.dropout = nn.Dropout(dropout, True)
    def forward(self, x):
        x = self.avg_pool(x)
        x = self.conv(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x
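Before assembling the full network, it is worth checking that an Inception block keeps the spatial size and produces the expected channel count. A quick check with the configuration later used for `incept3a` (expected output channels: 64 + 128 + 32 + 32 = 256):

```python
block = InceptionModule(192, 64, 96, 128, 16, 32, 32)
out = block(torch.randn(1, 192, 28, 28))
print(out.shape)  # torch.Size([1, 256, 28, 28])
```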
In [4]:
class GoogLeNet(nn.Module):
    """
    input_size:输入模型图片channel
    out_label:输出图片类型
    """
    def __init__(
            self, 
            input_size: int=3, 
            num_classes: int= 1000, 
            dropout: float = 0.4, 
            dropout_aux: float = 0.7,
            inception_aux: bool= True) -> None:
        super(GoogLeNet, self).__init__()
        
        self.inception_aux = inception_aux  # whether to build the auxiliary classifier branches
        self.conv1 = ConvBlock(input_size, 64, kernel_size=7, stride=2, padding=3)  # 112x112x64
        self.max_pool1 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)  # 56x56x64
        self.conv2 = ConvBlock(64, 64, kernel_size=1, stride=1)  # 56x56x64 (1x1 reduction, as in the paper)
        self.conv3 = ConvBlock(64, 192, kernel_size=3, stride=1, padding=1)  # 56x56x192
        self.max_pool2 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)  # 28x28x192

        self.incept3a = InceptionModule(192, 64, 96, 128, 16, 32, 32)  # 28x28x256
        self.incept3b = InceptionModule(256, 128, 128, 192, 32, 96, 64)  # 28x28x480
        self.max_pool3 = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)  # 14x14x480

        self.incept4a = InceptionModule(480, 192, 96, 208, 16, 48, 64)  # 14x14x512
        # auxiliary classifier 1 branches off here
        self.incept4b = InceptionModule(512, 160, 112, 224, 24, 64, 64)  # 14x14x512
        self.incept4c = InceptionModule(512, 128, 128, 256, 24, 64, 64)  # 14x14x512
        self.incept4d = InceptionModule(512, 112, 144, 288, 32, 64, 64)  # 14x14x528
        # auxiliary classifier 2 branches off here
        self.incept4e = InceptionModule(528, 256, 160, 320, 32, 128, 128)  # 14x14x832

        self.max_pool4 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)  # 7x7x832
        self.incept5a = InceptionModule(832, 256, 160, 320, 32, 128, 128)  # 7x7x832
        self.incept5b = InceptionModule(832, 384, 192, 384, 48, 128, 128)  # 7x7x1024

        self.avg_pool = nn.AvgPool2d(kernel_size=7, stride=1)  # 1x1x1024
        self.linear = nn.Linear(1024, num_classes)  # 1x1x1000
        self.dropout = nn.Dropout(dropout)

        # auxiliary classifiers
        if inception_aux:
            self.inception_aux1 = InceptionAux(512, num_classes, dropout_aux)
            self.inception_aux2 = InceptionAux(528, num_classes, dropout_aux)
        else:
            self.inception_aux1 = None
            self.inception_aux2 = None

    def forward(self, x) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]:
        # Default the auxiliary outputs so the return statement is valid
        # even when the auxiliary branches are disabled.
        aux1: Optional[torch.Tensor] = None
        aux2: Optional[torch.Tensor] = None

        x = self.conv1(x)
        x = self.max_pool1(x)
        x = self.max_pool2(self.conv3(self.conv2(x)))
        x = self.incept3a(x)
        x = self.incept3b(x)
        x = self.max_pool3(x)
        x = self.incept4a(x)

        if self.inception_aux1 is not None:
            aux1 = self.inception_aux1(x)

        x = self.incept4b(x)
        x = self.incept4c(x)
        x = self.incept4d(x)

        if self.inception_aux2 is not None:
            aux2 = self.inception_aux2(x)

        x = self.incept4e(x)
        x = self.max_pool4(x)
        x = self.incept5a(x)
        x = self.incept5b(x)
        x = self.avg_pool(x)
        x = torch.flatten(x, 1)
        x = self.dropout(x)
        x = self.linear(x)

        return x, aux2, aux1
In [5]:
model = GoogLeNet().to(device)
x = torch.randn(1, 3, 224, 224).to(device)
logits, aux2, aux1 = model(x)  # main logits plus the two auxiliary outputs
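During training, the paper adds the two auxiliary losses to the main loss with a weight of 0.3 each; at inference time the auxiliary branches are simply discarded. A minimal sketch of the training-time loss using the outputs above, assuming integer class labels in `target`:

```python
criterion = nn.CrossEntropyLoss()
target = torch.randint(0, 1000, (1,)).to(device)  # dummy label for illustration

loss = (criterion(logits, target)
        + 0.3 * criterion(aux1, target)
        + 0.3 * criterion(aux2, target))
loss.backward()
```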