It has been a while since my last update. This time I bring you the famous GoogLeNet model, also known as Inception v1. Working from the original source code, I rewrote the model for better readability and performance, with a few small tweaks of my own; the input image size is 224 x 224 x 3. The input size can also be changed (with the small pooling tweak shown after the code it can even be trained on CIFAR-10). The code comes first.
import torch
from torch import nn

NUM_CLASSES = 10


class BasicConv2d(nn.Module):
    # Conv2d + BatchNorm + ReLU, the basic unit reused by every branch below
    def __init__(self, in_channels, out_channels, **kwargs):
        super(BasicConv2d, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
        self.bn = nn.BatchNorm2d(out_channels, eps=0.001)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x
class Inception(nn.Module):
    def __init__(self, in_channel, n1_1, n3x3red, n3x3, n5x5red, n5x5, pool_plane):
        super(Inception, self).__init__()
        # branch 1: 1x1 convolution
        self.branch1x1 = BasicConv2d(in_channel, n1_1, kernel_size=1)
        # branch 2: 1x1 reduction followed by 3x3 convolution
        self.branch3x3 = nn.Sequential(
            BasicConv2d(in_channel, n3x3red, kernel_size=1),
            BasicConv2d(n3x3red, n3x3, kernel_size=3, padding=1)
        )
        # branch 3: 1x1 reduction followed by 5x5 convolution
        self.branch5x5 = nn.Sequential(
            BasicConv2d(in_channel, n5x5red, kernel_size=1),
            BasicConv2d(n5x5red, n5x5, kernel_size=5, padding=2)
        )
        # branch 4: 3x3 max pooling followed by 1x1 projection
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            BasicConv2d(in_channel, pool_plane, kernel_size=1)
        )

    def forward(self, x):
        y1 = self.branch1x1(x)
        y2 = self.branch3x3(x)
        y3 = self.branch5x5(x)
        y4 = self.branch_pool(x)
        # concatenate the four branches along the channel dimension
        output = torch.cat([y1, y2, y3, y4], 1)
        return output
class GoogLeNet(nn.Module):
    def __init__(self, num_classes=NUM_CLASSES):
        super(GoogLeNet, self).__init__()
        self.conv1 = BasicConv2d(3, 64, kernel_size=7, stride=2, padding=3)
        # ceil_mode=True keeps the feature-map sizes of the original paper
        # (112 -> 56 -> 28 -> 14 -> 7), so the 7x7 average pool below fits
        self.max_pool1 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.conv2 = BasicConv2d(64, 192, kernel_size=3, stride=1, padding=1)
        self.max_pool2 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.a3 = Inception(192, 64, 96, 128, 16, 32, 32)
        self.b3 = Inception(256, 128, 128, 192, 32, 96, 64)
        self.max_pool3 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.a4 = Inception(480, 192, 96, 208, 16, 48, 64)
        self.b4 = Inception(512, 160, 112, 224, 24, 64, 64)
        self.c4 = Inception(512, 128, 128, 256, 24, 64, 64)
        self.d4 = Inception(512, 112, 144, 288, 32, 64, 64)
        self.e4 = Inception(528, 256, 160, 320, 32, 128, 128)
        self.max_pool4 = nn.MaxPool2d(3, stride=2, ceil_mode=True)
        self.a5 = Inception(832, 256, 160, 320, 32, 128, 128)
        self.b5 = Inception(832, 384, 192, 384, 48, 128, 128)
        self.avg_pool = nn.AvgPool2d(7)
        self.dropout = nn.Dropout(0.4)
        self.classifier = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.max_pool1(x)
        x = self.conv2(x)
        x = self.max_pool2(x)
        x = self.a3(x)
        x = self.b3(x)
        x = self.max_pool3(x)
        x = self.a4(x)
        x = self.b4(x)
        x = self.c4(x)
        x = self.d4(x)
        x = self.e4(x)
        x = self.max_pool4(x)
        x = self.a5(x)
        x = self.b5(x)
        x = self.avg_pool(x)
        x = self.dropout(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x
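Below is a minimal sanity check for the model above; the two-image batch and the CIFAR-10 adaptation are illustrative assumptions, not part of the original code. It verifies that a 224 x 224 x 3 input produces a (batch, NUM_CLASSES) logit tensor, and shows one simple way to handle smaller inputs such as CIFAR-10: swap the fixed 7x7 average pool for an adaptive one so the classifier always receives a 1024-dimensional vector.

if __name__ == '__main__':
    # sanity check: two 224 x 224 RGB images -> (2, NUM_CLASSES) logits
    net = GoogLeNet(num_classes=NUM_CLASSES)
    dummy = torch.randn(2, 3, 224, 224)
    print(net(dummy).shape)  # expected: torch.Size([2, 10])

    # assumption: for smaller inputs such as CIFAR-10 (32 x 32), replace the
    # fixed 7x7 average pool with an adaptive pool that always outputs 1 x 1
    net.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
    small = torch.randn(2, 3, 32, 32)
    print(net(small).shape)  # expected: torch.Size([2, 10])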
Readers can study the model alongside the GoogLeNet architecture table from the original paper (figure not reproduced here).
That table lists the details of the GoogLeNet network: the "#3x3 reduce" and "#5x5 reduce" columns give the number of 1x1 convolution filters applied before the 3x3 and 5x5 convolutions. The input image is 224 x 224 x 3 with zero-mean preprocessing, and all of these reduction layers use the ReLU non-linearity.
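To tie the table's reduce columns to the code, here is a small sketch (the 28 x 28 feature-map size is an assumption based on a 224 x 224 input) that checks the channel bookkeeping of the first Inception block: its output width is n1_1 + n3x3 + n5x5 + pool_plane = 64 + 128 + 32 + 32 = 256, which matches the in_channel of the next block self.b3.

# inception(3a) from the table: 192 input channels, branch widths 64/128/32/32
block_3a = Inception(192, 64, 96, 128, 16, 32, 32)
feat = torch.randn(1, 192, 28, 28)   # assumed 28 x 28 map after max_pool2
out = block_3a(feat)
print(out.shape)  # torch.Size([1, 256, 28, 28]); 256 = 64 + 128 + 32 + 32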
As shown in the paper's architecture diagram, GoogLeNet also uses auxiliary classifiers. The network is 22 layers deep, and besides the final output, its intermediate layers already classify quite well. Inception Net therefore attaches auxiliary classification heads (auxiliary classifiers): the output of an intermediate layer is fed to a small classifier, and its loss is added to the final loss with a small weight (0.3). This acts somewhat like model ensembling, injects extra gradient signal into backpropagation, and provides additional regularization, all of which helps the training of the whole Inception Net.
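The code above omits these auxiliary heads. A minimal sketch of one, loosely following the paper's description (5x5 average pooling with stride 3, a 1x1 convolution with 128 filters, a 1024-unit fully connected layer, 70% dropout), might look like the following; the class name InceptionAux and the attachment point are assumptions for illustration, not part of the original listing.

class InceptionAux(nn.Module):
    # auxiliary head attached to an intermediate 14 x 14 feature map
    def __init__(self, in_channels, num_classes=NUM_CLASSES):
        super(InceptionAux, self).__init__()
        self.avg_pool = nn.AvgPool2d(5, stride=3)          # 14 x 14 -> 4 x 4
        self.conv = BasicConv2d(in_channels, 128, kernel_size=1)
        self.fc1 = nn.Linear(128 * 4 * 4, 1024)
        self.dropout = nn.Dropout(0.7)
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.avg_pool(x)
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

# during training, e.g. for a head attached after self.a4 (512 output channels):
#     aux = InceptionAux(512)
#     total_loss = main_loss + 0.3 * criterion(aux(intermediate_feat), targets)
# at inference time the auxiliary heads are simply discarded.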
Reference: GoogLeNet 論文解讀 (a walkthrough of the GoogLeNet paper)
Editor: Lornatang
Proofreading: Lornatang