(1) Dropout
Motivation: a good model should be robust to perturbations of its input. Put simply, this is about generalization: the model should still recognize the input even after small changes to it.
- Training with noisy data is equivalent to Tikhonov regularization.
- Dropout: inject noise between the layers of a neural network.
Injecting noise without bias
Adding noise to x gives x'; we want the expectation to stay unchanged, i.e. E[x'] = x.
Dropout applies the following operation to each element, making it either larger or smaller while keeping the expectation the same.
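Concretely, each element x_i is zeroed with probability p and the survivors are scaled up by 1/(1 - p); this is exactly the rule the dropout_layer function below implements:

x_i' = 0 with probability p, and x_i' = x_i / (1 - p) otherwise,

so the expectation is unchanged: E[x_i'] = p · 0 + (1 - p) · x_i / (1 - p) = x_i.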
Applying dropout
- Dropout is typically applied to the outputs of hidden fully connected layers.
Dropout is a form of regularization, and regularization can be understood as smoothing. It is only active during training, where it affects how the model parameters are updated; at inference time a dropout layer simply returns its input unchanged. The dropout probability is a hyperparameter.
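The train-versus-inference behaviour is easy to see with PyTorch's built-in nn.Dropout (a minimal standalone sketch for illustration; the values are arbitrary):

import torch
from torch import nn

drop = nn.Dropout(0.5)
x = torch.ones(8)
drop.train()    # training mode: roughly half the entries are zeroed, survivors scaled by 2
print(drop(x))
drop.eval()     # evaluation mode: dropout is the identity
print(drop(x))  # all ones, unchanged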
(2) Implementing dropout in code
import torch
from torch import nn
from d2l import torch as d2l
from IPython import display
import torchvision
from torch.utils import data
from torchvision import transforms
from matplotlib import pyplot as plt
import numpy as np
def dropout_layer(x, dropout):
    assert 0 <= dropout <= 1
    if dropout == 1:
        return torch.zeros_like(x)
    if dropout == 0:
        return x
    mask = (torch.rand(x.shape) > dropout).float()  # rand is uniform; randn would be standard normal
    # We could zero entries by indexing (x[mask] = 0), but that wastes compute;
    # elementwise multiplication is much faster.
    return mask * x / (1 - dropout)
x = torch.randn(16, dtype=torch.float32).reshape((4, 4))
print(x == dropout_layer(x, 0))
print(dropout_layer(x, 1))
print(dropout_layer(x, 0.5))
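As a sanity check that the rescaling keeps the expectation unchanged, we can average many dropout samples of the same input; the mean should come back close to x itself (a small extra experiment, not part of the original note):

samples = torch.stack([dropout_layer(x, 0.5) for _ in range(10000)])
print(samples.mean(dim=0))  # approximately equal to x, since E[dropout(x)] = x
print(x)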
# Define an MLP with two hidden layers, each with 256 units
num_inputs, num_outputs, num_h1, num_h2 = 784, 10, 256, 256
drop_1, drop_2 = 0.0, 0.0  # no dropout yet: we first show the overfitting baseline
# Build the network: subclass nn.Module and override the forward method
class Net(nn.Module):
    def __init__(self, num_inputs, num_outputs, num_h1, num_h2, is_training=True):
        super(Net, self).__init__()
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.training = is_training
        self.lin1 = nn.Linear(num_inputs, num_h1)
        self.lin2 = nn.Linear(num_h1, num_h2)
        self.lin3 = nn.Linear(num_h2, num_outputs)
        self.relu = nn.ReLU()

    def forward(self, x):
        H1 = self.relu(self.lin1(x.reshape((-1, self.num_inputs))))
        if self.training:  # apply dropout only in training mode
            H1 = dropout_layer(H1, drop_1)
        H2 = self.relu(self.lin2(H1))
        if self.training:
            H2 = dropout_layer(H2, drop_2)
        out = self.lin3(H2)
        return out
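Before training, a throwaway forward pass with random data confirms the output shape (a quick sketch; dummy is fabricated input, not real Fashion-MNIST):

dummy = torch.randn(2, 1, 28, 28)  # two fake 28x28 grayscale images
print(Net(num_inputs, num_outputs, num_h1, num_h2)(dummy).shape)  # torch.Size([2, 10])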
# Load the data and set the hyperparameters
def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and load it into memory."""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)  # Compose chains the transforms; with a single transform this step could be skipped
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=0),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=0))
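To verify the loader works, we can peek at one batch (a quick sanity check, not in the original note; it assumes the dataset downloads to ../data as above):

train_check, _ = load_data_fashion_mnist(256)
X, y = next(iter(train_check))
print(X.shape, y.shape)  # torch.Size([256, 1, 28, 28]) torch.Size([256])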
num_epochs, lr, batch_size = 20, 0.3, 256
train_iter, test_iter = load_data_fashion_mnist(batch_size)
net = Net(num_inputs, num_outputs, num_h1, num_h2)
loss = nn.CrossEntropyLoss(reduction='none')
trainer = torch.optim.SGD(net.parameters(),lr)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
(figure: training curves without dropout, showing overfitting)
Concise implementation
The learning rate is set a bit high, so the curves look somewhat jittery; set it smaller if you want them smoother.
import torch
import torchvision
from torch import nn
from torch.utils import data
from torchvision import transforms
from d2l import torch as d2l
# Hyperparameters
num_epochs = 20
lr = 0.3
batch_size = 256
drop_1 = 0.5
drop_2 = 0.5
# Load the data
def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and load it into memory."""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)  # Compose chains the transforms; with a single transform this step could be skipped
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=0),
            data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=0))
train_iter, test_iter = load_data_fashion_mnist(batch_size)
# Build the network
net = nn.Sequential(
nn.Flatten(),
nn.Linear(784,256),
nn.ReLU(),
nn.Dropout(drop_1),
nn.Linear(256,256),
nn.ReLU(),
nn.Dropout(drop_2),
nn.Linear(256,10))
# Initialize the weights
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)  # random normal initialization
net.apply(init_weights)
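A quick check that apply actually reached the linear layers (index 1 is the first nn.Linear in the Sequential above):

print(net[1].weight.std())  # should be close to 0.01 after initialization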
# Loss function
loss = nn.CrossEntropyLoss(reduction='none')
# Optimizer
trainer = torch.optim.SGD(net.parameters(), lr)
# Start training
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, trainer)
(figure: training curves with dropout=0.5)