Commonly imported packages in PyTorch
from modname import *
#imports everything from a module into the current namespace; should not be overused
A from statement imports a submodule directly, so functions inside it can be called without a long prefix.
Importing a submodule with import keeps the prefix, so an as alias is usually used to shorten it.
import torch #import the whole module
import numpy as np
import torch.utils.data as data
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torchvision import datasets
from torch.autograd import Variable
import matplotlib
from matplotlib import pyplot as plt
import torch.nn.functional as F
#from ... import ... brings a specific part of a module into the current namespace
Dataset
#define a dataset class
import pandas as pd  #pd.read_csv below requires pandas

class myDataset(data.Dataset):
    def __init__(self, csv_file, txt_file, root_dir, other_file):
        self.csv_data = pd.read_csv(csv_file)
        with open(txt_file, 'r') as f:
            data_list = f.readlines()
        self.txt_data = data_list
        self.root_dir = root_dir

    def __len__(self):
        return len(self.csv_data)

    def __getitem__(self, idx):
        #iloc gives positional indexing into the DataFrame
        data = (self.csv_data.iloc[idx], self.txt_data[idx])
        return data
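As a quick usage sketch (the file names below are hypothetical placeholders), the class can then be instantiated and indexed like any sequence:
dataset = myDataset('labels.csv', 'notes.txt', './data', None)  #hypothetical files
print(len(dataset))             #number of rows in the csv
csv_row, txt_line = dataset[0]  #__getitem__ returns a (csv, txt) tuple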
Defining a dataset class this way lets us fetch each sample by iterating over indices, but it makes batching, shuffling, and multi-process data loading hard to implement by hand. PyTorch therefore provides torch.utils.data.DataLoader to build a proper iterator.
#define the iterator:
from torch.utils.data.dataloader import default_collate  #the default batching function

dataiter = data.DataLoader(dataset, batch_size=32,  #pass a Dataset instance, not the class
                           shuffle=True, collate_fn=default_collate)
#collate_fn controls how individual samples are assembled into one batch;
#we can define our own function for exact control, but the default works
#in most cases
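If the default is not enough, we can write our own collate_fn; a hypothetical sketch that returns the csv values and text lines of a batch as two lists:
def my_collate(batch):
    #batch is a list of samples as returned by __getitem__
    csv_items = [sample[0] for sample in batch]
    txt_items = [sample[1] for sample in batch]
    return csv_items, txt_items

dataiter = data.DataLoader(dataset, batch_size=32, shuffle=True,
                           collate_fn=my_collate)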
The torchvision package includes a data-loading class for computer vision, ImageFolder, which expects images arranged like this:
1. root/dog/xxx.png
2. root/dog/xxy.png
3. root/dog/xyz.png
The class is then instantiated like this:
dset = datasets.ImageFolder(root='root_path', transform=None)
#loader defaults to torchvision's built-in PIL image loader
transform and target_transform apply image augmentation, while loader is the
image-reading function that converts each file into the image type the network expects.
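A minimal sketch of combining ImageFolder with a transform pipeline ('root_path' is a placeholder; on very old torchvision versions Resize was named Scale):
img_transform = transforms.Compose([
    transforms.Resize((224, 224)),  #resize every image to a fixed size
    transforms.ToTensor(),          #convert the PIL image to a Tensor
])
dset = datasets.ImageFolder(root='root_path', transform=img_transform)
imgloader = data.DataLoader(dset, batch_size=32, shuffle=True)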
nn.Module
When writing a neural network in PyTorch, every layer and loss function comes from torch.nn, and every model is built by subclassing the base class nn.Module, which gives the following template:
class net_name(nn.Module):
    def __init__(self, other_arguments):
        super(net_name, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size)
        #other network layers

    def forward(self, x):
        x = self.conv1(x)
        return x
Loss functions are also predefined in the nn module:
criterion = nn.CrossEntropyLoss()
loss = criterion(output, target)
torch.optim (optimization)
Optimization is handled by the torch.optim package. When building an optimizer, pass in the parameters to optimize (they must be Variables) together with basic settings such as the learning rate and momentum.
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01,
                            momentum = 0.9)
Before each optimization step, zero the accumulated gradients with optimizer.zero_grad(). Then loss.backward() backpropagates and automatically computes the gradient of every parameter, and finally optimizer.step() applies one parameter update using those gradients.
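Put together, one optimization step looks like this (a generic sketch; model, criterion, inputs, and target are assumed to be defined as above):
optimizer.zero_grad()             #clear gradients left over from the previous step
output = model(inputs)            #forward pass
loss = criterion(output, target)  #compute the loss
loss.backward()                   #backpropagate to compute all parameter gradients
optimizer.step()                  #apply one parameter update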
Saving and loading models: torch.save
There are two ways to save:
(1) save the whole model, structure and parameters together; the saved object is the model itself
(2) save only the parameters; the saved object is the model's state, model.state_dict()
The first argument of save is the object to save, the second is the path and file name:
torch.save(model, './model.pth')
torch.save(model.state_dict(), './model_state.pth')
Loading mirrors the two ways of saving:
(1) load the complete structure and parameters with load_model = torch.load('model.pth'); for large networks this takes longer to load and uses more storage.
(2) load only the parameters: first build the model structure, then import the weights with model.load_state_dict(torch.load('model_state.pth')).
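In code, the two loading styles look like this (a sketch; net_name stands for whatever network class was saved):
#(1) load the whole model object
load_model = torch.load('./model.pth')
#(2) build the structure first, then load only the parameters
model = net_name()
model.load_state_dict(torch.load('./model_state.pth'))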
Code for one-dimensional linear regression
x_train = np.array([[3.3],[4.4],[5.5],[6.71],
[6.93],[4.168],[9.779],[6.182],[7.59],[2.167],[7.042],
[10.791],[5.313],[7.997],[3.1]],dtype = np.float32)
y_train = np.array([[1.7],[2.76],[2.09],[3.19],[1.694],[1.573],
[3.366],[2.596],[2.53],[1.221],[2.827],[3.465],
[1.65],[2.904],[1.3]],dtype = np.float32)
#the numpy arrays can then be plotted with matplotlib (imported above as plt)
plt.plot(x_train, y_train)        #line plot
plt.plot(x_train, y_train, 'go')  #scatter plot with green dots
plt.plot(x_train, y_train, 'r-')  #solid red line
plt.plot(x_train, y_train, 'b^')  #blue triangles
After plotting, call plt.show() to display the figure.
First convert the numpy arrays to Tensors:
x_train = torch.from_numpy(x_train)
y_train = torch.from_numpy(y_train)
Define a simple model:
class LinearRegression(nn.Module):
    def __init__(self):
        super(LinearRegression, self).__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        out = self.linear(x)
        return out

if torch.cuda.is_available():
    model = LinearRegression().cuda()
else:
    model = LinearRegression()
Define the loss function and the optimizer: here mean squared error is the loss, and gradient descent does the optimization.
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3)
Then start training the model:
num_epochs = 1000
for epoch in range(num_epochs):
    if torch.cuda.is_available():  #inputs must live on the same device as the model
        inputs = Variable(x_train).cuda()
        target = Variable(y_train).cuda()
    else:
        inputs = Variable(x_train)
        target = Variable(y_train)
    #forward
    out = model(inputs)
    loss = criterion(out, target)
    #backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 20 == 0:
        print('Epoch[{}/{}], loss: {:.6f}'.format(epoch + 1,
              num_epochs, loss.data[0]))
#note: when pasting into a terminal, do not leave blank lines inside the loop
out holds the result of the forward pass and loss the value of the loss function; we then zero the gradients, backpropagate, and update the parameters. loss.data is a Tensor, and loss.data[0] extracts a plain int or float from it, which is what we print.
model.eval()  #switch to evaluation mode
if torch.cuda.is_available():
    predict = model(Variable(x_train).cuda()).data.cpu().numpy()  #run the model, move back to cpu
else:
    predict = model(Variable(x_train)).data.numpy()  #run the model, convert to a numpy array
plt.plot(x_train.numpy(), y_train.numpy(), 'ro', label='Original data')  #original points as a scatter
plt.plot(x_train.numpy(), predict, label='Fitting Line')  #model output as a line
plt.show()
Polynomial regression
Because a first-degree (linear) polynomial fits with limited accuracy, we use a cubic model:
y = b + w1 * x + w2 * x^2 + w3 * x^3
First preprocess the data into matrix form. In PyTorch, torch.cat() concatenates Tensors:
def make_features(x):
    x = x.unsqueeze(1)  #see help(torch.unsqueeze): inserts a dimension of size 1
    return torch.cat([x ** i for i in range(1, 4)], 1)
    #see help(torch.cat): concatenates the tensors along dimension 1
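To make the shapes concrete, a small sketch of what unsqueeze and cat do here:
x = torch.randn(32)   #shape (32,)
x = x.unsqueeze(1)    #shape (32, 1): a new dimension at index 1
out = torch.cat([x ** i for i in range(1, 4)], 1)
print(out.size())     #torch.Size([32, 3]): the columns are x, x^2, x^3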
Then define the ground-truth function:
W_target = torch.FloatTensor([0.5, 3, 2.4]).unsqueeze(1)
b_target = torch.FloatTensor([0.9])
#f(x) is the ground-truth function: each input x yields its true y
def f(x):
    #approximated function
    return x.mm(W_target) + b_target[0]
For training we sample some points; random numbers give us a fresh training set each time:
def get_batch(batch_size=32):
    random = torch.randn(batch_size)
    x = make_features(random)
    y = f(x)
    return Variable(x), Variable(y)
Define the model:
class poly_model(nn.Module):
    def __init__(self):
        super(poly_model, self).__init__()
        self.poly = nn.Linear(3, 1)  #three input features (x, x^2, x^3), one output

    def forward(self, x):
        out = self.poly(x)
        return out

model = poly_model()
Define the loss function and optimizer:
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3)
Start training the model:
epoch = 0
while True:
    #get data
    batch_x, batch_y = get_batch()
    #forward pass
    output = model(batch_x)
    loss = criterion(output, batch_y)
    print_loss = loss.data[0]
    #reset gradients
    optimizer.zero_grad()
    #backward pass
    loss.backward()
    #update parameters
    optimizer.step()
    epoch += 1
    print('epoch:{} loss:{}'.format(epoch, print_loss))
    if print_loss < 1e-3:
        break
Plot the results. (For unfamiliar functions the book's explanations are quicker than calling help() every time.)
x_test = np.linspace(-5, 5, 50).astype(np.float32)
#linspace generates a sequence; its arguments are start, stop, and the number of points
y_test = 0.9 + 0.5 * x_test + 3 * np.square(x_test) + 2.4 * np.power(x_test, 3)
model.eval()
#the model was built on the cpu in this example, so no .cuda()/.cpu() calls are needed
predict = model(make_features(Variable(torch.from_numpy(x_test))))
predict = predict.data.numpy()
plt.figure()
plt.plot(x_test, y_test, '-r', label='Original Data')
plt.scatter(x_test, predict)
plt.legend()
plt.show()
Code for logistic regression
Set the random seed:
torch.manual_seed(2018)
1. Read the data
with open('./Desktop/data.txt', 'r') as f:
    data_list = f.readlines()
data_list = [i.split('\n')[0] for i in data_list]
data_list = [i.split(',') for i in data_list]
data = [(float(i[0]), float(i[1]), float(i[2])) for i in data_list]
#normalize the data
x0_max = max([i[0] for i in data])
x1_max = max([i[1] for i in data])
data = [(i[0] / x0_max, i[1] / x1_max, i[2]) for i in data]
#convert the data
np_data = np.array(data, dtype='float32')  #convert to a numpy array
x_data = torch.from_numpy(np_data[:, 0:2])  #convert to a Tensor of size [100, 2]
y_data = torch.from_numpy(np_data[:, -1]).unsqueeze(1)  #convert to a Tensor of size [100, 1]
2. Plot the data with matplotlib
x0 = list(filter(lambda x: x[-1] == 0.0, data))  #points of the first class
x1 = list(filter(lambda x: x[-1] == 1.0, data))  #points of the second class
plot_x0 = [i[0] for i in x0]
plot_y0 = [i[1] for i in x0]
plot_x1 = [i[0] for i in x1]
plot_y1 = [i[1] for i in x1]
#define the sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

#plot the sigmoid curve
plot_x = np.arange(-10, 10.01, 0.01)
plot_y = sigmoid(plot_x)
plt.plot(plot_x, plot_y, 'r')
#plot the data points from data.txt
plt.plot(plot_x0, plot_y0, 'ro', label='x_0')
plt.plot(plot_x1, plot_y1, 'bo', label='x_1')
plt.legend(loc='best')
plt.show()
3. Define the logistic regression model, plus the loss function and optimizer for binary classification
class LogisticRegression(nn.Module):
    def __init__(self):
        super(LogisticRegression, self).__init__()
        self.lr = nn.Linear(2, 1)
        self.sm = nn.Sigmoid()

    def forward(self, x):
        x = self.lr(x)
        x = self.sm(x)
        return x

logistic_model = LogisticRegression()
criterion = nn.BCELoss()  #binary classification loss
optimizer = torch.optim.SGD(logistic_model.parameters(), lr=1e-3,
                            momentum=0.9)
4. Train the model
for epoch in range(50000):
    x = Variable(x_data)
    y = Variable(y_data)
    #forward
    out = logistic_model(x)
    loss = criterion(out, y)
    print_loss = loss.data[0]
    mask = out.ge(0.5).float()  #threshold the outputs at 0.5 to get predicted labels
    correct = (mask == y).sum()  #count correct predictions
    acc = correct.data[0] / x.size(0)  #training accuracy
    #backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 1000 == 0:
        print('*' * 10)
        print('epoch {}'.format(epoch + 1))
        print('loss is {:.4f}'.format(print_loss))
        print('acc is {:.4f}'.format(acc))
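After training, the learned weights define the decision boundary w0*x0 + w1*x1 + b = 0; a sketch of drawing it over the data points (scalars extracted in the same old-style .data[0] way used above):
w0, w1 = logistic_model.lr.weight[0]
w0 = w0.data[0]
w1 = w1.data[0]
b = logistic_model.lr.bias.data[0]
plot_x = np.arange(0.2, 1, 0.01)  #x-range spanning the normalized data
plot_y = (-w0 * plot_x - b) / w1  #solve w0*x0 + w1*x1 + b = 0 for x1
plt.plot(plot_x, plot_y, 'g', label='cutting line')
plt.plot(plot_x0, plot_y0, 'ro', label='x_0')
plt.plot(plot_x1, plot_y1, 'bo', label='x_1')
plt.legend(loc='best')
plt.show()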