计算机视觉入门过程(用时约为2个月):
一、理论学习
1、复习了线性代数和概率论
2、学习了python的numpy库和pytorch库的使用
3、李飞飞cs231n课程视频
4、吴恩达深度学习课程视频
二、实践
1、搭建深度学习环境
2、mnist最高达到98.2%,cifar最高达到94.7%
因为李飞飞cs231n课程和吴恩达深度学习课程都是全英课程,所以在CSDN找了对应的笔记,边看笔记边看视频,这样学起来轻松一些。
(课程视频在B站可以找到)
笔记链接如下:
2.吳恩達(dá)深度學(xué)習(xí)課程:https://blog.csdn.net/wuzhongqiang/article/details/89702268?utm_source=app&app_version=4.8.0&code=app_1562916241&uLinkId=usr1mkqgl919blen
还有一些在学习过程中疑问百度到的结果:
1.numpy 中的随机打乱数据方法np.random.shuffle
https://blog.csdn.net/weixin_43896259/article/details/106116955
2.图像预处理Transforms与normalize
https://blog.csdn.net/aidanmo/article/details/104059612
3.关于transforms.Normalize()函数
https://blog.csdn.net/jzwong/article/details/104272600
4.numpy.floor()函数作用:向下取整
5.torch.utils.data.DataLoader()详解
https://blog.csdn.net/qq_40520596/article/details/106981039
https://www.cnblogs.com/pogeba/p/13890846.html
Pytorch里面nn.CrossEntropyLoss的含义
https://blog.csdn.net/lang_yubo/article/details/105108174
model.train()和model.eval()用法和区别
https://zhuanlan.zhihu.com/p/357075502
以optim.SGD为例介绍pytorch优化器
https://www.sogou.com/link?url=hedJjaC291OV7dVab-QfvHtdr0qpeLU_JZ6a8oyfxdi0c29X6nLNTA..
下面来讲讲mnist和cifar数据集的训练过程
1.mnist数据集(注:下方示例代码实际加载的是 CIFAR10 数据集)
from torchvision import datasets, transforms
import numpy as np
from sklearn.metrics import accuracy_score
import torch
# from tqdm import tqdm
import time
# matrix func
def knn(train_x, train_y, test_x, test_y, k):
    """Classify `test_x` by k-nearest-neighbour voting against `train_x`.

    Args:
        train_x: (n, d) float tensor of flattened training images.
        train_y: sequence of n integer labels.
        test_x:  (m, d) float tensor of flattened test images.
        test_y:  sequence of m integer ground-truth labels.
        k:       number of neighbours to vote.

    Returns:
        float accuracy in [0, 1] (also printed, as before).
    """
    since = time.time()    # wall-clock start for the timing printout
    m = test_x.size(0)     # number of test samples
    n = train_x.size(0)    # number of training samples
    # Squared Euclidean distance matrix via ||a-b||^2 = ||a||^2 + ||b||^2 - 2ab:
    # entry (i, j) is the distance between test image i and training image j.
    print("cal dist matrix")
    xx = (test_x ** 2).sum(dim=1, keepdim=True).expand(m, n)
    yy = (train_x ** 2).sum(dim=1, keepdim=True).expand(n, m).transpose(0, 1)
    dist_mat = xx + yy - 2 * test_x.matmul(train_x.transpose(0, 1))
    # topk(largest=False) finds the k smallest distances per row directly,
    # avoiding the full O(n log n) argsort of every row.
    # (Tie order may differ from argsort, which only matters on exact ties.)
    _, mink_idxs = dist_mat.topk(k, dim=-1, largest=False)
    res = []
    for idxs in mink_idxs:
        # majority vote among the k nearest training labels
        res.append(np.bincount(np.array([train_y[idx] for idx in idxs])).argmax())
    assert len(res) == len(test_y)
    # Plain-Python accuracy; no need for sklearn for a mean of matches.
    accuracy = sum(int(p == t) for p, t in zip(res, test_y)) / len(test_y)
    print("識(shí)別率:", accuracy)
    time_elapsed = time.time() - since
    print('KNN mat training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    return accuracy
if __name__ == "__main__":
? ? train_dataset = datasets.CIFAR10(root="./data2", transform=transforms.ToTensor(), train=True)
? ? #參數(shù)說(shuō)明: - root : processed/training.pt 和 processed/test.pt 的主目錄
? ? # -train : True = 訓(xùn)練集, False = 測(cè)試集
? ? # - download : True = 從互聯(lián)網(wǎng)上下載數(shù)據(jù)集吻商,并把數(shù)據(jù)集放在root目錄下.
? ? ? ? ? ? ? ? # 如果數(shù)據(jù)集之前下載過(guò),將處理過(guò)的數(shù)據(jù)(minist.py中有相關(guān)函數(shù))放在processed文件夾下糟红。
? ? test_dataset = datasets.CIFAR10(root="./data2", transform=transforms.ToTensor(), train=False)
? ? # build train&test data
? ? train_x = []
? ? train_y = []
? ? for i in range(len(train_dataset)):? #i為int艾帐,從0到len(train_dataset)-1
? ? ? ? img, target = train_dataset[i]? ? #train_dataset[i]是二元組
? ? ? ? train_x.append(img.view(-1))
? ? ? ? # view(-1)將多維img(tensor([? [[],[]] , [[],[]] ])轉(zhuǎn)化為一維tensor([])
? ? ? ? # ( train_x()是二維[tensor([ , , ,]) , tensor([ , , ,])],第一維是tensor([ , , ,]) )盆偿,
? ? ? ? # 然后加進(jìn)去train_x數(shù)組里面
? ? ? ? train_y.append(target)
? ? ? ? if i > 50000:
? ? ? ? ? ? break
? ? # print(set(train_y))
? ? test_x = []
? ? test_y = []
? ? for i in range(len(test_dataset)):
? ? ? ? img, target = test_dataset[i]
? ? ? ? test_x.append(img.view(-1))
? ? ? ? test_y.append(target)
? ? ? ? if i > 9000:
? ? ? ? ? ? break
? ? print("classes:", set(train_y))? ? ? #將所有標(biāo)簽類輸出柒爸,因?yàn)閟et變成集合后無(wú)重復(fù)
? ? knn(torch.stack(train_x), train_y, torch.stack(test_x), test_y, 7)#stack將[tensor([]),tensor([])]轉(zhuǎn)化為tensor([[],[]])
? ? # knn_by_iter(torch.stack(train_x), train_y, torch.stack(test_x), test_y, 10)
2.cifar数据集
import torch
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
# Convert images to torch.FloatTensor and normalize:
# ToTensor() maps pixel values from [0, 255] to [0, 1]; Normalize with
# mean=std=0.5 per channel then maps [0, 1] to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
# Training and test splits of CIFAR-10 (download=False assumes ./data exists).
train_data = datasets.CIFAR10('data', train=True, download=False, transform=transform)
# BUG FIX: this previously loaded train=True, so the "test" evaluation at the
# end of the script was silently run on the training set.
test_data = datasets.CIFAR10('data', train=False, download=False, transform=transform)
# Percentage of the training set held out as validation.
valid_size = 0.2
# Shuffle training indices, then split them into train/validation parts.
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]
# Samplers restrict each loader to its own index subset.
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
# Data loading: single-process, 16 images per batch.
num_workers = 0
batch_size = 16
# Prepare data loaders (combine dataset and sampler).
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
                                           sampler=valid_sampler, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                          num_workers=num_workers)
# The 10 CIFAR-10 class names, indexed by label.
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# 定義卷積神經(jīng)網(wǎng)絡(luò)結(jié)構(gòu)
class Net(nn.Module):
    """Small CNN for 32x32x3 CIFAR-10 images.

    Three conv+relu+maxpool stages (32->16->8->4 spatial, 3->16->32->64
    channels) followed by a two-layer classifier head with dropout.
    """

    def __init__(self):
        super(Net, self).__init__()
        # 3x3 convolutions with padding=1 preserve spatial size; each stage's
        # halving comes from the shared 2x2 max-pool below.
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        # One pooling module reused after every conv stage.
        self.pool = nn.MaxPool2d(2, 2)
        # Classifier head: 64*4*4 flattened features -> 500 -> 10 class scores.
        self.fc1 = nn.Linear(64 * 4 * 4, 500)
        self.fc2 = nn.Linear(500, 10)
        # Dropout (p=0.3) applied before each fully connected layer.
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        """Return raw class logits for a batch of (N, 3, 32, 32) images."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = self.pool(F.relu(conv(x)))
        # Flatten the (N, 64, 4, 4) feature maps for the linear layers.
        flat = x.view(-1, 64 * 4 * 4)
        hidden = F.relu(self.fc1(self.dropout(flat)))
        # No softmax here: CrossEntropyLoss expects raw logits.
        return self.fc2(self.dropout(hidden))
#create a complete CNN
# Instantiate the CNN and print its layer structure.
model = Net()
print(model)

# Move the model to GPU when CUDA is available.
train_on_gpu = torch.cuda.is_available()
if train_on_gpu:
    model.cuda()

# Cross-entropy loss over the raw logits the network outputs.
criterion = nn.CrossEntropyLoss()
# Plain stochastic gradient descent, learning rate 0.01.
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Number of training epochs.
n_epochs = 40
# Best (lowest) validation loss seen so far; used to decide when to checkpoint.
# BUG FIX: the np.Inf alias was removed in NumPy 2.0 — np.inf is the
# supported spelling.
valid_loss_min = np.inf
for epoch in range(1, n_epochs + 1):
    # Running sums of per-sample losses for this epoch.
    train_loss = 0.0
    valid_loss = 0.0

    ###################
    # train the model #
    ###################
    model.train()
    for data, target in train_loader:
        # Move tensors to the GPU if CUDA is available.
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        # Clear gradients, forward, loss, backward, parameter update.
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # criterion averages over the batch, so rescale to a per-sample sum.
        train_loss += loss.item() * data.size(0)

    ######################
    # validate the model #
    ######################
    model.eval()
    # BUG FIX: the validation pass previously ran with autograd enabled,
    # building unused computation graphs; no_grad saves memory and time.
    with torch.no_grad():
        for data, target in valid_loader:
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            loss = criterion(output, target)
            valid_loss += loss.item() * data.size(0)

    # Average the summed losses over the samples each loader actually draws.
    train_loss = train_loss / len(train_loader.sampler)
    valid_loss = valid_loss / len(valid_loader.sampler)
    print('Epoch:{} \tTraining loss:{} \tValidation loss:{}'.format(
        epoch, train_loss, valid_loss
    ))
    # Checkpoint whenever the validation loss improves.
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({} --> {}). Saving model ...'.format(
            valid_loss_min, valid_loss
        ))
        torch.save(model.state_dict(), 'model_cifar.pt')
        valid_loss_min = valid_loss
# Reload the best checkpoint saved during training.
model.load_state_dict(torch.load('model_cifar.pt', map_location=torch.device('cpu')))

# Track total test loss and per-class hit counts.
test_loss = 0.0
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

model.eval()
# Inference only: disable autograd for the whole evaluation pass.
with torch.no_grad():
    for data, target in test_loader:
        # move tensors to GPU if CUDA is available
        if train_on_gpu:
            data, target = data.cuda(), target.cuda()
        output = model(data)
        loss = criterion(output, target)
        # criterion averages over the batch; rescale to a per-sample sum
        test_loss += loss.item() * data.size(0)
        # Predicted class = index of the max logit in each row.
        _, pred = torch.max(output, 1)
        # Per-sample correctness as a 1-D numpy array (.cpu() is a no-op on
        # CPU tensors, so no GPU/CPU branching is needed).
        correct = pred.eq(target.data.view_as(pred)).cpu().numpy()
        # BUG FIX: this loop used range(batch_size), which raises IndexError
        # whenever the final batch is smaller than batch_size.
        for i in range(target.size(0)):
            label = target.data[i]
            class_correct[label] += correct[i].item()
            class_total[label] += 1

# Average test loss over the whole test set.
test_loss = test_loss/len(test_loader.dataset)
print('Test Loss: {:.6f}\n'.format(test_loss))

# Per-class and overall accuracy report.
for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            classes[i], 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))