A CNN (convolutional neural network) is used mainly for image recognition and classification. It is built by stacking an input layer, convolution layers, pooling layers, fully connected (Affine) layers, and a Softmax layer. One more essential ingredient of a convolutional network is the filter: it acts between layers (in the convolution and pooling layers) and determines how the data are convolved and pooled. Let's first get an intuitive feel for convolution and pooling.
2D convolution
3D convolution
Pooling
Convolution extracts higher-level features; pooling shrinks the representation and hence the number of parameters. A typical arrangement alternates one convolution layer with one pooling layer, or stacks several convolution layers before each pooling layer.
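To make the arithmetic concrete, here is a minimal illustrative snippet (not from the original post) that slides a 3x3 filter over a 4x4 input with stride 1 and no padding; the output size follows the same formula, (H + 2*pad - FH)/stride + 1, that the code below uses. The names x, k and out are made up for this example.
import numpy as np

x = np.arange(16, dtype=float).reshape(4, 4)   # 4x4 input
k = np.ones((3, 3))                            # 3x3 filter
out_size = (4 + 2*0 - 3)//1 + 1                # = 2
out = np.zeros((out_size, out_size))
for i in range(out_size):
    for j in range(out_size):
        # element-wise product of the window with the filter, then sum
        out[i, j] = np.sum(x[i:i+3, j:j+3] * k)
print(out)   # the 2x2 feature map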
After convolution and pooling, the data are flattened and passed through one or two fully connected layers to produce the result.
Softmax is used for the final classification.
With convolution and pooling in mind, let's implement the simplest possible convolutional network:
Implementing handwritten digit recognition with a CNN
Packages
import sys, os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf   # only used to load the MNIST dataset
from PIL import Image
import pandas as pd
import math
Loading the MNIST dataset
def one_hot_label(y):
    # Convert an array of integer labels into one-hot rows of length 10.
    one_hot_label = np.zeros((y.shape[0], 10))
    y = y.reshape(y.shape[0])
    one_hot_label[range(y.shape[0]), y] = 1
    return one_hot_label
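For illustration (this check is not in the original post), two made-up labels, 3 and 0, become rows with a single 1 in columns 3 and 0:
print(one_hot_label(np.array([3, 0])))
# [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
#  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]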
# (training images, training labels), (test images, test labels)
# MNIST images are 28*28 with a single channel
(x_train_origin,t_train_origin),(x_test_origin,t_test_origin) = tf.keras.datasets.mnist.load_data()
X_train = x_train_origin/255.0
X_test = x_test_origin/255.0
m,h,w = x_train_origin.shape
X_train = X_train.reshape((m,1,h,w))
y_train = one_hot_label(t_train_origin)
m,h,w = x_test_origin.shape
X_test = X_test.reshape((m,1,h,w))
y_test = one_hot_label(t_test_origin)
print("shape of x_train is :"+repr(X_train.shape))
print("shape of t_train is :"+repr(y_train.shape))
print("shape of x_test is :"+repr(X_test.shape))
print("shape of t_test is :"+repr(y_test.shape))
shape of x_train is :(60000, 1, 28, 28)
shape of t_train is :(60000, 10)
shape of x_test is :(10000, 1, 28, 28)
shape of t_test is :(10000, 10)
Displaying an image
index = 0
plt.imshow(X_train[index].reshape((28,28)),cmap = plt.cm.gray)
print("y is:"+str(np.argmax(y_train[index])))
y is:5
Activation functions
def relu(input_X):
    """
    Arguments:
    input_X -- a numpy array
    Return:
    A -- a numpy array of the same shape, with every element clipped to be >= 0
    """
    A = np.where(input_X < 0, 0, input_X)
    return A
def softmax(input_X):
    """
    Arguments:
    input_X -- a numpy array
    Return:
    A -- a numpy array with the same shape as input_X
    """
    # subtract the row-wise maximum for numerical stability (does not change the result)
    input_X = input_X - np.max(input_X, axis=1, keepdims=True)
    exp_a = np.exp(input_X)
    sum_exp_a = np.sum(exp_a, axis=1)
    sum_exp_a = sum_exp_a.reshape(input_X.shape[0], -1)
    ret = exp_a/sum_exp_a
    return ret
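As a quick illustrative sanity check (not part of the original post), each row of the softmax output is a probability distribution and should sum to 1; z below is a made-up input:
z = np.array([[1.0, 2.0, 3.0],
              [0.0, 0.0, 0.0]])
print(softmax(z))              # each row is a probability distribution
print(softmax(z).sum(axis=1))  # [1. 1.]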
Loss function
def cross_entropy_error(labels, logits):
    # labels are one-hot; the loss is summed (not averaged) over the batch
    return -np.sum(labels*np.log(logits))
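The layers below rely on two helper functions, im2col2 and col2im2, that the post does not list (they live in the repository linked at the end). As a reference only, here is one plausible sketch of such helpers, written to match the call signatures used below: im2col2 unfolds every sliding window of a batch into one row of a 2-D array of shape (m*out_h*out_w, C*FH*FW), and col2im2 scatters such rows back into an image-shaped tensor, summing overlaps. The author's actual implementation may differ, for example in the exact row ordering.
def im2col2(X, fh, fw, stride=1, pad=0):
    # Sketch (assumed helper): unfold (m, C, H, W) into (m*out_h*out_w, C*fh*fw).
    m, C, H, W = X.shape
    out_h = (H + 2*pad - fh)//stride + 1
    out_w = (W + 2*pad - fw)//stride + 1
    Xp = np.pad(X, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode="constant")
    col = np.zeros((m, out_h, out_w, C, fh, fw))
    for i in range(out_h):
        for j in range(out_w):
            hs, ws = i*stride, j*stride
            col[:, i, j] = Xp[:, :, hs:hs+fh, ws:ws+fw]
    return col.reshape(m*out_h*out_w, -1)

def col2im2(col, out_shape, fh, fw, stride=1, pad=0):
    # Sketch (assumed helper): the inverse of im2col2; overlapping windows are summed,
    # which is what the backward passes below need.
    m, C, H, W = out_shape
    out_h = (H + 2*pad - fh)//stride + 1
    out_w = (W + 2*pad - fw)//stride + 1
    col = col.reshape(m, out_h, out_w, C, fh, fw)
    dX = np.zeros((m, C, H + 2*pad, W + 2*pad))
    for i in range(out_h):
        for j in range(out_w):
            hs, ws = i*stride, j*stride
            dX[:, :, hs:hs+fh, ws:ws+fw] += col[:, i, j]
    return dX[:, :, pad:H+pad, pad:W+pad]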
Convolution layer
class Convolution:
    def __init__(self, W, fb, stride=1, pad=0):
        """
        W -- filter weights, shape (FN, NC, FH, FW); FN is the number of filters
        fb -- filter biases, shape (1, FN)
        stride -- stride
        pad -- amount of zero padding
        """
        self.W = W
        self.fb = fb
        self.stride = stride
        self.pad = pad
        self.col_X = None
        self.X = None
        self.col_W = None
        self.dW = None
        self.db = None
        self.out_shape = None

    def forward(self, input_X):
        """
        input_X -- shape (m, nc, height, width)
        """
        self.X = input_X
        FN, NC, FH, FW = self.W.shape
        m, input_nc, input_h, input_w = self.X.shape
        # first compute the output height and width
        out_h = int((input_h + 2*self.pad - FH)/self.stride + 1)
        out_w = int((input_w + 2*self.pad - FW)/self.stride + 1)
        # unfold the input into a 2-D array of shape (m*out_h*out_w, FH*FW*C)
        self.col_X = col_X = im2col2(self.X, FH, FW, self.stride, self.pad)
        # unfold each filter into a column: (FH*FW*C, FN)
        self.col_W = col_W = self.W.reshape(FN, -1).T
        out = np.dot(col_X, col_W) + self.fb
        out = out.T
        out = out.reshape(m, FN, out_h, out_w)
        self.out_shape = out.shape
        return out

    def backward(self, dz, learning_rate):
        assert(dz.shape == self.out_shape)
        FN, NC, FH, FW = self.W.shape
        o_FN, o_NC, o_FH, o_FW = self.out_shape
        col_dz = dz.reshape(o_NC, -1)
        col_dz = col_dz.T
        self.dW = np.dot(self.col_X.T, col_dz)   # shape (FH*FW*C, FN)
        self.db = np.sum(col_dz, axis=0, keepdims=True)
        self.dW = self.dW.T.reshape(self.W.shape)
        self.db = self.db.reshape(self.fb.shape)
        d_col_x = np.dot(col_dz, self.col_W.T)   # shape (m*out_h*out_w, FH*FW*C)
        # use self.stride so the backward pass matches the forward pass for any stride
        dx = col2im2(d_col_x, self.X.shape, FH, FW, stride=self.stride)
        assert(dx.shape == self.X.shape)
        # update W and b
        self.W = self.W - learning_rate*self.dW
        self.fb = self.fb - learning_rate*self.db
        return dx
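A small illustrative smoke test (not from the original post; it assumes an im2col2 helper such as the sketch above, or the author's own version). Four 7x7 filters applied to two single-channel 28x28 images should give a (2, 4, 22, 22) output; the *_demo names are made up for this example.
np.random.seed(0)
x_demo = np.random.randn(2, 1, 28, 28)
W_demo = np.random.randn(4, 1, 7, 7) * 0.01
fb_demo = np.zeros((1, 4))
conv_demo = Convolution(W_demo, fb_demo, stride=1, pad=0)
out_demo = conv_demo.forward(x_demo)
print(out_demo.shape)   # expected: (2, 4, 22, 22)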
Pooling layer
class Pooling:
    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad
        self.X = None
        self.arg_max = None

    def forward(self, input_X):
        """
        Forward pass
        input_X -- shape (m, nc, height, width)
        """
        self.X = input_X
        N, C, H, W = input_X.shape
        out_h = int(1 + (H - self.pool_h)/self.stride)
        out_w = int(1 + (W - self.pool_w)/self.stride)
        # unfold the input
        col = im2col2(input_X, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)
        arg_max = np.argmax(col, axis=1)
        # take the maximum of each window
        out = np.max(col, axis=1)
        out = out.T.reshape(N, C, out_h, out_w)
        self.arg_max = arg_max
        return out

    def backward(self, dz):
        """
        Backward pass
        Arguments:
        dz -- gradient of the output, same shape as out
        Return:
        Gradient with respect to the forward input input_X
        """
        pool_size = self.pool_h*self.pool_w
        dmax = np.zeros((dz.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dz.flatten()
        dx = col2im2(dmax, out_shape=self.X.shape, fh=self.pool_h, fw=self.pool_w, stride=self.stride)
        return dx
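Continuing the illustrative smoke test started after the convolution layer (again assuming an im2col2 helper as sketched earlier), 2x2 max pooling with stride 2 halves the spatial size of out_demo:
pool_demo = Pooling(pool_h=2, pool_w=2, stride=2)
p_demo = pool_demo.forward(out_demo)
print(p_demo.shape)   # expected: (2, 4, 11, 11)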
ReLU layer
class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, X):
        self.mask = X <= 0
        out = X.copy()   # copy so the input is not modified in place
        out[self.mask] = 0
        return out

    def backward(self, dz):
        dz[self.mask] = 0
        dx = dz
        return dx
Softmax layer
class SoftMax:
    def __init__(self):
        self.y_hat = None

    def forward(self, X):
        self.y_hat = softmax(X)
        return self.y_hat

    def backward(self, labels):
        m = labels.shape[0]
        dx = (self.y_hat - labels)
        return dx

def compute_cost(logits, label):
    return cross_entropy_error(label, logits)
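A rough sanity check on the loss scale used later: with 10 training examples and an essentially untrained network whose softmax output is nearly uniform over the 10 classes, the summed cross-entropy is about 10 * ln(10), roughly 23.03, which matches the first cost printed during training below.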
Affine (fully connected) layer
class Affine:
    def __init__(self, W, b):
        self.W = W   # shape (n_x, n_unit)
        self.b = b   # shape (1, n_unit)
        self.X = None
        self.origin_x_shape = None
        self.dW = None
        self.db = None
        self.out_shape = None

    def forward(self, X):
        self.origin_x_shape = X.shape
        self.X = X.reshape(X.shape[0], -1)   # (m, n)
        out = np.dot(self.X, self.W) + self.b
        self.out_shape = out.shape
        return out

    def backward(self, dz, learning_rate):
        """
        dz -- gradient flowing in from the next layer
        """
        assert(dz.shape == self.out_shape)
        m = self.X.shape[0]
        self.dW = np.dot(self.X.T, dz)/m
        self.db = np.sum(dz, axis=0, keepdims=True)/m
        assert(self.dW.shape == self.W.shape)
        assert(self.db.shape == self.b.shape)
        dx = np.dot(dz, self.W.T)
        assert(dx.shape == self.X.shape)
        dx = dx.reshape(self.origin_x_shape)   # restore the original shape of x
        # update W and b
        self.W = self.W - learning_rate*self.dW
        self.b = self.b - learning_rate*self.db
        return dx
Model
class SimpleConvNet:
    def __init__(self):
        self.X = None
        self.Y = None
        self.layers = []

    def add_conv_layer(self, n_filter, n_c, f, stride=1, pad=0):
        """
        Add a convolution layer.
        Arguments:
        n_filter -- number of filters
        n_c -- number of input channels, i.e. the channel depth of the filters
        f -- filter height/width
        Return:
        Conv -- the convolution layer
        """
        # initialize the weights W and the biases fb
        W = np.random.randn(n_filter, n_c, f, f)*0.01
        fb = np.zeros((1, n_filter))
        # the convolution layer
        Conv = Convolution(W, fb, stride=stride, pad=pad)
        return Conv

    def add_maxpool_layer(self, pool_shape, stride=1, pad=0):
        """
        Add a max-pooling layer.
        Arguments:
        pool_shape -- (height, width) of the pooling window
        Return:
        pool -- an initialized Pooling object
        """
        pool_h, pool_w = pool_shape
        pool = Pooling(pool_h, pool_w, stride=stride, pad=pad)
        return pool

    def add_affine(self, n_x, n_units):
        """
        Add a fully connected layer.
        Arguments:
        n_x -- number of inputs
        n_units -- number of units
        Return:
        fc_layer -- an Affine layer object
        """
        W = np.random.randn(n_x, n_units)*0.01
        b = np.zeros((1, n_units))
        fc_layer = Affine(W, b)
        return fc_layer

    def add_relu(self):
        relu_layer = Relu()
        return relu_layer

    def add_softmax(self):
        softmax_layer = SoftMax()
        return softmax_layer

    # compute the output H and W after a convolution or pooling layer
    def cacl_out_hw(self, HW, f, stride=1, pad=0):
        return (HW + 2*pad - f)/stride + 1
    def init_model(self, train_X, n_classes):
        """
        Initialize the convolutional network.
        """
        N, C, H, W = train_X.shape
        # convolution layer
        n_filter = 4
        f = 7
        conv_layer = self.add_conv_layer(n_filter=n_filter, n_c=C, f=f, stride=1)
        out_h = self.cacl_out_hw(H, f)
        out_w = self.cacl_out_hw(W, f)
        out_ch = n_filter
        self.layers.append(conv_layer)
        # ReLU
        relu_layer = self.add_relu()
        self.layers.append(relu_layer)
        # pooling
        f = 2
        pool_layer = self.add_maxpool_layer(pool_shape=(f, f), stride=2)
        out_h = self.cacl_out_hw(out_h, f, stride=2)
        out_w = self.cacl_out_hw(out_w, f, stride=2)
        # out_ch is unchanged by pooling
        self.layers.append(pool_layer)
        # Affine layer
        n_x = int(out_h*out_w*out_ch)
        n_units = 32
        fc_layer = self.add_affine(n_x=n_x, n_units=n_units)
        self.layers.append(fc_layer)
        # ReLU
        relu_layer = self.add_relu()
        self.layers.append(relu_layer)
        # Affine layer
        fc_layer = self.add_affine(n_x=n_units, n_units=n_classes)
        self.layers.append(fc_layer)
        # Softmax
        softmax_layer = self.add_softmax()
        self.layers.append(softmax_layer)
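    # Shape trace for the 1x28x28 MNIST input (illustrative, derived from the code above):
    #   conv, four 7x7 filters, stride 1, no padding: (28 - 7)/1 + 1 = 22  -> 4 x 22 x 22
    #   2x2 max pooling, stride 2:                    (22 - 2)/2 + 1 = 11  -> 4 x 11 x 11
    #   flatten: n_x = 4*11*11 = 484 -> Affine(484, 32) -> Affine(32, n_classes) -> softmax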
    def forward_progation(self, train_X, print_out=False):
        """
        Forward pass.
        Arguments:
        train_X -- training data
        Return:
        A -- output of the softmax layer
        """
        N, C, H, W = train_X.shape
        index = 0
        # convolution layer
        conv_layer = self.layers[index]
        X = conv_layer.forward(train_X)
        index = index + 1
        if print_out:
            print("After convolution: " + str(X.shape))
        # ReLU
        relu_layer = self.layers[index]
        index = index + 1
        X = relu_layer.forward(X)
        if print_out:
            print("After ReLU: " + str(X.shape))
        # pooling layer
        pool_layer = self.layers[index]
        index = index + 1
        X = pool_layer.forward(X)
        if print_out:
            print("After pooling: " + str(X.shape))
        # Affine layer
        fc_layer = self.layers[index]
        index = index + 1
        X = fc_layer.forward(X)
        if print_out:
            print("After Affine layer: " + str(X.shape))
        # ReLU
        relu_layer = self.layers[index]
        index = index + 1
        X = relu_layer.forward(X)
        if print_out:
            print("After ReLU layer: " + str(X.shape))
        # Affine layer
        fc_layer = self.layers[index]
        index = index + 1
        X = fc_layer.forward(X)
        if print_out:
            print("After Affine layer: " + str(X.shape))
        # Softmax layer
        sofmax_layer = self.layers[index]
        index = index + 1
        A = sofmax_layer.forward(X)
        if print_out:
            print("After Softmax layer: " + str(A.shape))
        return A
    def back_progation(self, train_y, learning_rate):
        """
        Backward pass: walk the layers in reverse order and propagate the gradient.
        """
        index = len(self.layers) - 1
        sofmax_layer = self.layers[index]
        index -= 1
        dz = sofmax_layer.backward(train_y)
        fc_layer = self.layers[index]
        dz = fc_layer.backward(dz, learning_rate=learning_rate)
        index -= 1
        relu_layer = self.layers[index]
        dz = relu_layer.backward(dz)
        index -= 1
        fc_layer = self.layers[index]
        dz = fc_layer.backward(dz, learning_rate=learning_rate)
        index -= 1
        pool_layer = self.layers[index]
        dz = pool_layer.backward(dz)
        index -= 1
        relu_layer = self.layers[index]
        dz = relu_layer.backward(dz)
        index -= 1
        conv_layer = self.layers[index]
        conv_layer.backward(dz, learning_rate=learning_rate)
        index -= 1

    def get_minibatch(self, batch_data, minibatch_size, num):
        m_examples = batch_data.shape[0]
        minibatches = math.ceil(m_examples / minibatch_size)
        if (num < minibatches):
            return batch_data[num*minibatch_size:(num+1)*minibatch_size]
        else:
            return batch_data[num*minibatch_size:m_examples]
    def optimize(self, train_X, train_y, minibatch_size, learning_rate=0.05, num_iters=500):
        """
        Optimization loop (mini-batch gradient descent).
        Arguments:
        train_X -- training data
        train_y -- training labels
        minibatch_size -- size of each mini-batch
        learning_rate -- learning rate
        num_iters -- number of iterations
        """
        m = train_X.shape[0]
        num_batches = math.ceil(m / minibatch_size)
        costs = []
        for iteration in range(num_iters):
            iter_cost = 0
            for batch_num in range(num_batches):
                minibatch_X = self.get_minibatch(train_X, minibatch_size, batch_num)
                minibatch_y = self.get_minibatch(train_y, minibatch_size, batch_num)
                # forward pass
                A = self.forward_progation(minibatch_X, print_out=False)
                # loss
                cost = compute_cost(A, minibatch_y)
                # backward pass
                self.back_progation(minibatch_y, learning_rate)
                if (iteration % 100 == 0):
                    iter_cost += cost/num_batches
            if (iteration % 100 == 0):
                print("After %d iters, cost is: %g" % (iteration, iter_cost))
                costs.append(iter_cost)
        # plot the cost curve
        plt.plot(costs)
        plt.xlabel("iterations/hundreds")
        plt.ylabel("costs")
        plt.show()
    def predicate(self, train_X):
        """
        Predict: run a forward pass and one-hot encode the argmax of the logits.
        """
        logits = self.forward_progation(train_X)
        one_hot = np.zeros_like(logits)
        one_hot[range(train_X.shape[0]), np.argmax(logits, axis=1)] = 1
        return one_hot

    def fit(self, train_X, train_y):
        """
        Train the network.
        """
        self.X = train_X
        self.Y = train_y
        n_y = train_y.shape[1]
        m = train_X.shape[0]
        # initialize the model
        self.init_model(train_X, n_classes=n_y)
        self.optimize(train_X, train_y, minibatch_size=10, learning_rate=0.05, num_iters=800)
        logits = self.predicate(train_X)
        accuracy = np.sum(np.argmax(logits, axis=1) == np.argmax(train_y, axis=1))/m
        print("Training set accuracy: %g" % (accuracy))
convNet = SimpleConvNet()
# experiment with just the first 10 training images
train_X = X_train[0:10]
train_y = y_train[0:10]
convNet.fit(train_X,train_y)
After 0 iters, cost is: 23.0254
After 100 iters, cost is: 14.5255
After 200 iters, cost is: 6.01782
After 300 iters, cost is: 5.71148
After 400 iters, cost is: 5.63212
After 500 iters, cost is: 5.45006
After 600 iters, cost is: 5.05849
After 700 iters, cost is: 4.29723
Training set accuracy: 0.9
Prediction
logits = convNet.predicate(X_train[0:10])
m = 10
accuracy = np.sum(np.argmax(logits,axis=1) == np.argmax(y_train[0:10],axis=1))/m
print("訓(xùn)練的準(zhǔn)確率為:%g" %(accuracy))
Training accuracy: 0.9
index = 0
plt.imshow(X_train[index].reshape((28,28)),cmap = plt.cm.gray)
print("y is:"+str(np.argmax(y_train[index])))
print("your predicate result is :"+str(np.argmax(logits[index])))
y is:5
predicted result is: 5
logits = convNet.predicate(X_test)
m = X_test.shape[0]
accuracy = np.sum(np.argmax(logits,axis=1) == np.argmax(y_test,axis=1))/m
print("測(cè)試的準(zhǔn)確率為:%g" %(accuracy))
Test set accuracy: 0.1031
Since only 10 examples were used for training, the test accuracy is only about 0.1, essentially chance level for 10 classes.
The purpose of this post is to implement a CNN by hand and to understand what happens inside one. A few helper functions are not shown here: they turn the image tensors into matrices so that convolution becomes a matrix product, and then turn the result back into image tensors for the following layers. If you are interested, the complete code is here: https://github.com/huanhuang/SimpleConvNet.git
The implementation borrows ideas from the chapter on convolutional neural networks in 《深度學(xué)習(xí)入門(mén)》 (Deep Learning from Scratch), but with changes: the image-to-matrix and matrix-to-image conversions were rewritten to be easier to follow.