1. Introduction:
We use a convolutional neural network to learn from a dataset of Huskies, Labradors, Golden Retrievers, and Corgis, and train a model that can tell these four breeds apart. This article is aimed at readers who already have some background with MNIST handwritten-digit recognition; if you only have a vague idea of what deep learning is, try this article first: Getting Started with Keras: Handwritten Digit Recognition.
2. Dataset
Dataset download, extraction code: 1ik1
The dataset was built with a web crawler and contains over 2,700 dog-breed images, sorted into four class folders named HSQ, LBLD, JM, and KJQ (the folder names are in English because OpenCV cannot open images whose path contains Chinese characters). If you want the crawler source code, see Github.
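The expected folder layout looks like this (H:/DogRaw/ is the root path used in the training script later in this article):

H:/DogRaw/
    HSQ/     images of Huskies
    LBLD/    images of Labradors
    JM/      images of Golden Retrievers
    KJQ/     images of Corgis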
Preparing the dataset:
Image preprocessing
import os
import cv2
import numpy as np
from sklearn.utils import shuffle  # consistent multi-array shuffle, used in Read_datas below

class Datasets:
    def __init__(self, filepath, fields, image_size):
        self.images = []              # pixel data for every image
        self.labels = []              # one-hot label for every image
        self.classs = []              # class (folder) name for every image
        self.rename = []              # file name for every image
        self.fields = fields          # list of class folder names
        self.filepath = filepath      # dataset root folder
        self.image_size = image_size  # images are resized to image_size x image_size

    def Rename(self):
        # Rename every file to "<class index + 1>_<running count>.jpg"
        for field in self.fields:
            label = self.fields.index(field)
            count = 0
            for file in os.listdir(self.filepath + field):
                count += 1
                os.rename(self.filepath + field + '/' + file,
                          self.filepath + field + '/' + str(label + 1) + '_' + str(count) + '.jpg')

    def Load_img(self):
        for field in self.fields:
            index = self.fields.index(field)
            for file in os.listdir(self.filepath + field):
                try:
                    image = cv2.imread(self.filepath + field + '/' + file)
                    # interpolation must be passed as a keyword argument;
                    # passing it positionally would set the unrelated dst parameter
                    image = cv2.resize(image, (self.image_size, self.image_size),
                                       interpolation=cv2.INTER_LINEAR)
                    image = image.astype(np.float32)
                    image = np.multiply(image, 1.0 / 255.0)  # scale pixels to [0, 1]
                    self.images.append(image)
                    label = np.zeros(len(self.fields))
                    label[index] = 1.0  # one-hot encoding
                    self.labels.append(label)
                    self.classs.append(field)
                    self.rename.append(file)
                except Exception:
                    continue  # skip unreadable or corrupt images
        images = np.array(self.images)
        labels = np.array(self.labels)
        fields = np.array(self.classs)
        filenm = np.array(self.rename)
        return images, labels, fields, filenm
The Datasets class handles image preprocessing. Its main jobs are renaming the image files and converting the images into the data format the CNN needs. It takes three arguments: the dataset root folder, the class names (which are also the names of the class folders), and the target image size.
Load_img returns an array with the pixel data of every image, an array of one-hot labels, an array of class names, and an array of file names. A label such as [0,0,1,0] marks the third class in whatever order you pass the fields list; with the folder order HSQ, LBLD, JM, KJQ above, that is JM (Golden Retriever). fields holds the readable class name for each label.
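A minimal usage sketch of the class (the path and class order are taken from the training script at the end of this article):

dataset = Datasets(filepath="H:/DogRaw/", fields=["HSQ", "JM", "LBLD", "KJQ"], image_size=64)
images, labels, fields, filenm = dataset.Load_img()
print(images.shape)  # (N, 64, 64, 3) -- N images, 64x64 pixels, 3 channels, float32 in [0, 1]
print(labels[0])     # a one-hot vector, e.g. [1. 0. 0. 0.] for the first class in fields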
Reading the data
    def Read_datas(self, validation_size):
        class Temp(object):
            pass  # simple namespace to hold the train / valid splits
        data_sets = Temp()
        images, labels, fields, filenm = self.Load_img()
        # shuffle all four arrays in unison so image/label pairs stay aligned
        images, labels, fields, filenm = shuffle(images, labels, fields, filenm)
        # a float validation_size (e.g. 0.1) is treated as a fraction of the dataset
        if isinstance(validation_size, float):
            validation_size = int(validation_size * images.shape[0])
        train_images = images[validation_size:]
        train_labels = labels[validation_size:]
        train_fields = fields[validation_size:]
        train_filenm = filenm[validation_size:]
        validation_images = images[:validation_size]
        validation_labels = labels[:validation_size]
        validation_fields = fields[:validation_size]
        validation_filenm = filenm[:validation_size]
        data_sets.train = DataSet(train_images, train_labels, train_fields, train_filenm)
        data_sets.valid = DataSet(validation_images, validation_labels, validation_fields, validation_filenm)
        return data_sets
class DataSet(object):
    def __init__(self, images, labels, fields, filenm):
        self._num_examples = images.shape[0]
        self._images = images
        self._labels = labels
        self._fields = fields
        self._filenm = filenm
        self._epochs_done = 0
        self._index_in_epoch = 0

    @property
    def images(self):
        return self._images

    @property
    def labels(self):
        return self._labels

    @property
    def fields(self):
        return self._fields

    @property
    def filenm(self):
        return self._filenm

    @property
    def num_examples(self):
        return self._num_examples

    @property
    def epochs_done(self):
        return self._epochs_done
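DataSet also tracks _epochs_done and _index_in_epoch, but no batching method appears in the listing; Keras's model.fit does its own batching, so the counters go unused here. If you wanted to batch manually, a hypothetical next_batch method inside DataSet could look like this (my sketch, not the original author's code):

    def next_batch(self, batch_size):
        # Hypothetical helper: return the next batch_size examples,
        # wrapping around and counting an epoch when the data runs out.
        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
            self._epochs_done += 1
            start = 0
            self._index_in_epoch = batch_size
        end = self._index_in_epoch
        return self._images[start:end], self._labels[start:end]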
Training the model
from Load_data import *  # Datasets / DataSet from the previous section, saved as Load_data.py
import numpy as np
from keras.optimizers import Adadelta
from keras.losses import categorical_crossentropy
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, Dropout
from keras.models import Sequential

class Train(object):
    """Trains a small CNN on the prepared dog-breed dataset."""
    def __init__(self, epoch, classes, batch_size):
        self.epoch = epoch            # number of training epochs
        self.classes = classes        # number of output classes
        self.batch_size = batch_size

    def read_datas(self, filepath, fields, image_size, validation_size):
        datasets = Datasets(filepath, fields, image_size).Read_datas(validation_size)
        self.train = datasets.train
        self.valid = datasets.valid

    def train_datas(self):
        X_train = self.train.images
        Y_train = self.train.labels
        X_valid = self.valid.images
        Y_valid = self.valid.labels
        model = Sequential([
            Conv2D(filters=16, kernel_size=(5, 5), padding='same',
                   activation='relu', input_shape=(64, 64, 3)),
            MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
            Conv2D(filters=32, kernel_size=(5, 5), padding='same', activation='relu'),
            MaxPool2D(pool_size=(2, 2), strides=(2, 2), padding='same'),
            Dropout(0.5),
            Flatten(),
            Dense(64, activation='relu'),
            Dropout(0.25),
            Dense(self.classes, activation='softmax'),  # one unit per breed
        ])
        model.compile(loss=categorical_crossentropy,
                      optimizer=Adadelta(),
                      metrics=['accuracy'])
        model.fit(X_train, Y_train, batch_size=self.batch_size,
                  epochs=self.epoch, validation_data=(X_valid, Y_valid))
        # save with a .h5 extension so the prediction script below can load it
        model.save("my_model.h5")

if __name__ == '__main__':
    a = Train(30, 4, 64)
    a.read_datas(filepath="H:/DogRaw/", fields=["HSQ", "JM", "LBLD", "KJQ"],
                 image_size=64, validation_size=0.1)
    a.train_datas()
The Train class takes three arguments: the number of training epochs, the number of classes in the output layer, and the batch size.
The read_datas method loads the data prepared in the previous section: the training set (X_train, Y_train) and the validation set (X_valid, Y_valid).
The network is a small CNN: two convolution-plus-pooling blocks, a fully connected hidden layer with 64 units, and a softmax output layer with 4 units, one per breed.
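To check the architecture, you can call model.summary() right after building the model inside train_datas; with the 64×64×3 input, the shapes work out as follows (computed from the layer settings above):

model.summary()
# (64, 64, 3)   -> Conv2D(16, 5x5, padding='same') -> (64, 64, 16)
# (64, 64, 16)  -> MaxPool2D(2x2, stride 2)        -> (32, 32, 16)
# (32, 32, 16)  -> Conv2D(32, 5x5, padding='same') -> (32, 32, 32)
# (32, 32, 32)  -> MaxPool2D(2x2, stride 2)        -> (16, 16, 32)
# (16, 16, 32)  -> Flatten                         -> (8192,)
# (8192,)       -> Dense(64, relu)                 -> (64,)
# (64,)         -> Dense(4, softmax)               -> (4,)  one probability per breed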
Predicting an image
import cv2
import numpy as np
from keras.models import load_model

class Pred(object):
    """Loads a trained model and predicts the breed of a single image."""
    def __init__(self, filepath, fields, image_size, modelfile):
        self.fields = fields          # readable class names, in training order
        self.filepath = filepath      # path of the image to classify
        self.modelfile = modelfile    # path of the saved Keras model
        self.image_size = image_size

    def read_data(self):
        # same preprocessing as in training: resize, then scale pixels to [0, 1]
        image = cv2.imread(self.filepath)
        image = cv2.resize(image, (self.image_size, self.image_size))
        self.image = np.array(image).reshape(
            -1, self.image_size, self.image_size, 3).astype("float32") / 255

    def pred_data(self):
        model = load_model(self.modelfile)
        prediction = model.predict(self.image)
        # print the probability of each breed
        for count, p in enumerate(prediction[0]):
            percent = '%.5f%%' % (p * 100)
            print(f"Probability of {self.fields[count]}: {percent}")

if __name__ == '__main__':
    pred = Pred(filepath="HSQ.jpg",
                fields=["Husky", "Golden Retriever", "Labrador", "Corgi"],
                image_size=64, modelfile="my_model.h5")
    pred.read_data()
    pred.pred_data()
Load the model trained in the previous section, load the test image and apply the same preprocessing as in training, then run the prediction.
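If you only want the single most likely breed rather than all four probabilities, np.argmax picks it out; continuing from the __main__ block above (a small sketch, not part of the original script):

model = load_model("my_model.h5")
probs = model.predict(pred.image)[0]  # probabilities for the four breeds
best = int(np.argmax(probs))          # index of the highest probability
print(f"Predicted breed: {pred.fields[best]} ({probs[best] * 100:.2f}%)")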