tfrecord數據到可用數據集的轉換
在上一篇文章中實現了tfrecord格式數據的讀取
http://www.reibang.com/p/88d09196bf07
但是讀取的內容還不能直接被網絡訓練函數使用,因而需要對讀到的數據進行簡單處理
reader = tf.data.TFRecordDataset(record_path) # open a TFRecord file
讀取數據后,圖像數據進行解碼和數據類型轉化來適應網絡計算的要求,最后將所得列表轉化為tensor適配fit函數要求的格式,實現如下
def read_dataset(record_path):
    """Load a TFRecord file into shuffled, paired ``(labels, imgs)`` tensors.

    Every record is parsed with ``_parse_function``. Its label is cast to
    float32 and shifted down by one; its raw image bytes are decoded as
    uint8 and cast to float32. The records are then put into a fixed
    pseudo-random order (seed 1024) and stacked into two tensors.

    Args:
        record_path: Path of the TFRecord file, passed to
            ``tf.data.TFRecordDataset``.

    Returns:
        A ``(labels, imgs)`` tuple of tensors; entry ``i`` of both tensors
        comes from the same record.

    Note:
        As a side effect the global NumPy RNG is left seeded with 1024.
    """
    # Dataset-level shuffling (``shuffle(buffer_size=1000)``) is left
    # disabled; the order is randomised once, after loading, below.
    # Tip: iterate ``dataset.take(n)`` to inspect only the first n records.
    dataset = tf.data.TFRecordDataset(record_path).map(_parse_function)

    labels = []
    imgs = []
    for row in dataset:
        # Shift the stored label down by one (presumably labels are stored
        # 1-based while training expects 0-based ids -- TODO confirm).
        label = tf.cast(row['label'], dtype=tf.float32) - 1
        # Decode as uint8, NOT int8: with the wrong type the decoded pixels
        # no longer match the image as it is normally read from disk.
        img = tf.io.decode_raw(row['img_raw'], out_type=tf.uint8)
        img = tf.cast(img, dtype=tf.float32)
        labels.append(label)
        imgs.append(img)

    # Draw ONE permutation and apply it to both lists. Pairing of label and
    # image then holds by construction instead of relying on re-seeding the
    # RNG between two independent shuffles.
    order = list(range(len(labels)))
    np.random.seed(1024)
    np.random.shuffle(order)
    labels = [labels[i] for i in order]
    imgs = [imgs[i] for i in order]
    # Leave the global RNG freshly seeded, as earlier callers observed.
    np.random.seed(1024)

    return tf.convert_to_tensor(labels), tf.convert_to_tensor(imgs)
應當注意tf.int8和tf.uint8的區別,使用錯誤將造成正常讀入的圖片解碼結果與tfrecord解碼結果不一致
訓練過程與訓練模型保存
該部分的內容與
http://www.reibang.com/p/94cf2a32bbf0
中的差異并不大,這里直接貼出完整實現
import tensorflow as tf
import os
import numpy as np
# Schema of one serialized example; used to parse records of the dataset.
feature_description = {
    # Scalar int64 class label; -1 is an arbitrary, user-chosen default.
    'label': tf.io.FixedLenFeature(
        [],
        tf.int64,
        default_value=-1,
    ),
    # Scalar string holding the raw image bytes.
    'img_raw': tf.io.FixedLenFeature(
        [],
        tf.string,
    ),
}
# Map function: parses a single serialized example.
def _parse_function(exam_proto):
    """Parse one serialized example according to ``feature_description``.

    Args:
        exam_proto: One serialized example, as yielded by the record dataset.

    Returns:
        The result of ``tf.io.parse_single_example`` for that record, keyed
        by the names declared in ``feature_description``.
    """
    parsed_example = tf.io.parse_single_example(
        exam_proto,
        feature_description,
    )
    return parsed_example
# Read the TFRecord file and return its content as tensors.
def read_dataset(record_path):
    """Load a TFRecord file into shuffled, paired ``(labels, imgs)`` tensors.

    Every record is parsed with ``_parse_function``. Its label is cast to
    float32 and shifted down by one; its raw image bytes are decoded as
    uint8 and cast to float32. The records are then put into a fixed
    pseudo-random order (seed 1024) and stacked into two tensors.

    Args:
        record_path: Path of the TFRecord file, passed to
            ``tf.data.TFRecordDataset``.

    Returns:
        A ``(labels, imgs)`` tuple of tensors; entry ``i`` of both tensors
        comes from the same record.

    Note:
        As a side effect the global NumPy RNG is left seeded with 1024.
    """
    # Dataset-level shuffling (``shuffle(buffer_size=1000)``) is left
    # disabled; the order is randomised once, after loading, below.
    # Tip: iterate ``dataset.take(n)`` to inspect only the first n records.
    dataset = tf.data.TFRecordDataset(record_path).map(_parse_function)

    labels = []
    imgs = []
    for row in dataset:
        # Shift the stored label down by one (presumably labels are stored
        # 1-based while training expects 0-based ids -- TODO confirm).
        label = tf.cast(row['label'], dtype=tf.float32) - 1
        # Decode as uint8, NOT int8: with the wrong type the decoded pixels
        # no longer match the image as it is normally read from disk.
        img = tf.io.decode_raw(row['img_raw'], out_type=tf.uint8)
        img = tf.cast(img, dtype=tf.float32)
        labels.append(label)
        imgs.append(img)

    # Draw ONE permutation and apply it to both lists. Pairing of label and
    # image then holds by construction instead of relying on re-seeding the
    # RNG between two independent shuffles.
    order = list(range(len(labels)))
    np.random.seed(1024)
    np.random.shuffle(order)
    labels = [labels[i] for i in order]
    imgs = [imgs[i] for i in order]
    # Leave the global RNG freshly seeded, as earlier callers observed.
    np.random.seed(1024)

    return tf.convert_to_tensor(labels), tf.convert_to_tensor(imgs)
if __name__ == '__main__':
    # Load the whole training set into memory as two tensors.
    labels, imgs = read_dataset('./armor_train.tfrecords')

    def _dense(units, activation):
        """Build a Dense layer with the default L2 kernel regulariser."""
        return tf.keras.layers.Dense(
            units,
            activation=activation,
            kernel_regularizer=tf.keras.regularizers.l2(),
        )

    # Network: flatten the input, three ReLU hidden layers, and an 8-way
    # softmax output layer.
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Flatten())
    for hidden_units in (500, 128, 50):
        model.add(_dense(hidden_units, 'relu'))
    model.add(_dense(8, 'softmax'))

    # Training configuration. The output layer already applies softmax, so
    # the loss is told it receives probabilities (from_logits=False).
    optimizer = tf.keras.optimizers.Adam()
    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    model.compile(
        optimizer=optimizer,
        loss=loss_fn,
        metrics=['sparse_categorical_accuracy'],
    )

    # Model persistence. Saving a checkpoint also writes an ".index" file,
    # so its presence tells us whether previously trained weights exist.
    ckpt_path = "./checkpoint/armor_id.ckpt"
    index_file = ckpt_path + ".index"
    if os.path.exists(index_file):
        print("--load modle--")
        model.load_weights(ckpt_path)

    cp_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=ckpt_path,
        save_weights_only=True,  # store the weights only, not the full model
        save_best_only=True,     # keep only the best checkpoint
    )

    # Train, holding out the last 20% of the samples for validation that is
    # evaluated after every epoch.
    fit_options = dict(
        batch_size=32,
        epochs=50,
        validation_split=0.2,
        validation_freq=1,
        callbacks=[cp_callback],
    )
    history = model.fit(imgs, labels, **fit_options)

    # Print the network structure and parameter counts.
    model.summary()
結果分析
訓練100輪結果如下
... - sparse_categorical_accuracy: 0.9874 - val_loss: 0.3584 - val_sparse_categorical_accuracy: 0.9764
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
flatten (Flatten) multiple 0
_________________________________________________________________
dense (Dense) multiple 6144500
_________________________________________________________________
dense_1 (Dense) multiple 64128
_________________________________________________________________
dense_2 (Dense) multiple 6450
_________________________________________________________________
dense_3 (Dense) multiple 408
=================================================================
Total params: 6,215,486
Trainable params: 6,215,486
Non-trainable params: 0
_________________________________________________________________
后面還需要對網絡的結構進行進一步的更改。不過可以確定的是訓練流程已經跑通。