原文鏈接:https://finthon.com/resnet-captcha/
簡介
在《使用Python+Tensorflow的CNN技術快速識別驗證碼》一文中，我們使用了3層簡單的CNN卷積神經網絡完成了驗證碼的識別，準確率不是特別高。雖然ResNet網絡經常使用，但是一直沒有時間來測試之前的驗證碼識別的項目。于是，我花了一下午的時間從頭到尾梳理了這個項目：ResNet網絡識別驗證碼實戰練習——高準確率的機器學習模型。在本文中，我將會詳細地介紹如何處理圖片、制作tfrecord格式文件以及搭建訓練、預測模型，一路實現高準確率的機器學習模型。
運行環境：
- Python 3.6.5
- TensorFlow 1.8.0(GPU版本)
圖片處理
我直接使用先前的驗證碼做測試，畢竟花了6個小時碼出來的驗證碼（3430張）。原始的驗證碼是這樣的：
二值化處理后，得到如下的圖形：
處理過程見前文。
圖片resize
我們今天的項目就是基于處理后的驗證碼開始的。由于使用的是slim框架中的ResNet50網絡，要求輸入大小為 224×224×3，需要事先對圖片大小進行統一修改，方便后面的操作。
import os
import numpy as np
from PIL import Image
# Directory holding the binarised captcha images.
captcha_path = r'F:\resnet_for_captcha\captcha4'
# Directory that receives the resized 224x224x3 images.
save_path = r'F:\resnet_for_captcha\1resize\resize_path'
for i in os.listdir(captcha_path):
    img = Image.open(os.path.join(captcha_path, i))
    # Per the original author's note, the binarised images load as boolean
    # arrays, so True/False is scaled to 255/0 before rebuilding an image.
    arr = np.array(img) * 255
    im = Image.fromarray(arr)
    im = im.resize((224, 224))
    arr = np.array(im)
    # Build the 224x224x3 matrix: one broadcast assignment copies the pixel
    # values into all three channels (replaces a 224*224 Python-level loop).
    x = np.zeros([224, 224, 3])
    x[...] = arr.reshape(224, 224, -1)
    # Image.fromarray needs uint8 data (pixel range 0-255) for an RGB image.
    im = Image.fromarray(x.astype('uint8'))
    im.save(os.path.join(save_path, i))
在這里需要注意幾點：原來的圖片是 114×450 大小的，要變成 224×224×3 大小，相當于多加了一個維度，在第三個維度上的值都是一樣的；圖片矩陣一定要改成 uint8 格式，表示像素點范圍0-255之間。轉換完成后就變成下圖這樣：
雖然部分拉伸了圖片，但是對機器學習來說可以接受。
tfrecord格式
為了提高機器學習的效率，需要構建隊列模式來減少機器讀取數據的等待時間。TensorFlow提供了tfrecord格式文件，結合隊列模式能夠方便數據的讀取操作。在resize驗證碼圖片之后，再將其轉換成tfrecord格式文件。
import os
import tensorflow as tf
from PIL import Image
import numpy as np
# Convert a captcha name into a flat one-hot label vector.
def name2vec(name):
    """Encode a captcha name as a flat one-hot vector of length 6 * 26.

    Position ``i`` of the name owns the slice ``[i * 26, (i + 1) * 26)`` and
    the slot for its letter (``'a'`` -> 0 ... ``'z'`` -> 25) is set to 1.

    Args:
        name: captcha text made of lowercase letters ``a``-``z``,
            at most 6 characters long.

    Returns:
        A float64 NumPy array of shape ``(156,)``.

    Raises:
        ValueError: if a character is not a lowercase ASCII letter. Such
            characters used to set the wrong slot silently (negative index
            wrap-around or a spill into the next position's slice).
        IndexError: if ``name`` has more than 6 characters.
    """
    vector = np.zeros(6 * 26)
    for i, c in enumerate(name):
        offset = ord(c) - 97  # 97 == ord('a')
        if not 0 <= offset < 26:
            raise ValueError(
                'captcha name {!r} contains {!r}, which is not in a-z'.format(name, c))
        vector[i * 26 + offset] = 1
    return vector
# Directory with the resized captcha images.
cwd = r'F:\resnet_for_captcha\1resize\resize_path'
# Directory that receives the TFRecord shards.
file_path = r'F:\resnet_for_captcha\2to_tfrecord\tfrecord'
# Maximum number of images stored in one shard.
bestnum = 1000
# Number of images written to the current shard so far.
num = 0
# Index of the current shard.
recordfilenum = 0
# Shard file name, e.g. train.tfrecords-000.
ftrecordfilename = ("train.tfrecords-%.3d" % recordfilenum)
writer = tf.python_io.TFRecordWriter(os.path.join(file_path, ftrecordfilename))
try:
    for i in os.listdir(cwd):
        num += 1
        print(num)
        if num > bestnum:
            # Close the finished shard before opening the next one; the
            # original leaked every writer except the last.
            writer.close()
            num = 1
            recordfilenum += 1
            ftrecordfilename = ("train.tfrecords-%.3d" % recordfilenum)
            writer = tf.python_io.TFRecordWriter(os.path.join(file_path, ftrecordfilename))
        # The file name (without extension) is the captcha text.
        name = os.path.splitext(i)[0]
        # Serialise the one-hot label as raw bytes.
        name_vec = name2vec(name).tobytes()
        img = Image.open(os.path.join(cwd, i), 'r')
        img_raw = img.tobytes()
        example = tf.train.Example(
            features=tf.train.Features(feature={
                'label': tf.train.Feature(bytes_list=tf.train.BytesList(value=[name_vec])),
                'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
            }))
        writer.write(example.SerializeToString())
finally:
    # Always close the shard that is currently open, even if a file fails.
    writer.close()
我們以每1000張圖片制作一個tfrecord文件，我們把圖片的像素矩陣和圖片的標簽都做成了二進制的數據流，存入tfrecord文件中。最后得到的tfrecord文件如下所示：
訓練模型
做好了tfrecord文件后，接下來的任務就是搭建訓練模型，其中包括讀取tfrecord文件、構建隊列模式、構建ResNet網絡等。
首先定義讀取tfrecord文件的函數：
def read_and_decode_tfrecord(filename):
    """Build graph ops that read one (image, label) pair from TFRecord files.

    Args:
        filename: list of TFRecord file paths fed to the filename queue.

    Returns:
        A tuple ``(img, label)`` of float32 tensors: ``img`` has shape
        [224, 224, 3] with values divided by 255, ``label`` has shape [6 * 26].
    """
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.string),
        'img_raw': tf.FixedLenFeature([], tf.string),
    }
    file_queue = tf.train.string_input_producer(filename)
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(file_queue)
    parsed = tf.parse_single_example(record, features=feature_spec)

    # The label bytes are decoded as float64 and then narrowed to float32.
    label_f64 = tf.decode_raw(parsed['label'], tf.float64)
    label = tf.cast(tf.reshape(label_f64, [6 * 26]), tf.float32)

    # The image bytes are raw uint8 pixels; scale them into [0, 1].
    pixels = tf.decode_raw(parsed['img_raw'], tf.uint8)
    img = tf.cast(tf.reshape(pixels, [224, 224, 3]), tf.float32) / 255.0
    return img, label
filename就是每個tfrecord的地址，因為我們是以二進制流存入tfrecord文件的，當從文件中讀取原來的數據的時候，需要指定數據格式和大小。在這里我們對圖片矩陣進行歸一化。
接下來我們在主函數中定義隊列和模型部分：
def main():
    """Train ResNet-v2-50 on the captcha TFRecords and save checkpoints.

    Builds a shuffled input queue over every file in the TFRecord directory,
    trains with Adam on a sigmoid cross-entropy loss, prints the loss every
    20 steps and the batch accuracy every 100 steps, saves a checkpoint every
    20000 steps and stops after the 10th checkpoint.

    Relies on the module-level names ``config`` (session config), ``nets``
    (``tensorflow.contrib.slim.nets``) and ``read_and_decode_tfrecord``.
    """
    save_dir = r"F:\resnet_for_captcha\3train\model\train.model"
    batch_size_ = 2
    lr = tf.Variable(0.0001, dtype=tf.float32)
    x = tf.placeholder(tf.float32, [None, 224, 224, 3])
    y_ = tf.placeholder(tf.float32, [None, 6 * 26])
    tfrecord_path = r'F:\resnet_for_captcha\2to_tfrecord\tfrecord'
    train_list = [os.path.join(tfrecord_path, name) for name in os.listdir(tfrecord_path)]
    min_after_dequeue = 1000
    # Shuffled batches.
    img, label = read_and_decode_tfrecord(train_list)
    img_batch, label_batch = tf.train.shuffle_batch([img, label], num_threads=2, batch_size=batch_size_,
                                                    capacity=min_after_dequeue + 3 * batch_size_,
                                                    min_after_dequeue=min_after_dequeue)
    pred, end_points = nets.resnet_v2.resnet_v2_50(x, num_classes=6 * 26, is_training=True)
    pred = tf.reshape(pred, shape=[-1, 6 * 26])
    # Loss function.
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y_))
    # Batch-norm moving averages are only updated when the ops collected in
    # UPDATE_OPS run, so tie them to every training step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)
    # Accuracy: per-character argmax over the 26 letters of each of 6 positions.
    predict = tf.reshape(pred, [-1, 6, 26])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(y_, [-1, 6, 26]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    saver = tf.train.Saver()
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        # To resume from an earlier checkpoint, restore it here, e.g.:
        # saver.restore(sess, r'F:\resnet_for_captcha\3train\model\train.model-60000')
        # Coordinator that manages the queue-runner threads.
        coord = tf.train.Coordinator()
        # Start the queue runners so the input queues begin to fill.
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # NOTE(review): the step counter starts at 60000 although the restore
        # above is commented out; it only affects logging and checkpoint names.
        i = 60000
        cycle_num = 0
        try:
            while True:
                i += 1
                b_image, b_label = sess.run([img_batch, label_batch])
                _, loss_ = sess.run([optimizer, loss], feed_dict={x: b_image, y_: b_label})
                if i % 20 == 0:
                    print('step: {}, loss: {}'.format(i, loss_))
                if i % 100 == 0:
                    _loss, acc_train = sess.run([loss, accuracy], feed_dict={x: b_image, y_: b_label})
                    print('--------------------------------------------------------')
                    print('step: {} train_acc: {} loss: {}'.format(i, acc_train, _loss))
                    print('--------------------------------------------------------')
                if i % 20000 == 0:
                    saver.save(sess, save_dir, global_step=i)
                    cycle_num += 1
                    if cycle_num == 10:
                        break
        finally:
            # Stop the input threads even if a training step raised.
            coord.request_stop()
            # Returns only after all the other threads have shut down.
            coord.join(threads)
構建隊列使用的是隨機順序隊列，使用 resnet_v2_50 模型。在這里使用了線程來管理隊列，每20步輸出一次loss值，每100步輸出一次準確率，每20000步保存一次模型。這里選擇 batch_size_ = 2，個人測試后覺得不錯的經驗值。最后，按照整個流程訓練了14萬步停止。
這部分完整代碼如下：
import os
import tensorflow as tf
import tensorflow.contrib.slim.nets as nets
import datetime
# Session configuration passed to tf.Session in main().
config = tf.ConfigProto()
# Per the TensorFlow GPU options docs, allow_growth makes TensorFlow allocate
# GPU memory on demand instead of reserving it all at start-up.
config.gpu_options.allow_growth = True
def read_and_decode_tfrecord(filename):
    """Build graph ops that read one (image, label) pair from TFRecord files.

    Args:
        filename: list of TFRecord file paths fed to the filename queue.

    Returns:
        A tuple ``(img, label)`` of float32 tensors: ``img`` has shape
        [224, 224, 3] with values divided by 255, ``label`` has shape [6 * 26].
    """
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.string),
        'img_raw': tf.FixedLenFeature([], tf.string),
    }
    file_queue = tf.train.string_input_producer(filename)
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(file_queue)
    parsed = tf.parse_single_example(record, features=feature_spec)

    # The label bytes are decoded as float64 and then narrowed to float32.
    label_f64 = tf.decode_raw(parsed['label'], tf.float64)
    label = tf.cast(tf.reshape(label_f64, [6 * 26]), tf.float32)

    # The image bytes are raw uint8 pixels; scale them into [0, 1].
    pixels = tf.decode_raw(parsed['img_raw'], tf.uint8)
    img = tf.cast(tf.reshape(pixels, [224, 224, 3]), tf.float32) / 255.0
    return img, label
def main():
    """Train ResNet-v2-50 on the captcha TFRecords and save checkpoints.

    Builds a shuffled input queue over every file in the TFRecord directory,
    trains with Adam on a sigmoid cross-entropy loss, prints the loss every
    20 steps and the batch accuracy every 100 steps, saves a checkpoint every
    20000 steps and stops after the 10th checkpoint.

    Relies on the module-level names ``config`` (session config), ``nets``
    (``tensorflow.contrib.slim.nets``) and ``read_and_decode_tfrecord``.
    """
    save_dir = r"F:\resnet_for_captcha\3train\model\train.model"
    batch_size_ = 2
    lr = tf.Variable(0.0001, dtype=tf.float32)
    x = tf.placeholder(tf.float32, [None, 224, 224, 3])
    y_ = tf.placeholder(tf.float32, [None, 6 * 26])
    tfrecord_path = r'F:\resnet_for_captcha\2to_tfrecord\tfrecord'
    train_list = [os.path.join(tfrecord_path, name) for name in os.listdir(tfrecord_path)]
    min_after_dequeue = 1000
    # Shuffled batches.
    img, label = read_and_decode_tfrecord(train_list)
    img_batch, label_batch = tf.train.shuffle_batch([img, label], num_threads=2, batch_size=batch_size_,
                                                    capacity=min_after_dequeue + 3 * batch_size_,
                                                    min_after_dequeue=min_after_dequeue)
    pred, end_points = nets.resnet_v2.resnet_v2_50(x, num_classes=6 * 26, is_training=True)
    pred = tf.reshape(pred, shape=[-1, 6 * 26])
    # Loss function.
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y_))
    # Batch-norm moving averages are only updated when the ops collected in
    # UPDATE_OPS run, so tie them to every training step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)
    # Accuracy: per-character argmax over the 26 letters of each of 6 positions.
    predict = tf.reshape(pred, [-1, 6, 26])
    max_idx_p = tf.argmax(predict, 2)
    max_idx_l = tf.argmax(tf.reshape(y_, [-1, 6, 26]), 2)
    correct_pred = tf.equal(max_idx_p, max_idx_l)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    saver = tf.train.Saver()
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        # To resume from an earlier checkpoint, restore it here, e.g.:
        # saver.restore(sess, r'F:\resnet_for_captcha\3train\model\train.model-60000')
        # Coordinator that manages the queue-runner threads.
        coord = tf.train.Coordinator()
        # Start the queue runners so the input queues begin to fill.
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # NOTE(review): the step counter starts at 60000 although the restore
        # above is commented out; it only affects logging and checkpoint names.
        i = 60000
        cycle_num = 0
        try:
            while True:
                i += 1
                b_image, b_label = sess.run([img_batch, label_batch])
                _, loss_ = sess.run([optimizer, loss], feed_dict={x: b_image, y_: b_label})
                if i % 20 == 0:
                    print('step: {}, loss: {}'.format(i, loss_))
                if i % 100 == 0:
                    _loss, acc_train = sess.run([loss, accuracy], feed_dict={x: b_image, y_: b_label})
                    print('--------------------------------------------------------')
                    print('step: {} train_acc: {} loss: {}'.format(i, acc_train, _loss))
                    print('--------------------------------------------------------')
                if i % 20000 == 0:
                    saver.save(sess, save_dir, global_step=i)
                    cycle_num += 1
                    if cycle_num == 10:
                        break
        finally:
            # Stop the input threads even if a training step raised.
            coord.request_stop()
            # Returns only after all the other threads have shut down.
            coord.join(threads)
if __name__ == '__main__':
    # Measure the wall-clock duration of the whole training run.
    starttime = datetime.datetime.now().timestamp()
    main()
    endtime = datetime.datetime.now().timestamp()
    print(starttime)
    print(endtime)
    # Timestamps are in seconds; convert the difference to hours.
    run_hour = (endtime - starttime) / 3600
    # The message's trailing punctuation was mojibake in the source; restored to "！".
    print('共運行{}小時！'.format(run_hour))
注：加上 config = tf.ConfigProto() 和 config.gpu_options.allow_growth = True 后，TensorFlow會按需分配顯存（而不是一開始就占滿全部顯存），從而提高GPU顯存的利用率，最后我們計算程序運行的時間。
預測模型
我們分別搭建兩個預測模型來展開這部分：一個模型來預測咱們訓練的3430張圖片，看模型學習后的準確率如何；另一個模型來預測樣本外的10張驗證碼圖片，來檢測我們模型的泛化能力。
預測訓練集
import tensorflow as tf
import tensorflow.contrib.slim.nets as nets
import os
import numpy as np
def read_and_decode_tfrecord(filename):
    """Build graph ops that read one (image, label) pair from TFRecord files.

    Args:
        filename: list of TFRecord file paths fed to the filename queue.

    Returns:
        A tuple ``(img, label)`` of float32 tensors: ``img`` has shape
        [224, 224, 3] with values divided by 255, ``label`` has shape [6 * 26].
    """
    feature_spec = {
        'label': tf.FixedLenFeature([], tf.string),
        'img_raw': tf.FixedLenFeature([], tf.string),
    }
    file_queue = tf.train.string_input_producer(filename)
    record_reader = tf.TFRecordReader()
    _, record = record_reader.read(file_queue)
    parsed = tf.parse_single_example(record, features=feature_spec)

    # The label bytes are decoded as float64 and then narrowed to float32.
    label_f64 = tf.decode_raw(parsed['label'], tf.float64)
    label = tf.cast(tf.reshape(label_f64, [6 * 26]), tf.float32)

    # The image bytes are raw uint8 pixels; scale them into [0, 1].
    pixels = tf.decode_raw(parsed['img_raw'], tf.uint8)
    img = tf.cast(tf.reshape(pixels, [224, 224, 3]), tf.float32) / 255.0
    return img, label
def vec2name(vec):
    """Decode a sequence of letter indices into a lowercase string.

    Args:
        vec: iterable of integers, where 0 maps to ``'a'`` and 25 to ``'z'``.

    Returns:
        The decoded string, one character per index (empty for empty input).
    """
    # 97 == ord('a'); join avoids the manual append loop.
    return "".join(chr(i + 97) for i in vec)
# Checkpoint saved after 140000 training steps.
model_dir = r'F:\resnet_for_captcha\3train\model\train.model-140000'
# Directory with the training TFRecord shards.
tfrecord_path = r'F:\resnet_for_captcha\2to_tfrecord\tfrecord'
train_list = [os.path.join(tfrecord_path, file) for file in os.listdir(tfrecord_path)]
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
y_ = tf.placeholder(tf.float32, [None, 6 * 26])
batch_size_ = 1
min_after_dequeue = 1000
# Non-shuffling batch queue (tf.train.batch instead of shuffle_batch).
img, label = read_and_decode_tfrecord(train_list)
img_batch, label_batch = tf.train.batch([img, label], num_threads=2, batch_size=batch_size_,
                                        capacity=min_after_dequeue + 3 * batch_size_)
# NOTE(review): is_training=True is kept from the original, so batch norm runs
# in training mode at prediction time. Presumably this is required because the
# checkpoint was trained without updating the moving statistics - confirm
# before switching to is_training=False.
pred, end_points = nets.resnet_v2.resnet_v2_50(x, num_classes=6 * 26, is_training=True)
predict = tf.reshape(pred, [-1, 6, 26])
max_idx_p = tf.argmax(predict, 2)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, model_dir)
    coord = tf.train.Coordinator()
    # Start the queue runners so the input queues begin to fill.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    nn = 0
    count_true_num = 0
    count_false_num = 0
    try:
        while True:
            nn += 1
            b_image, b_label = sess.run([img_batch, label_batch])
            pre_index = sess.run(max_idx_p, feed_dict={x: b_image})
            vec = pre_index[0].tolist()
            predict_text = vec2name(vec)
            # Recover the true text from the one-hot label.
            max_idx_l = np.argmax(np.reshape(b_label, [-1, 6, 26]), 2)
            vec1 = max_idx_l[0].tolist()
            true_text = vec2name(vec1)
            # Printed labels had pinyin residue in the source; restored.
            print('{} 真實值:{} 預測值:{}'.format(nn, true_text, predict_text))
            if true_text == predict_text:
                count_true_num += 1
            else:
                count_false_num += 1
            # Stop after 3430 samples, the size of the training set.
            if nn == 3430:
                break
        print('正確:{} 錯誤:{} 準確率:{}'.format(count_true_num, count_false_num,
                                           count_true_num / (count_true_num + count_false_num)))
    finally:
        # Stop the input threads even if prediction raised.
        coord.request_stop()
        # Returns only after all the other threads have shut down.
        coord.join(threads)
因為是預測訓練集，所以這里的隊列不再是隨機打亂順序，而是使用了順序讀取 tf.train.batch，最終按照14萬步模型，訓練模型的準確率為0.9915，如果進一步訓練，準確率還會更高。
預測測試集
對于測試集，我提供了10張樣本外的驗證碼圖片，由于測試集數量比較少，就不需要構建tfrecord文件。
import tensorflow as tf
import tensorflow.contrib.slim.nets as nets
from PIL import Image
import os
import numpy as np
# Convert a vector of letter indices into the label name.
def vec2name(vec):
    """Decode a sequence of letter indices into a lowercase string.

    Args:
        vec: iterable of integers, where 0 maps to ``'a'`` and 25 to ``'z'``.

    Returns:
        The decoded string, one character per index (empty for empty input).
    """
    # 97 == ord('a'); join avoids the manual append loop.
    return "".join(chr(i + 97) for i in vec)
# Checkpoint saved after 140000 training steps.
model_dir = r'F:\resnet_for_captcha\3train\model\train.model-140000'
x = tf.placeholder(tf.float32, [None, 224, 224, 3])
# NOTE(review): is_training=True is kept from the original, so batch norm runs
# in training mode at prediction time. Presumably this is required because the
# checkpoint was trained without updating the moving statistics - confirm
# before switching to is_training=False.
pred, end_points = nets.resnet_v2.resnet_v2_50(x, num_classes=6 * 26, is_training=True)
predict = tf.reshape(pred, [-1, 6, 26])
max_idx_p = tf.argmax(predict, 2)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, model_dir)
    test_dir = r'F:\resnet_for_captcha\test'
    for pic in os.listdir(test_dir):
        pic_path = os.path.join(test_dir, pic)
        img = Image.open(pic_path)
        # Same preprocessing as the resize step: scale the binarised image
        # to 0/255, resize to 224x224 and copy it into three channels.
        arr = np.array(img) * 255
        im = Image.fromarray(arr)
        im = im.resize((224, 224))
        arr = np.array(im)
        xx = np.zeros([224, 224, 3])
        xx[...] = arr.reshape(224, 224, -1)
        # Quantise to uint8 and scale to [0, 1] float32 in NumPy. The original
        # built new tf.reshape/tf.cast ops per image, which grew the graph on
        # every iteration and needed an extra sess.run.
        b_image = xx.astype('uint8').reshape(1, 224, 224, 3).astype(np.float32) / 255.0
        # The file name (without extension) is the true captcha text.
        name = os.path.splitext(pic)[0]
        t_label = sess.run(max_idx_p, feed_dict={x: b_image})
        vec = t_label[0].tolist()
        predict_text = vec2name(vec)
        # Printed labels had pinyin residue in the source; restored.
        print('真實值:{} 預測值:{}'.format(name, predict_text))
得到的結果如下：
咱們實現了100%的準確率！！！比之前的預測結果高了很多。
總結
本文介紹了如何處理驗證碼圖片、制作tfrecord格式文件、構建訓練和預測模型。使用ResNet網絡識別驗證碼圖片，實現了非常高的準確率。可見ResNet網絡的普適性和效果都是非常好的。本文代碼放在我的github，所有代碼和模型全部放到這里，密碼：f3k7。涉及到細節的部分本文并沒有作過多的介紹，默認你有一定的TensorFlow基礎，如有問題請在下面留言。