如果把文件全部加載到內存中，對大數據量來說是不可行的。TensorFlow 使用隊列，通過多線程來操作隊列的進出。下面舉例說明。
tf.train.slice_input_producer 是一個 tensor 生成器，作用是按照設定，每次從一個 tensor 列表中按順序或者隨機抽取出一個 tensor 放入文件名隊列。
下面這個例子是將文件名加入到隊列中，每次從隊列中只能取出一個 tensor，然後讀取圖片數據，因此仍然是頻繁的 IO 操作。
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
def get_image(image_path):
    """Build the ops that read an image file and decode it into 3 channels.

    Args:
        image_path: File name (string or string tensor) of the image to load.

    Returns:
        The tensor produced by ``tf.image.decode_jpeg`` with ``channels=3``.
    """
    # NOTE(review): callers in this file also pass .png paths; confirm that
    # decode_jpeg accepts them in the TensorFlow version in use.
    raw_bytes = tf.read_file(image_path)
    return tf.image.decode_jpeg(raw_bytes, channels=3)
def plot_pic(batch_img_one_val, batch_img_two_val, label):
    """Show two images side by side in one figure, titled with ``label``.

    Args:
        batch_img_one_val: Image data drawn in the left subplot.
        batch_img_two_val: Image data drawn in the right subplot.
        label: Text used as the figure-level title.
    """
    fig = plt.figure(figsize=(6, 2))
    plt.suptitle(label)
    image_pair = (batch_img_one_val, batch_img_two_val)
    for position, image in enumerate(image_pair, start=1):
        # One row, two columns; axes are hidden so only the picture shows.
        axes = fig.add_subplot(1, 2, position)
        axes.imshow(image)
        axes.axis('off')
    plt.show()
def slice_input_producer_one_sample():
    """Dequeue one (image, image, label) slice at a time and plot it.

    Three parallel Python lists are passed to
    ``tf.train.slice_input_producer``; each queue element is one slice across
    them (the i-th entry of every list). The two file names of a slice are
    read and decoded with ``get_image`` and plotted together with the label.
    The loop fetches and plots 10 slices.

    The session is closed and the queue-runner threads are stopped even when
    a fetch fails (for example because an image file is missing).
    """
    # Start from a clean default graph.
    tf.reset_default_graph()
    batch_size = 1
    images_one_path_list = ['lda.png', 'snapshot.png', 'hua.jpeg']
    images_two_path_list = ['tuzi.jpg', 'test.png', 'hua.jpeg']
    label_list = ['lad_tuzi', 'snap_test', 'hua']

    # Build the input queue. `capacity` is the queue size; one queue element
    # here is a slice such as ['lda.png', 'tuzi.jpg', 'lad_tuzi'].
    train_input_queue = tf.train.slice_input_producer(
        [images_one_path_list, images_two_path_list, label_list],
        capacity=1 * batch_size, shuffle=False)

    # Ops that turn one dequeued slice into two decoded images and a label.
    img_one_queue = get_image(train_input_queue[0])
    img_two_queue = get_image(train_input_queue[1])
    label_queue = train_input_queue[2]

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Start the threads that fill the queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for _ in range(10):
                batch_img_one_val, batch_img_two_val, label = sess.run(
                    [img_one_queue, img_two_queue, label_queue])
                plot_pic(batch_img_one_val, batch_img_two_val, label)
        finally:
            # Always stop and join the queue threads before the session
            # closes, otherwise they are left running after an error.
            coord.request_stop()
            coord.join(threads)


slice_input_producer_one_sample()
第一個
第二個
等等。注意每次讀取的兩張圖片和一個 label 與輸入 list 的對應關係。
現在把讀取到內存的圖片數據加入到新的隊列中
使用tf.train.shuffle_batch
取兩次圖片，每次取三個，這樣程序就從隊列中取出已經加載到內存的圖片數據。
import matplotlib.pyplot as plt
def conver_image_size(img, hsize, wsize):
    """Convert an image tensor to float32 and resize it to a fixed size.

    Args:
        img: Image tensor to convert.
        hsize: Target height passed to ``tf.image.resize_images``.
        wsize: Target width passed to ``tf.image.resize_images``.

    Returns:
        The resized float32 image tensor.
    """
    as_float = tf.image.convert_image_dtype(img, dtype=tf.float32)
    return tf.image.resize_images(as_float, [hsize, wsize])
def slice_input_producer_demo():
    """Batch decoded image pairs with ``tf.train.shuffle_batch`` and plot them.

    File names and labels are sliced by ``tf.train.slice_input_producer``,
    decoded with ``get_image``, resized to a common size with
    ``conver_image_size`` and then fed into ``tf.train.shuffle_batch``. Two
    batches of ``batch_size`` samples are fetched; for each batch the labels
    are printed and the image pairs are drawn in a grid, one pair per row.

    The session is closed and the queue-runner threads are stopped even when
    a fetch fails.
    """
    # Start from a clean default graph.
    tf.reset_default_graph()

    # Image paths, labels and the common size every image is resized to.
    batch_size = 3
    hsize = 377
    wsize = 500
    images_one_path_list = ['lda.png', 'snapshot.png', 'hua.jpeg']
    images_two_path_list = ['tuzi.jpg', 'test.png', 'hua.jpeg']
    label_list = ['lad_tuzi', 'snap_test', 'hua']

    # Build the input queue of (path, path, label) slices.
    train_input_queue = tf.train.slice_input_producer(
        [images_one_path_list, images_two_path_list, label_list],
        capacity=3, shuffle=False)

    # Ops that turn one dequeued slice into two decoded images and a label.
    img_one_queue = get_image(train_input_queue[0])
    img_two_queue = get_image(train_input_queue[1])
    label_queue = train_input_queue[2]

    # Resize both images to the same size before they are batched together.
    img_one_queue = conver_image_size(img_one_queue, hsize, wsize)
    img_two_queue = conver_image_size(img_two_queue, hsize, wsize)

    # shuffle_batch reads samples from the queue in batches.
    batch_img_one, batch_img_two, batch_label = tf.train.shuffle_batch(
        [img_one_queue, img_two_queue, label_queue],
        batch_size=batch_size,
        capacity=10 + 10 * batch_size,
        min_after_dequeue=10,
        num_threads=16)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Start the threads that fill the queues.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for _ in range(2):
                batch_img_one_val, batch_img_two_val, label = sess.run(
                    [batch_img_one, batch_img_two, batch_label])
                # Function-call form so the line is valid on Python 2 and 3.
                print(label)
                fig = plt.figure(figsize=(4, 6))
                for k in range(batch_size):
                    # Row k: first image on the left, second on the right.
                    ax1 = fig.add_subplot(batch_size, 2, 2 * k + 1)
                    ax1.set_title(label[k])
                    ax1.imshow(batch_img_one_val[k])
                    ax2 = fig.add_subplot(batch_size, 2, 2 * k + 2)
                    ax2.set_title(label[k])
                    ax2.imshow(batch_img_two_val[k])
                plt.show()
        finally:
            # Always stop and join the queue threads before the session
            # closes, otherwise they are left running after an error.
            coord.request_stop()
            coord.join(threads)
第一次
第二次
string_input_producer加載序列
def string_input_producter_demo():
    """Queue file names with ``string_input_producer`` and show image batches.

    File names are dequeued from ``tf.train.string_input_producer``, decoded
    with ``get_image``, resized with ``conver_image_size`` and batched by
    ``tf.train.shuffle_batch``. Two batches are fetched and every image of
    each batch is shown in its own figure.

    Each batch is fetched with a single ``sess.run`` call. Running
    ``batch_img_one[k]`` separately for every ``k`` would dequeue a new batch
    per call, so the displayed images would come from different batches and
    the remaining images of each batch would be thrown away.
    """
    tf.reset_default_graph()
    images_one_path_list = ['lda.png', 'snapshot.png', 'hua.jpeg']
    batch_size = 2
    hsize = 377
    wsize = 500

    # Build the file-name queue.
    train_input_queue = tf.train.string_input_producer(
        images_one_path_list, capacity=10 * batch_size)

    # Ops that turn one dequeued file name into a resized image.
    img_one_queue = get_image(train_input_queue.dequeue())
    img_one_queue = conver_image_size(img_one_queue, hsize, wsize)

    # Load the image data into a second queue that hands out batches.
    batch_img_one = tf.train.shuffle_batch(
        [img_one_queue],
        batch_size=batch_size,
        capacity=10 + 10 * batch_size,
        min_after_dequeue=10,
        num_threads=16)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for _ in range(2):
                # One run == one dequeued batch; index into the result after.
                batch_val = sess.run(batch_img_one)
                for k in range(batch_size):
                    plt.figure()
                    plt.imshow(batch_val[k])
                    plt.show()
        finally:
            # Always stop and join the queue threads before the session
            # closes, otherwise they are left running after an error.
            coord.request_stop()
            coord.join(threads)


string_input_producter_demo()
A.csv 文件內容如下。加載 CSV 文件：
import tensorflow as tf
from tensorflow.python.framework import ops
# Read two-column CSV records from several files through a file-name queue
# and fetch them in shuffled batches.

# Start from a clean default graph.
ops.reset_default_graph()
batch_size = 2
filenames = ['A.csv', 'B.csv', 'C.csv']
filename_queue = tf.train.string_input_producer(
    filenames, shuffle=False)

# Reader: pulls records from the files named in the queue.
reader = tf.TextLineReader()
key, value = reader.read(filename_queue)

# Decoder: split each record into two string columns, defaulting to 'null'.
example, label = tf.decode_csv(
    value, record_defaults=[['null'], ['null']])

batch_data, label_data = tf.train.shuffle_batch(
    [example, label],
    batch_size=batch_size,
    capacity=10 + 10 * batch_size,
    min_after_dequeue=10,
    num_threads=16)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Coordinator that manages the queue-runner threads.
    coord = tf.train.Coordinator()
    # Start the queue runners; from here on the queues are being filled.
    # The session is passed explicitly instead of relying on the default one.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for _ in range(9):
            batch_, label_ = sess.run([batch_data, label_data])
            # Function-call form so the lines are valid on Python 2 and 3.
            print(batch_)
            print(label_)
            print('-----')
    finally:
        # Always stop and join the queue threads before the session closes,
        # otherwise they are left running after an error.
        coord.request_stop()
        coord.join(threads)
每次從隊列中加載兩條數據
參考
Tensorflow 數據預讀取--Queue