Tensorflow多线程图像加载

时间:2016-11-25 11:12:52

标签: multithreading tensorflow

我有下面这段玩具示例代码:

import glob
from tqdm import tqdm
import tensorflow as tf

# Toy script: read JPEG files through a filename queue and compute the mean
# pixel value of each decoded image, one image per loop iteration.
imgPaths = glob.glob("/home/msmith/imgs/*/*") # Some images

# Queue of file names that feeds the reader below.
filenameQ = tf.train.string_input_producer(imgPaths)
reader = tf.WholeFileReader()
# `key` is the file name and `value` the raw file contents (per the
# WholeFileReader API); only `value` is used below.
key, value = reader.read(filenameQ)

# Decode the raw bytes into an image tensor.
img = tf.image.decode_jpeg(value)
init_op = tf.initialize_all_variables()

with tf.Session() as sess:
    sess.run(init_op)
    coord = tf.train.Coordinator()
    # Start the threads for the queue runners registered in the graph.
    threads = tf.train.start_queue_runners(coord=coord)
    for i in tqdm(range(10000)):
        # Each eval() runs the read + decode ops for one image; the computed
        # mean is discarded (not printed or stored).
        img.eval().mean()

这段代码加载图像并打印每张图像的平均值。图像加载部分是我的TensorFlow图像脚本的瓶颈,我应该如何修改它,才能让图像加载以多线程方式进行?

1 个答案:

答案 0 :(得分:3)

EDIT(2018/3/5):现在使用tf.data API更容易获得相同的结果。

import glob
from tqdm import tqdm
import tensorflow as tf

# tf.data version: decode images on parallel worker calls and prefetch the
# per-image means so the training loop never waits on file I/O.
imgPaths = glob.glob("/home/msmith/imgs/*/*")  # Some images


def _image_mean(path):
    """Read the JPEG file at `path`, decode it and return its mean pixel value."""
    pixels = tf.image.decode_jpeg(tf.read_file(path))
    # The decoded image is uint8 and reduce_mean keeps its input dtype, so
    # cast to float first to get a real-valued mean (like NumPy's `.mean()`).
    return tf.reduce_mean(tf.cast(pixels, tf.float32))


dataset = (tf.data.Dataset.from_tensor_slices(imgPaths)
           # Cycle over the file list indefinitely so the fixed-length loop
           # below cannot run past the end when there are fewer files than
           # iterations.
           .repeat()
           # Up to 16 images are read and decoded concurrently.
           .map(_image_mean, num_parallel_calls=16)
           # Keep up to 128 computed means buffered ahead of the consumer.
           .prefetch(128))

iterator = dataset.make_one_shot_iterator()
next_mean = iterator.get_next()

with tf.Session() as sess:
    for _ in tqdm(range(10000)):
        sess.run(next_mean)

正如sygi在其评论中建议的那样,可以使用tf.train.QueueRunner来定义一些在单独线程中运行的操作,这些操作(通常)会将值加入TensorFlow队列。

import glob
from tqdm import tqdm
import tensorflow as tf

# QueueRunner version: NUM_THREADS background threads read and decode images
# into `image_queue`; the main loop only dequeues already-decoded images.
imgPaths = glob.glob("/home/msmith/imgs/*/*")  # Some images

filenameQ = tf.train.string_input_producer(imgPaths)

# Define a subgraph that takes a filename, reads the file, decodes it, and
# enqueues it.
filename = filenameQ.dequeue()
image_bytes = tf.read_file(filename)
decoded_image = tf.image.decode_jpeg(image_bytes)
# Buffer of up to 128 decoded uint8 images; shapes are left unspecified
# (None) because the images may differ in size.
image_queue = tf.FIFOQueue(128, [tf.uint8], None)
enqueue_op = image_queue.enqueue(decoded_image)

# Create a queue runner that will enqueue decoded images into `image_queue`.
NUM_THREADS = 16
queue_runner = tf.train.QueueRunner(
    image_queue,
    [enqueue_op] * NUM_THREADS,  # Each element will be run from a separate thread.
    image_queue.close(),
    image_queue.close(cancel_pending_enqueues=True))

# Ensure that the queue runner threads are started when we call
# `tf.train.start_queue_runners()` below.
tf.train.add_queue_runner(queue_runner)

# Dequeue the next image from the queue, for returning to the client.
img = image_queue.dequeue()

init_op = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        for _ in tqdm(range(10000)):
            img.eval().mean()
    finally:
        # Ask the runner threads to stop and wait for them before the session
        # closes; otherwise they keep running ops against a closed session.
        coord.request_stop()
        coord.join(threads)