我需要同时计算多个网络的准确度。
# Per-example hit/miss: does the arg-max of y equal the arg-max of the label y_?
predicted_class = tf.argmax(y, 1)
labelled_class = tf.argmax(y_, 1)
correct_prediction = tf.equal(predicted_class, labelled_class)
# Averaging the hits (cast to float32) gives the accuracy.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Evaluate on the MNIST test images/labels and print the result.
test_feed = {x: mnist.test.images, y_: mnist.test.labels}
print(sess.run(accuracy, feed_dict=test_feed))
这段代码与 TensorFlow 教程中的 MNIST 示例相同,只是我没有使用下面的变量定义:
# Graph variables initialised to all zeros: a 784x10 matrix and a length-10 vector.
W = tf.Variable(initial_value=tf.zeros(shape=[784, 10]))
b = tf.Variable(initial_value=tf.zeros(shape=[10]))
而是使用了两个占位符(placeholder),因为这些值我已经事先计算并存储好了。
# float32 placeholders with the same shapes (784x10 and 10); their values are
# supplied through feed_dict at run time instead of being stored in the graph.
W = tf.placeholder(dtype=tf.float32, shape=[784, 10])
b = tf.placeholder(dtype=tf.float32, shape=[10])
我想用我所拥有的值填充网络然后计算准确性,这必须发生在我加载的每个网络上。
因此,如果我加载 20 个网络,我希望并行计算每个网络的准确度。有没有办法让 sess.run 使用不同的输入来执行相同的操作?
答案 0 :(得分:2)
您可以通过多种方式实现并行计算的目标:
我认为最后一种方式(在同一个图中为每个网络分别构建准确度计算,然后通过一次 sess.run 调用全部求值)是最简单的,所以我在下面提供了一些示例代码来帮助您入门:
import tensorflow as tf
def construct_accuracy_calculation(i):
    """Build the accuracy computation for network number ``i``.

    Args:
        i: Integer index of the network. It is used only to give the two
            placeholders distinct names (``"<i>_W"`` and ``"<i>_b"``).

    Returns:
        A ``(W, b, accuracy)`` tuple: the weight and bias placeholders to
        feed, and the tensor that evaluates to the accuracy.
    """
    W = tf.placeholder(tf.float32, [784, 10], name=("%d_W" % i))
    b = tf.placeholder(tf.float32, [10], name=("%d_b" % i))

    # ...
    # NOTE(review): the original snippet elides the code that defines ``y``
    # and ``y_`` here (presumably the model output built from W and b, and
    # the label tensor). Both must be defined before the lines below run.

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return (W, b, accuracy)
def main():
    """Compute the accuracy of every stored network with one ``sess.run``.

    For each of the ``NUM_NETWORKS`` networks this loads the stored
    parameters, builds a separate accuracy computation, and records the
    placeholder-to-value mapping in a single shared feed dict. All accuracy
    tensors are then evaluated together in one ``sess.run`` call.
    """
    accuracy_computations = []
    feed_dict = {}
    # range() instead of xrange(): xrange does not exist on Python 3, while
    # range() iterates the same indices on both Python 2 and Python 3.
    for i in range(NUM_NETWORKS):
        (W, b) = load_network(i)
        (W_op, b_op, accuracy) = construct_accuracy_calculation(i)
        # Map this network's placeholders to its stored parameter values.
        feed_dict[W_op] = W
        feed_dict[b_op] = b
        accuracy_computations.append(accuracy)

    # sess = ...
    # NOTE(review): session creation is elided in the original snippet;
    # ``sess`` must be defined before the call below.
    # Fetching the whole list evaluates every accuracy tensor in one call.
    accuracy_values = sess.run(accuracy_computations, feed_dict=feed_dict)


if __name__ == "__main__":
    main()
答案 1 :(得分:2)
并行化 TF 计算的一种方法是使用线程并行执行 `run` 调用(TF 与 `multiprocessing` 不兼容)。
这种方法比其他方法稍微复杂一些,因为你必须自己在 Python 端处理并行性。
下面是一个示例:在同一个会话中,由不同的 Python 线程使用不同的输入(feed)运行同一个 matmul 操作;与 1 个线程相比,4 个线程的运行速度提高约 4 倍:
import os, sys, queue, threading, time
import tensorflow as tf
import numpy as np
def p(s):
    """Print the string ``s`` followed by a newline and flush the stream.

    The newline is concatenated onto ``s`` and ``print`` is called with an
    empty ``end`` rather than relying on its default line terminator.

    Args:
        s: The text to print. It is concatenated with a string, so it must
            itself be a ``str``.
    """
    # helper function for printing from multiple threads
    # need to append \n or results get intermixed in notebook
    print(s + "\n", flush=True, end="")
num_threads = 4
data_size = 32  # number of data points to enqueue
# True division: on Python 3 this is a float (8.0 here). It is only compared
# against each runner's completed-item counter.
work_per_thread = data_size/num_threads
timeout = 10  # grace period for dequeuing

# Both queues are bounded to data_size entries.
input_queue = queue.Queue(data_size)
output_queue = queue.Queue(data_size)

dtype = np.float32
# use matrix vector matmul since it's compute intensive and uses single core
# see issue #6752
n = 16*1024

with tf.device("/cpu:0"):
    x = tf.placeholder(dtype)
    matrix = tf.Variable(tf.ones((n, n)))
    vector = tf.Variable(tf.ones((n, 1)))
    # Element [0, 0] of the matrix-vector product, offset by the fed value x.
    y = tf.matmul(matrix, vector)[0, 0] + x

# turn off graph-rewriting optimizations
sess = tf.Session(
    config=tf.ConfigProto(
        graph_options=tf.GraphOptions(
            optimizer_options=tf.OptimizerOptions(
                opt_level=tf.OptimizerOptions.L0))))
sess.run(tf.global_variables_initializer())

# Stop flag polled by runner(); the main script sets it to True once it has
# finished reading results.
done = False
def runner(runner_id):
    """Worker loop: take inputs from ``input_queue`` and evaluate ``y``.

    Each input is fed to ``x`` in the shared session ``sess`` and the result
    is put on ``output_queue``. The loop ends in one of two ways:

    * after ``work_per_thread`` items have been computed (``break``), or
    * when the module-level ``done`` flag becomes true, in which case the
      ``while``'s ``else`` clause prints the "Stopping runner" message.

    Args:
        runner_id: Identifier used only in the progress messages. The
            ``%d`` format means it must be a number.
    """
    p("Starting runner %s" % (runner_id,))
    count = 0
    while not done:
        try:
            # Short timeout so the ``done`` flag is re-checked regularly.
            x_val = input_queue.get(timeout=1)
        except queue.Empty:
            # retry on empty queue
            continue

        p("Start computing %d on %d" % (x_val, runner_id))
        out = sess.run(y, {x: x_val})
        count += 1
        output_queue.put(out)
        if count >= work_per_thread:
            break
    else:
        # Reached only when the loop ended because ``done`` was set,
        # not when this runner finished its share via ``break``.
        p("Stopping runner " + str(runner_id))
# Driver: create the worker threads, enqueue the inputs, start the workers,
# collect one result per input, then signal the workers to stop and join them.
print("Creating threads.")
threads = [threading.Thread(target=runner, args=(i,))
           for i in range(num_threads)]

# Enqueue every input before any worker starts.
for i in range(data_size):
    input_queue.put(i, timeout=timeout)

# start threads
p("Launching runners.")
start_time = time.time()
for t in threads:
    t.start()

p("Reading results.")
for _ in range(data_size):
    try:
        p("Main thread: obtained %.2f" % (output_queue.get(timeout=timeout),))
    except queue.Empty:
        print("No results after %d, terminating computation." % (timeout,))
        break
else:
    # Runs only if the loop was not broken, i.e. all results arrived.
    p("Computed successfully.")

# Tell any runner that is still polling the input queue to exit its loop.
done = True
p("Waiting for threads to finish.")
for t in threads:
    t.join()
print("Done in %.2f seconds" % (time.time() - start_time))