The code below shows my attempt to run the algorithm on a single GPU and feed it data through a FIFO queue. The data lives in a CSV file; a separate Python thread reads the file one line at a time and enqueues each line into the FIFO.
The first problem I ran into is with the following code:
import threading

import numpy as np
import tensorflow as tf

N = 16
num_ckfs = 80000

# Per-filter constants: q/r noise values plus the initial state and
# variance, each replicated once per filter.
q = [0.01 for i in range(N)]
q_ckfs = np.array([q for i in range(num_ckfs)])

r = [5]
r_ckfs = np.array([r for i in range(num_ckfs)])

init_var = [10.0 for i in range(N)]
init_var_ckfs = np.array([init_var for i in range(num_ckfs)])

init_state = [0.0 for i in range(N)]
init_state_ckfs = np.array([init_state for i in range(num_ckfs)])
class CKF(object):

    def __init__(self, num_ckfs, N):
        self.init_variances = tf.Variable(init_var_ckfs, name='init_variances', dtype=tf.float64)
        self.init_states = tf.Variable(init_state_ckfs, name='init_states', dtype=tf.float64)
        init_states_expanded = tf.expand_dims(self.init_states, 2)  # num_ckfs x N x 1

        self.q_values = tf.constant(q_ckfs, name='q_values', dtype=tf.float64)
        self.r_values = tf.constant(r_ckfs, name='r_values', dtype=tf.float64)

        # Placeholders through which the reader thread feeds one CSV row.
        self.input_vectors = tf.placeholder(tf.float64, shape=[num_ckfs, N], name='input_vectors')
        self.z_k = tf.placeholder(tf.float64, shape=[num_ckfs, 1], name='z_k')

        # FIFO queue (capacity 200) holding (observation, input) pairs.
        q = tf.FIFOQueue(200, [tf.float64, tf.float64], shapes=[[num_ckfs, 1], [num_ckfs, N]])
        self.enqueue_op = q.enqueue([self.z_k, self.input_vectors])
        observations, inputs = q.dequeue()
        # further processing using the input data yields self.projected_output
with tf.device('/gpu:0'):
    ckf_gpu0 = CKF(num_ckfs, N)

def load_and_enqueue():
    # Read one line at a time from the CSV:
    # obvs_list corresponds to the first column,
    # data_list corresponds to the rest of the columns.
    # (count, data_list and obvs_list come from the reading loop elided here.)
    session.run(ckf_gpu0.enqueue_op, feed_dict={
        ckf_gpu0.input_vectors: data_list[0], ckf_gpu0.z_k: obvs_list[0]})
    count += 1

t = threading.Thread(target=load_and_enqueue)
t.start()

# Main loop: one filtering step per CSV row.
for i in range(num_rows):
    out = session.run([ckf_gpu0.projected_output])
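For completeness, here is a rough, simplified version of the reading loop elided above; the file name data.csv and the tiling of each row across all num_ckfs filters are placeholders for illustration, not fixed by the code:

import csv

def load_and_enqueue():
    # Sketch only: 'data.csv' and the row-to-filter tiling are assumptions.
    with open('data.csv') as f:
        for count, row in enumerate(csv.reader(f)):
            vals = [float(v) for v in row]
            obs = np.tile(vals[:1], (num_ckfs, 1))    # -> [num_ckfs, 1]
            data = np.tile(vals[1:], (num_ckfs, 1))   # -> [num_ckfs, N]
            # enqueue blocks when the queue is full, so the reader never
            # runs more than 200 rows ahead of the GPU.
            session.run(ckf_gpu0.enqueue_op,
                        feed_dict={ckf_gpu0.z_k: obs,
                                   ckf_gpu0.input_vectors: data})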
Is there another way to do something like this, i.e. hide the I/O latency while the computation runs on the GPU?
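One alternative I have seen suggested is to let TensorFlow parse the CSV inside the graph with its own queue runners, so the framework manages the reader threads and overlaps file I/O with the GPU computation. A rough, untested sketch of that pattern (the batch size and the column-to-tensor mapping below are placeholders):

# Untested sketch: in-graph CSV reading with TF 1.x queue runners.
filename_queue = tf.train.string_input_producer(['data.csv'])
reader = tf.TextLineReader()
_, line = reader.read(filename_queue)
# One default per column: observation first, then the N input values.
cols = tf.decode_csv(line, record_defaults=[[0.0]] * (N + 1))
z = tf.stack(cols[:1])   # first column -> observation, shape [1]
x = tf.stack(cols[1:])   # remaining columns -> input vector, shape [N]
# tf.train.batch adds a queue plus background enqueue threads, so file
# reads overlap with whatever the GPU is computing.
z_batch, x_batch = tf.train.batch([z, x], batch_size=32, capacity=200)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # ... run the CKF step that consumes z_batch / x_batch ...
    coord.request_stop()
    coord.join(threads)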