我使用 tf.TextLineReader 读取 CSV 文件,并在 GPU(GTX 1050 Ti)上训练。批量大小为 1 时,GPU 负载约为 65%;但批量大小为 100 时,GPU 负载只有 10%。
每行数据的大小为 24 × 60 个 float32 = 5760 字节。
所以我测量了各段代码的执行时间。
批量大小为1时
min_after_dequeue = 100
capacity = min_after_dequeue + 3 * batch_size
example_batch, label_batch = sess.run([examples, labels]) 耗时 20ms
sess.run(train_op, feed_dict={X:example_batch, Y:label_batch, p_keep_conv: 0.8, p_keep_hidden: 0.5}) takes 60ms
批量大小为100时
min_after_dequeue = 200
capacity = min_after_dequeue + 3 * batch_size
example_batch, label_batch = sess.run([examples, labels]) 耗时 5.7s
sess.run(train_op, feed_dict={X:example_batch, Y:label_batch, p_keep_conv: 0.8, p_keep_hidden: 0.5}) 耗时 22ms
也就是说,example_batch, label_batch = sess.run([examples, labels]) 的执行时间增加了约 300 倍。
100 行数据仅为 50Mb,而且数据存放在 SSD 上。我不明白为什么读取数据要花这么长时间。
这是我的文件读取代码。
def read_my_file_format(filename_queue):
    """Read one CSV line from the queue and split it into features and a label.

    Args:
        filename_queue: Queue of CSV file names, e.g. the result of
            ``tf.train.string_input_producer``.

    Returns:
        A ``(features, label)`` pair of tensors built from a single CSV record.
    """
    # The original unpacking named Col1 .. Col1623, so a record has 1623 fields.
    # NOTE(review): an earlier comment said "1440 columns" -- confirm against
    # the actual CSV layout.
    num_columns = 1623

    # The first line of each file is a header and is skipped.
    reader = tf.TextLineReader(skip_header_lines=1)
    _, value = reader.read(filename_queue)

    # One default per column; decode_csv takes each column's dtype from it.
    record_defaults = [[1]] * num_columns
    columns = tf.decode_csv(value, record_defaults=record_defaults)

    # Col4 .. Col1620 (1-based) are the features, i.e. zero-based 3 .. 1619.
    # NOTE(review): assumes the elided middle of the original feature list was
    # the full contiguous run of columns -- confirm.
    features = tf.stack([[[column] for column in columns[3:1620]]])
    # Col29 (1-based) is the label.
    label = tf.stack([columns[28]])
    return features, label
def input_pipeline(batch_size, num_epochs, filenames=None,
                   min_after_dequeue=10000, num_threads=1):
    """Build the queue-based input pipeline and return shuffled batches.

    Args:
        batch_size: Number of examples per returned batch.
        num_epochs: How many times the file list is produced before the
            pipeline raises ``tf.errors.OutOfRangeError``.
        filenames: CSV files to read. Defaults to ``["dataset_n_day_2.csv"]``.
        min_after_dequeue: Minimum number of examples kept in the shuffle
            queue after a dequeue. Larger values shuffle better but delay
            the first batch.
        num_threads: Number of threads enqueuing parsed examples into the
            shuffle queue.

    Returns:
        An ``(example_batch, label_batch)`` pair of tensors.
    """
    if filenames is None:
        filenames = ["dataset_n_day_2.csv"]

    capacity = min_after_dequeue + 3 * batch_size + 20000
    filename_queue = tf.train.string_input_producer(
        list(filenames),
        num_epochs=num_epochs,
        shuffle=True,
        capacity=capacity,
    )
    example, label = read_my_file_format(filename_queue)

    # The shuffle queue must hold at least min_after_dequeue elements after a
    # dequeue, so a batch is only available once roughly
    # min_after_dequeue + batch_size records have been parsed. The reader
    # yields one record per step, so fetching a batch can block for a long
    # time; raise num_threads or lower min_after_dequeue if that hurts.
    example_batch, label_batch = tf.train.shuffle_batch(
        [example, label],
        batch_size=batch_size,
        capacity=capacity,
        min_after_dequeue=min_after_dequeue,
        num_threads=num_threads,
    )
    return example_batch, label_batch
# Training driver: build the input pipeline, start the queue runners, then
# alternate between fetching a batch and running one training step until the
# epoch limit is reached.
examples, labels = input_pipeline(1, 5)
i = 0
# Local variables must be initialized too: num_epochs in the input pipeline
# is tracked by a local epoch counter.
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
merged = tf.summary.merge_all()
trainwriter = tf.summary.FileWriter("./board/custom", sess.graph)
sess.run(init_op)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
try:
    while not coord.should_stop():
        i = i + 1
        # Take a fresh timestamp before every print; reusing one stale value
        # would make the three marks identical and hide the elapsed time.
        st = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print("#1:", st)
        example_batch, label_batch = sess.run([examples, labels])
        st = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print("#2:", st)
        sess.run(train_op, feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 0.8, p_keep_hidden: 0.5})
        st = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        print("#3:", st)
        # Summaries and cost are evaluated with dropout disabled (keep = 1).
        if i % 1 == 0:
            summary = sess.run(merged, feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 1, p_keep_hidden: 1})
            trainwriter.add_summary(summary, i)
            print(cost.eval(feed_dict={X: example_batch, Y: label_batch, p_keep_conv: 1, p_keep_hidden: 1}, session=sess))
except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')
finally:
    # When done, ask the threads to stop.
    coord.request_stop()
# Wait for threads to finish.
coord.join(threads)
# Flush pending summaries to disk before the session goes away.
trainwriter.close()
sess.close()