在使用tf.data.dataset进行训练时,我无法加快数据管道的速度,我想这里缺少一些东西。由于数据集中有不同的选项可以预加载数据,因此数据集速度仍然很慢。
我有一个复杂的数据管道,但我在下面简化成了一个小例子。我尝试微调 num_parallel_calls、cycle_length、预取(prefetch)等参数,但似乎仍无法让数据集生成流畅运行。我遗漏了什么?有什么建议吗?
import tensorflow as tf
tf.enable_eager_execution()
from timeit import default_timer as timer
feature_count = 400  # number of input features per timestep
batch_size = 1024    # samples per generated batch
look_back = 100      # timesteps of history per sample
target_groups = 21   # width of the label tensor (second dim of Y_data)
def random_data_generator(x=0):
    """Yield an endless stream of random (features, labels) batches.

    Args:
        x: shard index forwarded by Dataset.interleave via ``args``; it is
           not used in the data generation itself, only kept so the
           from_generator signature matches.

    Yields:
        (features, labels) where features is float32 of shape
        (batch_size, look_back, feature_count) and labels is int32 of
        shape (batch_size, target_groups).
    """
    while True:
        features = tf.random.uniform(
            (batch_size, look_back, feature_count),
            minval=-1.0,
            maxval=5,
            dtype=tf.dtypes.float32,
        )
        labels = tf.random.uniform(
            (batch_size, target_groups),
            minval=1,
            maxval=21,
            dtype=tf.dtypes.int32,
        )
        yield features, labels
def get_simple_Dataset_generator():
    """Build an interleaved tf.data pipeline over three generator shards
    and yield (x, Y) batches from it indefinitely.

    Yields:
        (x, Y) tuples as produced by random_data_generator.
    """
    dataset = tf.data.Dataset.from_tensor_slices([0, 1, 2])
    dataset = dataset.interleave(
        lambda x: tf.data.Dataset.from_generator(
            random_data_generator,
            # BUG FIX: the generator yields an int32 label tensor, but the
            # declared output_types said (float32, float32); the mismatch
            # fails (or mis-casts) at runtime. Declare int32 for Y.
            output_types=(tf.float32, tf.int32),
            args=(x,)),
        cycle_length=3,
        block_length=3,
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # Prefetch so the pipeline prepares the next batches while the consumer
    # is busy — this was commented out, which is exactly what makes the
    # per-step timings erratic.
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    while True:
        for x, Y in dataset:
            yield x, Y
def test_speed():
    """Time 99 consecutive next() calls on the dataset generator and print
    the per-step wall-clock duration."""
    gen = get_simple_Dataset_generator()
    print("Testing generator speed ")
    for step in range(1, 100):
        t0 = timer()
        next(gen)
        elapsed = timer() - t0
        print("%s Time - %fsec " % (step, elapsed))


if __name__ == '__main__':
    test_speed()
I was hoping to see a consistent generator speed, but it is still very erratic.
Output
1 Time - 3.417578sec
2 Time - 1.257846sec
3 Time - 1.286210sec
4 Time - 0.000456sec
5 Time - 0.027772sec
6 Time - 0.058985sec
7 Time - 0.000416sec
8 Time - 0.026721sec
9 Time - 0.027316sec
10 Time - 0.777332sec
11 Time - 1.379266sec
12 Time - 1.172304sec
13 Time - 0.000365sec
14 Time - 0.026909sec
15 Time - 0.045409sec
16 Time - 0.000708sec
17 Time - 0.025682sec
18 Time - 0.027223sec
19 Time - 0.577131sec
20 Time - 1.220682sec
21 Time - 1.189601sec
22 Time - 0.000573sec
23 Time - 0.079531sec
24 Time - 0.624080sec
25 Time - 0.038932sec
答案 0(得分:0)
我相信 eager execution 的性能问题尚未完全解决。将您的代码改为基于图(graph)的执行后,我得到的每次迭代大约需要 0.06 秒,而不是 eager 版本的大约 1 秒。
这是代码段:
import tensorflow as tf
from timeit import default_timer as timer
feature_count = 400  # number of input features per timestep
batch_size = 1024    # samples per generated batch
look_back = 100      # timesteps of history per sample
target_groups = 21   # width of the label tensor (second dim of Y_data)
def random_data(x=0):
    """Build graph ops producing one random (features, labels) batch.

    Args:
        x: the dataset element passed in by Dataset.map; unused.

    Returns:
        (x_data, Y_data): float32 features of shape
        (batch_size, look_back, feature_count) and int32 labels of shape
        (batch_size, target_groups).
    """
    features = tf.random.uniform(
        (batch_size, look_back, feature_count),
        minval=-1.0,
        maxval=5,
        dtype=tf.dtypes.float32,
    )
    labels = tf.random.uniform(
        (batch_size, target_groups),
        minval=1,
        maxval=21,
        dtype=tf.dtypes.int32,
    )
    return features, labels
def get_simple_Dataset():
    """Return a graph-mode dataset of 100 random batches with a prefetch
    buffer of 10 elements."""
    source = tf.data.Dataset.from_tensor_slices(tf.zeros(100))
    return source.map(random_data).prefetch(10)
def test_speed():
    """Time 99 sess.run() fetches of one dataset element in TF1 graph mode
    and print the per-step wall-clock duration."""
    fetch = get_simple_Dataset().make_one_shot_iterator().get_next()
    with tf.Session() as sess:
        for step in range(1, 100):
            t0 = timer()
            sess.run(fetch)
            elapsed = timer() - t0
            print("%s Time - %fsec " % (step, elapsed))


if __name__ == '__main__':
    test_speed()
1 Time - 0.108968sec
2 Time - 0.071986sec
3 Time - 0.068198sec
4 Time - 0.065433sec
5 Time - 0.066582sec
6 Time - 0.064175sec
7 Time - 0.067372sec
8 Time - 0.064265sec
9 Time - 0.065510sec
10 Time - 0.068043sec