当尝试使用生成器生成测试数据时,我遇到了一个奇怪的问题。 这是我的代码:
from __future__ import absolute_import, division, print_function
import tensorflow as tf
import os
# tf.enable_eager_execution()
def _parse_function(data):
split_data = tf.string_split([data], ",")
tmp = tf.string_to_number(split_data.values, out_type=tf.int32)
result = tf.map_fn(lambda x: (tmp[0], x), tmp[1:], dtype=(tf.int32, tf.int32))
return result
data_path = "data"
file_names = os.listdir(data_path)
file_names = list(map(lambda x: os.path.join(data_path, x), file_names))
dataset = tf.data.TextLineDataset(file_names)
dataset = dataset.map(_parse_function)
dataset = dataset.apply(tf.data.experimental.unbatch())
dataset = dataset.batch(20)
user_id, item_id = dataset.make_one_shot_iterator().get_next()
user_id = tf.reshape(user_id, shape=(-1, ))
item_id = tf.reshape(item_id, shape=(-1, ))
print(user_id)
print(item_id)
with tf.Session() as sess:
for i in range(10):
user_ids = sess.run([user_id])
item_ids = sess.run([item_id])
print(user_ids)
print(item_ids)
这是要处理的原始数据:
1,2,3,4,5
6,7,8,9,10,11
12,13,14,15,16,17
18,19,20
21,22,23
24,25,26
第一列是用户ID,其他列是项目ID。
目标数据是:
1,2
1,3
1,4
...
24,25
24,26
这是我的错误:
Caused by op 'IteratorGetNext', defined at:
File "C:/Users/Liheng/Desktop/xlearning/tensorflow_data.py", line 22, in
<module>
user_id, item_id = dataset.make_one_shot_iterator().get_next()
File "F:\ProgramData\Anaconda3\lib\site-
packages\tensorflow\python\data\ops\iterator_ops.py", line 421, in get_next
name=name)), self._output_types,
File "F:\ProgramData\Anaconda3\lib\site-
packages\tensorflow\python\ops\gen_dataset_ops.py", line 2068, in
iterator_get_next
output_shapes=output_shapes, name=name)
File "F:\ProgramData\Anaconda3\lib\site-
packages\tensorflow\python\framework\op_def_library.py", line 787, in
_apply_op_helper
op_def=op_def)
File "F:\ProgramData\Anaconda3\lib\site-
packages\tensorflow\python\util\deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "F:\ProgramData\Anaconda3\lib\site-
packages\tensorflow\python\framework\ops.py", line 3274, in create_op
op_def=op_def)
File "F:\ProgramData\Anaconda3\lib\site-
packages\tensorflow\python\framework\ops.py", line 1770, in __init__
self._traceback = tf_stack.extract_stack()
OutOfRangeError (see above for traceback): End of sequence
[[node IteratorGetNext (defined at
C:/Users/Liheng/Desktop/xlearning/tensorflow_data.py:22) =
IteratorGetNext[output_shapes=[[?], [?]], output_types=[DT_INT32, DT_INT32],
_device="/job:localhost/replica:0/task:0/device:CPU:0"](OneShotIterator)]]
但是如果我以急切模式运行,则代码运行良好,并且输出在这里:
tf.Tensor([ 1 1 1 1 6 6 6 6 6 12 12 12 12 12 18 18 21 21 24 24],
shape=(20,), dtype=int32)
tf.Tensor([ 2 3 4 5 7 8 9 10 11 13 14 15 16 17 19 20 22 23 25 26],
shape=(20,), dtype=int32)
答案 0 :(得分:1)
我认为,数据集很小,无法执行批处理。 尝试删除该行
数据集= dataset.batch(20)
或
更改为 数据集= dataset.batch(2)