我有一个RNN模型,我试图通过输入管道从不同长度的输入序列中提取,这些输入管道从包含序列化SequenceExamples的多个TF记录文件中随机抽样,批量填充和多个批次的混洗
每个序列示例有3个元素,长度:常量,输入:1-D数组,标签:1-D数组
程序如下
def read_file_queue(self,filename_queue):
reader = tf.TFRecordReader()
key, ex = reader.read(filename_queue)
context_features = {
"seq-len": tf.FixedLenFeature([],dtype=tf.int64)
}
sequence_features = {
"tokens": tf.FixedLenSequenceFeature([],dtype=tf.int64),
"labels": tf.FixedLenSequenceFeature([],dtype=tf.int64)
}
context_parsed, sequence_parsed = tf.parse_single_sequence_example(serialized=ex,
context_features=context_features,
sequence_features=sequence_features)
return context_parsed["seq-len"], sequence_parsed["tokens"],sequence_parsed["labels"]
def get_batch_data(self):
fqueue = tf.train.string_input_producer(self.data_filelist,
shuffle=True,
num_epochs=self.num_epochs)
# read from multiple tf records as defined by read_threads
ex = [self.read_file_fmt(fqueue) for _ in range(self.read_threads)]
print(ex)
# ex = self.read_file_fmt(fqueue)
pad_output = self.padding_pipeline(ex)
shuffle_output = self.shuffle_pipeline(pad_output)
return shuffle_output
def padding_pipeline(self, input):
padding_queue = tf.PaddingFIFOQueue(
capacity=self.pad_capacity,
dtypes=[tf.int64, tf.int64, tf.int64],
shapes=[[], [None], [None]])
# use enqueue_many instead enqueue because
# the input is list of tuples from each tf record reader thread
padding_enqueue_op = padding_queue.enqueue_many(input) # <<< !!!!! error here !!!!!
padding_queue_runner = tf.train.QueueRunner(padding_queue, [padding_enqueue_op] * self.pad_threads)
tf.train.add_queue_runner(padding_queue_runner)
padding_dequeue_op = padding_queue.dequeue_up_to(self.batch_size)
return padding_dequeue_op
def shuffle_pipeline(self,input):
shuffle_queue = tf.RandomShuffleQueue(
capacity=self.shuffle_capacity,
min_after_dequeue=self.shuffle_min_after_dequeue,
dtypes=[tf.int64, tf.int64, tf.int64],
shapes=None)
shuffle_enqueue_op = shuffle_queue.enqueue(input)
shuffle_queue_runner = tf.train.QueueRunner(
shuffle_queue, [shuffle_enqueue_op] * self.shuffle_threads)
tf.train.add_queue_runner(shuffle_queue_runner)
shuffle_dequeue_op = shuffle_queue.dequeue()
return shuffle_dequeue_op
我收到以下错误:
ValueError:形状必须等于等级,但是0和1来自合并 形状0与其他形状。对于 &#39; padding_fifo_queue_EnqueueMany / component_0&#39; (op:&#39; Pack&#39;)带输入 形状:[],[?],[?]。
我确定我在这里做了些傻事,但是,我找不到我做错了什么..
答案 0 :(得分:0)
从here中提示,或许您应该有以下内容?
padding_queue = tf.PaddingFIFOQueue(
capacity=self.pad_capacity,
dtypes=[tf.int64, tf.int64, tf.int64],
shapes=[None, [None], [None]])
顺便说一句,如果您可以添加一些基本脚本来生成您正在使用的格式的随机数据,那么复制会更容易。感谢。