我正在尝试在Cloud TPU上使用TPU Estimator API训练模型。错误日志和用于读取我的输入数据的代码如下所示。我尝试使用python调试器确定遇到错误的位置。在遇到错误之前,控件不会退出traing_input_fn
函数。因此,我相信我的数据管道是问题的根源。有人可以帮我解决这个问题吗?如有必要,我将乐意提供更多信息。谢谢
INFO:tensorflow:Error recorded from training_loop: The features to the model returned by input_fn must have static shape. Tensor: Tensor("Inf[25/1805]
dequeue:0", shape=(16, ?, 50, 1024), dtype=float32, device=/device:TPU_REPLICATED_CORE:0)
INFO:tensorflow:training_loop marked as finished
WARNING:tensorflow:Reraising captured error
Traceback (most recent call last):
File "estimator_task.py", line 303, in <module>
main(**arguments)
File "estimator_task.py", line 261, in main
estimator.train(input_fn=train_input_fn, max_steps=train_steps, hooks=hooks)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2457, in train
rendezvous.raise_errors()
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/error_handling.py", line 128, in raise_error$
six.reraise(typ, value, traceback)
File "/home/abi/.local/lib/python3.5/site-packages/six.py", line 693, in reraise
raise value
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2452, in train
saving_listeners=saving_listeners)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 358, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1124, in _train_mode$
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1154, in _train_mode$
_default
features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2251, in _call_model$
fn
config)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py", line 1112, in _call_model$
fn
model_fn_results = self._model_fn(features=features, **kwargs)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2558, in _model_fn
_train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2893, in _train_on_t$
u_system
device_assignment=ctx.device_assignment)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py", line 890, in split_compile_and_shar$
name=name)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/tpu.py", line 689, in split_compile_and_repl$
cate
outputs = computation(*computation_inputs)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 2886, in multi_tpu_t$
ain_steps_on_single_shard
[_INITIAL_LOSS])
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/training_loop.py", line 208, in repeat
cond, body_wrapper, inputs=inputs, infeed_queue=infeed_queue, name=name)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/training_loop.py", line 170, in while_loop
condition_wrapper, body_wrapper, inputs, name="", parallel_iterations=1)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 3556, in while_loop
return_same_structure)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 3087, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 3022, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/training_loop.py", line 121, in body_wrapper
outputs = body(*(inputs + dequeue_ops))
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/training_loop.py", line 204, in body_wrapper
return [i + 1] + _convert_to_list(body(*args))
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 1359, in train_step
self._call_model_fn(features, labels))
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 1552, in _call_model_
fn
self._validate_model_features_and_labels(features, labels, is_export_mode)
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 1546, in _validate_mo
del_features_and_labels
validate(features, 'features')
File "/home/abi/anaconda3/envs/myenv_3_5/lib/python3.5/site-packages/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py", line 1538, in validate
' Tensor: {}'.format(obj_name, obj))
ValueError: The features to the model returned by input_fn must have static shape. Tensor: Tensor("InfeedQueue/dequeue:0", shape=(16, ?, 50, 1024), dt
ype=float32, device=/device:TPU_REPLICATED_CORE:0)
这是我的训练数据管道
def train_input_fn(params):
def decode_example(example_proto, t=50, dim=1024):
features = tf.parse_single_example(
example_proto,
features = {
'X': tf.FixedLenSequenceFeature([], tf.float32, allow_missing=True),
'Y': tf.FixedLenSequenceFeature([], tf.int64, allow_missing=True),
}
)
feat = features['X']
feat = tf.squeeze(feat)
feat.set_shape([t, dim])
labels = features['Y']
labels = tf.cast(labels, dtype=tf.int32)
return feat, labels
train_files = params["train_filenames"]
batch_size = params['batch_size']
dataset = tf.data.TFRecordDataset(train_files, num_parallel_reads=8)
dataset = dataset.apply(
tf.contrib.data.shuffle_and_repeat(buffer_size=100))
dataset = dataset.apply(
tf.contrib.data.map_and_batch(decode_example, batch_size, drop_remainder=False))
dataset = dataset.prefetch(1)
dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(batch_size))
return dataset
答案 0 :(得分:1)
我遇到了一个非常相似的问题,我通过在您的情况下为形状尺寸设置一个显式值来解决了这个问题:
feat.set_shape([50, 1024])
不太方便,但是对我有用。