嗨,这是来自TensorFlow's Estimator froze with low CPU usage的后续问题。
如果evaluate
步骤为1
,则以下代码可以正常工作,但如果它为空或2
,则应该是正确的步数,因为{4}中有四行{1}}和feature_a
且batch_size为2,它将引发feature_b
错误。我想Estimator应该捕获这个OutOfRange并使用它来停止评估,但它没有,并且程序退出。
OutOfRange
错误堆栈如下所示
import tensorflow as tf
from tensorflow.contrib.layers.python.layers.optimizers import optimize_loss
from tensorflow.contrib.learn.python.learn.estimators import model_fn
from tensorflow.contrib.learn.python.learn.estimators.estimator import Estimator
from tensorflow.python import debug as tf_debug
from tensorflow.python.framework import ops
def main(_):
hooks = [tf_debug.LocalCLIDebugHook()]
def func(features, targets, mode, params):
idx = tf.concat([features['a'], features['b']], axis=1)
embedding = tf.get_variable("embed", [10, 20], dtype=tf.float32)
pred = tf.reduce_sum(tf.nn.embedding_lookup(embedding, idx))
train_op = optimize_loss(loss=pred,
global_step=tf.train.get_global_step(),
learning_rate=0.001,
optimizer='Adam',
variables=tf.trainable_variables(),
name="training_loss_optimizer")
eval_metric_dict = dict()
eval_metric_dict['metric'] = pred
return model_fn.ModelFnOps(mode=mode,
predictions=pred,
loss=pred,
train_op=train_op,
eval_metric_ops=eval_metric_dict)
model = Estimator(func, params={})
model.fit(
input_fn=lambda: (
{'a': ops.convert_to_tensor([[1, 2, 3, 4, 5]]), 'b': ops.convert_to_tensor([[2, 3, 4, 3, 5]])},
None), max_steps=10)
testing_data_a = [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]
testing_data_b = [[2, 3, 4, 3, 5], [2, 3, 4, 3, 5], [2, 3, 4, 3, 5], [2, 3, 4, 3, 5]]
def test_input_fn():
feature_a = tf.train.limit_epochs(testing_data_a, num_epochs=1)
feature_b = tf.train.limit_epochs(testing_data_b, num_epochs=1)
feature_a_producer = tf.train.batch([feature_a], batch_size=2, enqueue_many=True, allow_smaller_final_batch=True)
feature_b_producer = tf.train.batch([feature_b], batch_size=2, enqueue_many=True, allow_smaller_final_batch=True)
return {'a': feature_a_producer, 'b': feature_b_producer}, None
for i in range(10):
# This does not work
print(model.evaluate(input_fn=test_input_fn))
# This does not work
# print(model.evaluate(input_fn=test_input_fn, steps=2))
# This do work
# print(model.evaluate(input_fn=test_input_fn, steps=1))
if __name__ == "__main__":
tf.app.run()