Question

嗨，这是来自TensorFlow's Estimator froze with low CPU usage的后续问题。

如果evaluate步骤为1，则以下代码可以正常工作，但如果它为空或2，则应该是正确的步数，因为{4}中有四行{1}}和feature_a且batch_size为2，它将引发feature_b错误。我想Estimator应该捕获这个OutOfRange并使用它来停止评估，但它没有，并且程序退出。

OutOfRange

错误堆栈如下所示

import tensorflow as tf
from tensorflow.contrib.layers.python.layers.optimizers import optimize_loss
from tensorflow.contrib.learn.python.learn.estimators import model_fn
from tensorflow.contrib.learn.python.learn.estimators.estimator import Estimator
from tensorflow.python import debug as tf_debug
from tensorflow.python.framework import ops


def main(_):
    hooks = [tf_debug.LocalCLIDebugHook()]

    def func(features, targets, mode, params):
        idx = tf.concat([features['a'], features['b']], axis=1)

        embedding = tf.get_variable("embed", [10, 20], dtype=tf.float32)

        pred = tf.reduce_sum(tf.nn.embedding_lookup(embedding, idx))

        train_op = optimize_loss(loss=pred,
                                 global_step=tf.train.get_global_step(),
                                 learning_rate=0.001,
                                 optimizer='Adam',
                                 variables=tf.trainable_variables(),
                                 name="training_loss_optimizer")

        eval_metric_dict = dict()
        eval_metric_dict['metric'] = pred

        return model_fn.ModelFnOps(mode=mode,
                                   predictions=pred,
                                   loss=pred,
                                   train_op=train_op,
                                   eval_metric_ops=eval_metric_dict)

    model = Estimator(func, params={})

    model.fit(
        input_fn=lambda: (
            {'a': ops.convert_to_tensor([[1, 2, 3, 4, 5]]), 'b': ops.convert_to_tensor([[2, 3, 4, 3, 5]])},
            None), max_steps=10)

    testing_data_a = [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]
    testing_data_b = [[2, 3, 4, 3, 5], [2, 3, 4, 3, 5], [2, 3, 4, 3, 5], [2, 3, 4, 3, 5]]

    def test_input_fn():
        feature_a = tf.train.limit_epochs(testing_data_a, num_epochs=1)
        feature_b = tf.train.limit_epochs(testing_data_b, num_epochs=1)

        feature_a_producer = tf.train.batch([feature_a], batch_size=2, enqueue_many=True, allow_smaller_final_batch=True)
        feature_b_producer = tf.train.batch([feature_b], batch_size=2, enqueue_many=True, allow_smaller_final_batch=True)

        return {'a': feature_a_producer, 'b': feature_b_producer}, None

    for i in range(10):
        # This does not work
        print(model.evaluate(input_fn=test_input_fn))
        # This does not work
        # print(model.evaluate(input_fn=test_input_fn, steps=2))
        # This do work
        # print(model.evaluate(input_fn=test_input_fn, steps=1))




if __name__ == "__main__":
    tf.app.run()

TensorFlow的Estimator只能从tf.train.limit_epochs获得N-1批次

0 个答案: