I am trying to train on the MNIST dataset on a GPU with a custom Estimator, and I am stumped by this error:
KeyError: "The name 'softmax_tensor:0' refers to a Tensor which does
not exist. The operation, 'softmax_tensor', does not exist in the
graph."
I have checked my code, but I can't figure out why "softmax_tensor"
does not exist. How can I fix this?
Here is my model_fn:
def cnn_model_fn(features, labels, mode):
    with tf.variable_scope('my_model', reuse=tf.AUTO_REUSE):
        # Input layer
        feature_column = tf.feature_column.numeric_column("images")
        input_layer = tf.reshape(
            tf.feature_column.input_layer(features, feature_columns=[feature_column]),
            [-1, 28, 28, 1])
        print(input_layer)
        # Convolutional Layer #1
        conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[5, 5],
                                 padding='same', activation=tf.nn.relu, name="conv1")
        # Pooling Layer #1
        pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2,
                                        name="pool1")
        # Convolutional Layer #2 and Pooling Layer #2
        conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5, 5],
                                 padding='same', activation=tf.nn.relu, name="conv2")
        pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2,
                                        name="pool2")
        # Dense layer with dropout
        pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
        dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
        dropout = tf.layers.dropout(inputs=dense, rate=0.4,
                                    training=mode == tf.estimator.ModeKeys.TRAIN)
        # Logits layer
        logits = tf.layers.dense(inputs=dropout, units=10)
        predictions = {
            "classes": tf.argmax(input=logits, axis=1),
            "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        }
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
            # For GPU training, wrap the optimizer with TowerOptimizer,
            # as the replicate_model_fn documentation instructs
            optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)
            train_op = optimizer.minimize(loss=loss,
                                          global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
        # Add evaluation metrics
        eval_metric_ops = {
            "accuracy": tf.metrics.accuracy(labels=labels,
                                            predictions=predictions["classes"])
        }
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          eval_metric_ops=eval_metric_ops)
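As a sanity check on the name, I assume I could print the tensor's full graph name from inside model_fn; since everything is built under the my_model variable scope (and replicate_model_fn makes per-tower copies of the graph), the real name might be prefixed rather than plain softmax_tensor, though I haven't verified this:

# Hypothetical check, placed right after the predictions dict in cnn_model_fn.
# Because of the enclosing variable scope I would expect something like
# 'my_model/softmax_tensor:0' here, possibly with an extra tower prefix
# added by replicate_model_fn.
print(predictions["probabilities"].name)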
Here is how I define the Estimator:
mnist_classifier = tf.estimator.Estimator(
    model_fn=tf.contrib.estimator.replicate_model_fn(cnn_model_fn),
    config=run_config,
    model_dir="./models/mnist_convnet_model")
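run_config is a tf.estimator.RunConfig with a distribution strategy set, which is why the traceback below goes through _train_model_distributed; simplified, it is roughly the following (the exact strategy and checkpoint/summary settings are my simplification, not a verbatim copy):

# Simplified sketch of my RunConfig; the train_distribute strategy is the
# part that sends training down the distributed code path in the traceback.
strategy = tf.contrib.distribute.MirroredStrategy()
run_config = tf.estimator.RunConfig(train_distribute=strategy)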
And this is how I train the model:
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)
mnist_classifier.train(
    input_fn=train_inpf,
    steps=20000,
    hooks=[logging_hook]
)
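For completeness, train_inpf is a standard numpy input function over MNIST, roughly along these lines (a simplified reconstruction; the batch size and preprocessing are guesses and not essential, but the "images" key lines up with the feature column in model_fn):

# Rough reconstruction of train_inpf; MNIST loaded as numpy arrays, with the
# "images" key matching the numeric_column declared in model_fn.
(train_x, train_y), _ = tf.keras.datasets.mnist.load_data()
train_x = train_x.reshape(-1, 784).astype('float32') / 255.0
train_y = train_y.astype('int32')
train_inpf = tf.estimator.inputs.numpy_input_fn(
    x={"images": train_x},
    y=train_y,
    batch_size=100,
    num_epochs=None,
    shuffle=True)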
Finally, the full traceback:
<ipython-input-15-deef8ba38561> in <module>()
4 input_fn=train_inpf,
5 steps=20000,
----> 6 hooks=[logging_hook]
7 )
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
356
357 saving_listeners = _check_listeners_type(saving_listeners)
--> 358 loss = self._train_model(input_fn, hooks, saving_listeners)
359 logging.info('Loss for final step: %s.', loss)
360 return self
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
1120 def _train_model(self, input_fn, hooks, saving_listeners):
1121 if self._train_distribution:
-> 1122 return self._train_model_distributed(input_fn, hooks, saving_listeners)
1123 else:
1124 return self._train_model_default(input_fn, hooks, saving_listeners)
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _train_model_distributed(self, input_fn, hooks, saving_listeners)
1183 self._config._train_distribute.configure(self._config.session_config)
1184 return self._actual_train_model_distributed(
-> 1185 self._config._train_distribute, input_fn, hooks, saving_listeners)
1186 # pylint: enable=protected-access
1187
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _actual_train_model_distributed(self, strategy, input_fn, hooks, saving_listeners)
1285 return self._train_with_estimator_spec(estimator_spec, worker_hooks,
1286 hooks, global_step_tensor,
-> 1287 saving_listeners)
1288
1289 def _train_with_estimator_spec(self, estimator_spec, worker_hooks, hooks,
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _train_with_estimator_spec(self, estimator_spec, worker_hooks, hooks, global_step_tensor, saving_listeners)
1401 save_summaries_steps=save_summary_steps,
1402 config=self._session_config,
-> 1403 log_step_count_steps=log_step_count_steps) as mon_sess:
1404 loss = None
1405 any_step_done = False
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/monitored_session.py in MonitoredTrainingSession(master, is_chief, checkpoint_dir, scaffold, hooks, chief_only_hooks, save_checkpoint_secs, save_summaries_steps, save_summaries_secs, config, stop_grace_period_secs, log_step_count_steps, max_wait_secs, save_checkpoint_steps, summary_dir)
506 session_creator=session_creator,
507 hooks=all_hooks,
--> 508 stop_grace_period_secs=stop_grace_period_secs)
509
510
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/monitored_session.py in __init__(self, session_creator, hooks, stop_grace_period_secs)
932 super(MonitoredSession, self).__init__(
933 session_creator, hooks, should_recover=True,
--> 934 stop_grace_period_secs=stop_grace_period_secs)
935
936
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/monitored_session.py in __init__(self, session_creator, hooks, should_recover, stop_grace_period_secs)
634 self._hooks = hooks or []
635 for h in self._hooks:
--> 636 h.begin()
637
638 worker_context = distribute_coordinator_context.get_current_worker_context()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/basic_session_run_hooks.py in begin(self)
225 # Convert names to tensors if given
226 self._current_tensors = {tag: _as_graph_element(tensor)
--> 227 for (tag, tensor) in self._tensors.items()}
228
229 def before_run(self, run_context): # pylint: disable=unused-argument
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/basic_session_run_hooks.py in <dictcomp>(.0)
225 # Convert names to tensors if given
226 self._current_tensors = {tag: _as_graph_element(tensor)
--> 227 for (tag, tensor) in self._tensors.items()}
228
229 def before_run(self, run_context): # pylint: disable=unused-argument
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/basic_session_run_hooks.py in _as_graph_element(obj)
1073 element = graph.as_graph_element(obj)
1074 else:
-> 1075 element = graph.as_graph_element(obj + ":0")
1076 # Check that there is no :1 (e.g. it's single output).
1077 try:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in as_graph_element(self, obj, allow_tensor, allow_operation)
3476
3477 with self._lock:
-> 3478 return self._as_graph_element_locked(obj, allow_tensor, allow_operation)
3479
3480 def _as_graph_element_locked(self, obj, allow_tensor, allow_operation):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in _as_graph_element_locked(self, obj, allow_tensor, allow_operation)
3518 raise KeyError("The name %s refers to a Tensor which does not "
3519 "exist. The operation, %s, does not exist in the "
-> 3520 "graph." % (repr(name), repr(op_name)))
3521 try:
3522 return op.outputs[out_n]
KeyError: "The name 'softmax_tensor:0' refers to a Tensor which does not exist. The operation, 'softmax_tensor', does not exist in the graph."