I am trying to train on the MNIST dataset on a GPU with a custom Estimator, and I am stumped by this error:
KeyError: "The name 'softmax_tensor:0' refers to a Tensor which does
not exist. The operation, 'softmax_tensor', does not exist in the
graph."
I have checked my code, but I can't figure out why "softmax_tensor"
does not exist. How can I fix this?
Here is my model_fn:
def cnn_model_fn(features, labels, mode):
    with tf.variable_scope('my_model', reuse=tf.AUTO_REUSE):
        # Input layer
        feature_column = tf.feature_column.numeric_column("images")
        input_layer = tf.reshape(
            tf.feature_column.input_layer(features, feature_columns=[feature_column]),
            [-1, 28, 28, 1])
        print(input_layer)
        # Convolutional Layer #1
        conv1 = tf.layers.conv2d(inputs=input_layer, filters=32, kernel_size=[5, 5],
                                 padding='same', activation=tf.nn.relu, name="conv1")
        # Pooling Layer #1
        pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2,
                                        name="pool1")
        # Convolutional Layer #2 and Pooling Layer #2
        conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[5, 5],
                                 padding='same', activation=tf.nn.relu, name="conv2")
        pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2,
                                        name="pool2")
        # Dense layer with dropout
        pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
        dense = tf.layers.dense(inputs=pool2_flat, units=1024, activation=tf.nn.relu)
        dropout = tf.layers.dropout(inputs=dense, rate=0.4,
                                    training=mode == tf.estimator.ModeKeys.TRAIN)
        # Logits layer
        logits = tf.layers.dense(inputs=dropout, units=10)
        predictions = {
            "classes": tf.argmax(input=logits, axis=1),
            "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
        }
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
        if mode == tf.estimator.ModeKeys.TRAIN:
            optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
            # For GPU training, wrap the optimizer with TowerOptimizer,
            # as the replicate_model_fn documentation instructs
            optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)
            train_op = optimizer.minimize(loss=loss,
                                          global_step=tf.train.get_global_step())
            return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
        # Add evaluation metrics
        eval_metric_ops = {
            "accuracy": tf.metrics.accuracy(labels=labels,
                                            predictions=predictions["classes"])
        }
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss,
                                          eval_metric_ops=eval_metric_ops)
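As a sanity check on the name, I assume I could print the tensor's full graph name from inside model_fn; since everything is built under the my_model variable scope (and replicate_model_fn makes per-tower copies of the graph), the real name might be prefixed rather than plain softmax_tensor, though I haven't verified this:

# Hypothetical check, placed right after the predictions dict in cnn_model_fn.
# Because of the enclosing variable scope I would expect something like
# 'my_model/softmax_tensor:0' here, possibly with an extra tower prefix
# added by replicate_model_fn.
print(predictions["probabilities"].name)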
Here is how I define the Estimator:
mnist_classifier = tf.estimator.Estimator(
    model_fn=tf.contrib.estimator.replicate_model_fn(cnn_model_fn),
    config=run_config,
    model_dir="./models/mnist_convnet_model")
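run_config is a tf.estimator.RunConfig with a distribution strategy set, which is why the traceback below goes through _train_model_distributed; simplified, it is roughly the following (the exact strategy and checkpoint/summary settings are my simplification, not a verbatim copy):

# Simplified sketch of my RunConfig; the train_distribute strategy is the
# part that sends training down the distributed code path in the traceback.
strategy = tf.contrib.distribute.MirroredStrategy()
run_config = tf.estimator.RunConfig(train_distribute=strategy)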
And this is how I train the model:
tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log, every_n_iter=50)
mnist_classifier.train(
    input_fn=train_inpf,
    steps=20000,
    hooks=[logging_hook]
)
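For completeness, train_inpf is a standard numpy input function over MNIST, roughly along these lines (a simplified reconstruction; the batch size and preprocessing are guesses and not essential, but the "images" key lines up with the feature column in model_fn):

# Rough reconstruction of train_inpf; MNIST loaded as numpy arrays, with the
# "images" key matching the numeric_column declared in model_fn.
(train_x, train_y), _ = tf.keras.datasets.mnist.load_data()
train_x = train_x.reshape(-1, 784).astype('float32') / 255.0
train_y = train_y.astype('int32')
train_inpf = tf.estimator.inputs.numpy_input_fn(
    x={"images": train_x},
    y=train_y,
    batch_size=100,
    num_epochs=None,
    shuffle=True)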
Finally, the full traceback:
<ipython-input-15-deef8ba38561> in <module>()
4 input_fn=train_inpf,
5 steps=20000,
----> 6 hooks=[logging_hook]
7 )
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
356
357 saving_listeners = _check_listeners_type(saving_listeners)
--> 358 loss = self._train_model(input_fn, hooks, saving_listeners)
359 logging.info('Loss for final step: %s.', loss)
360 return self
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _train_model(self, input_fn, hooks, saving_listeners)
1120 def _train_model(self, input_fn, hooks, saving_listeners):
1121 if self._train_distribution:
-> 1122 return self._train_model_distributed(input_fn, hooks, saving_listeners)
1123 else:
1124 return self._train_model_default(input_fn, hooks, saving_listeners)
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _train_model_distributed(self, input_fn, hooks, saving_listeners)
1183 self._config._train_distribute.configure(self._config.session_config)
1184 return self._actual_train_model_distributed(
-> 1185 self._config._train_distribute, input_fn, hooks, saving_listeners)
1186 # pylint: enable=protected-access
1187
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _actual_train_model_distributed(self, strategy, input_fn, hooks, saving_listeners)
1285 return self._train_with_estimator_spec(estimator_spec, worker_hooks,
1286 hooks, global_step_tensor,
-> 1287 saving_listeners)
1288
1289 def _train_with_estimator_spec(self, estimator_spec, worker_hooks, hooks,
/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py in _train_with_estimator_spec(self, estimator_spec, worker_hooks, hooks, global_step_tensor, saving_listeners)
1401 save_summaries_steps=save_summary_steps,
1402 config=self._session_config,
-> 1403 log_step_count_steps=log_step_count_steps) as mon_sess:
1404 loss = None
1405 any_step_done = False
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/monitored_session.py in MonitoredTrainingSession(master, is_chief, checkpoint_dir, scaffold, hooks, chief_only_hooks, save_checkpoint_secs, save_summaries_steps, save_summaries_secs, config, stop_grace_period_secs, log_step_count_steps, max_wait_secs, save_checkpoint_steps, summary_dir)
506 session_creator=session_creator,
507 hooks=all_hooks,
--> 508 stop_grace_period_secs=stop_grace_period_secs)
509
510
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/monitored_session.py in __init__(self, session_creator, hooks, stop_grace_period_secs)
932 super(MonitoredSession, self).__init__(
933 session_creator, hooks, should_recover=True,
--> 934 stop_grace_period_secs=stop_grace_period_secs)
935
936
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/monitored_session.py in __init__(self, session_creator, hooks, should_recover, stop_grace_period_secs)
634 self._hooks = hooks or []
635 for h in self._hooks:
--> 636 h.begin()
637
638 worker_context = distribute_coordinator_context.get_current_worker_context()
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/basic_session_run_hooks.py in begin(self)
225 # Convert names to tensors if given
226 self._current_tensors = {tag: _as_graph_element(tensor)
--> 227 for (tag, tensor) in self._tensors.items()}
228
229 def before_run(self, run_context): # pylint: disable=unused-argument
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/basic_session_run_hooks.py in <dictcomp>(.0)
225 # Convert names to tensors if given
226 self._current_tensors = {tag: _as_graph_element(tensor)
--> 227 for (tag, tensor) in self._tensors.items()}
228
229 def before_run(self, run_context): # pylint: disable=unused-argument
/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/basic_session_run_hooks.py in _as_graph_element(obj)
1073 element = graph.as_graph_element(obj)
1074 else:
-> 1075 element = graph.as_graph_element(obj + ":0")
1076 # Check that there is no :1 (e.g. it's single output).
1077 try:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in as_graph_element(self, obj, allow_tensor, allow_operation)
3476
3477 with self._lock:
-> 3478 return self._as_graph_element_locked(obj, allow_tensor, allow_operation)
3479
3480 def _as_graph_element_locked(self, obj, allow_tensor, allow_operation):
/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py in _as_graph_element_locked(self, obj, allow_tensor, allow_operation)
3518 raise KeyError("The name %s refers to a Tensor which does not "
3519 "exist. The operation, %s, does not exist in the "
-> 3520 "graph." % (repr(name), repr(op_name)))
3521 try:
3522 return op.outputs[out_n]
KeyError: "The name 'softmax_tensor:0' refers to a Tensor which does not exist. The operation, 'softmax_tensor', does not exist in the graph."