我从https://github.com/tensorflow/tensorflow/blob/r0.7/tensorflow/models/image/cifar10/cifar10_multi_gpu_train.py复制了tower_loss方法,并且对loss_averages
变量的范围有疑问。有什么问题,我该如何解决?
def tower_loss(scope, img_batch, label_batch):
# Calculate the total loss on a single tower running the model.
# scope: unique prefix string identifying the tower, e.g. 'tower_0'
# returns total loss for a batch of data
# Build inference Graph.
logits = inference(img_batch)
# Build the portion of the Graph calculating the losses. Note that we will
# assemble the total_loss using a custom function below.
_ = model_loss(logits, label_batch)
# Assemble all of the losses for the current tower only.
losses = tf.get_collection('losses', scope)
# Calculate the total loss for the current tower.
total_loss = tf.add_n(losses, name='total_loss')
loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
print("current scope:", scope)# tower_name_1/
print("loss_averages:", loss_averages.name) # avg
print("total_loss:", total_loss.name) # tower_name_1/total_loss_1:0
print()
# need scope: tower_name_1/mean_sq_error/avg/
loss_averages_op = loss_averages.apply(losses + [total_loss])
with tf.control_dependencies([loss_averages_op]):
total_loss = tf.identity(total_loss)
return total_loss
踪迹:
total_loss: tower_name_0/total_loss_1:0
curr scope: tower_name_0/
loss_averages: avg
Tensor("tower_name_1/IteratorGetNext:0", shape=(?, 227, 227, 3), dtype=float32, device=/device:GPU:1)
Tensor("tower_name_1/IteratorGetNext:1", shape=(?,), dtype=float32, device=/device:GPU:1)
total_loss: tower_name_1/total_loss_1:0
curr scope: tower_name_1/
loss_averages: avg
Traceback (most recent call last):
File "new_FCN-LSTM.py", line 363, in <module>
train()
File "new_FCN-LSTM.py", line 259, in train
loss = tower_loss(scope, img_batch, label_batch)
File "new_FCN-LSTM.py", line 165, in tower_loss
loss_averages_op = loss_averages.apply(losses + [total_loss])
File "C:\Users\User\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\training\moving_averages.py", line 415, in apply
"VarHandleOp"]))
File "C:\Users\User\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\training\slot_creator.py", line 183, in create_zeros_slot
colocate_with_primary=colocate_with_primary)
File "C:\Users\User\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\training\slot_creator.py", line 160, in create_slot_with_initializer
dtype)
File "C:\Users\User\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\training\slot_creator.py", line 65, in _create_slot_var
validate_shape=validate_shape)
File "C:\Users\User\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\variable_scope.py", line 1487, in get_variable
aggregation=aggregation)
File "C:\Users\User\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\variable_scope.py", line 1237, in get_variable
aggregation=aggregation)
File "C:\Users\User\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\variable_scope.py", line 540, in get_variable
aggregation=aggregation)
File "C:\Users\User\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\variable_scope.py", line 492, in _true_getter
aggregation=aggregation)
File "C:\Users\User\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\variable_scope.py", line 879, in _get_single_variable
"reuse=tf.AUTO_REUSE in VarScope?" % name)
ValueError: Variable tower_name_1/mean_sq_error/avg/ does not exist, or was not created with tf.get_variable(). Did you mean to set reuse=tf.AUTO_REUSE in VarScope?