我将peleenet_ssd.py
的一行(注释行)从tf.layers
更改为tf.keras.layers
,然后收到类似Inputs to operation model/gradients/AddN_449 of type AddN must have the same size and shape
的错误
我运行下面的测试代码,尝试使用不同的batch_size,但是tf.layers
和tf.keras.layers
的输出形状相同:
if __name__ == '__main__':
with tf.Graph().as_default():
sess = tf.Session()
model = PeleeNetSSD(21, [6, 6, 6, 6, 6, 6])
x = tf.ones([8, 304, 304, 3])
y = model(x, True)
init = tf.global_variables_initializer()
sess.run(init)
loc, cla = sess.run(y)
for l in loc:
print(l.shape)
for c in cla:
print(c.shape)
因此,我真的无法弄清楚为什么两个网络在发生compute_gradients
错误时会输出相同的内容(在错误中),除了这两个API都非常接近彼此。我不知道是什么原因导致了错误?
peleenet_ssd.py
class PeleeNet:
def __init__(self):
self.inner_channels = [16, 32, 64, 64]
self.extra_channel = 16
def conv_bn_relu(self, inputs, is_training, output_channel, kernel_size, stride, padding='same', use_relu=True):
conv = tf.keras.layers.Conv2D(output_channel, kernel_size, stride, padding, use_bias=False)(inputs)
# conv = tf.layers.Conv2D(output_channel, kernel_size, stride, padding, use_bias=False)(inputs)
conv_bn = tf.layers.BatchNormalization()(conv, training=is_training)
if use_relu:
return tf.nn.relu(conv_bn)
else:
return conv_bn
def stem_block(self, inputs, is_training):
stem1 = self.conv_bn_relu(inputs, is_training, 32, 3, 2)
stem2 = self.conv_bn_relu(stem1, is_training, 16, 1, 1)
stem3 = self.conv_bn_relu(stem2, is_training, 32, 3, 2)
stem1_pool = tf.layers.MaxPooling2D(2, 2)(stem1)
stem_cat = tf.concat([stem3, stem1_pool], axis=-1)
stem_final = self.conv_bn_relu(stem_cat, is_training, 32, 1, 1)
return stem_final
def dense_block(self, inputs, is_training, inner_channel, extra_channel):
dense_a1 = self.conv_bn_relu(inputs, is_training, inner_channel, 1, 1)
dense_a2 = self.conv_bn_relu(dense_a1, is_training, extra_channel, 3, 1)
dense_b1 = self.conv_bn_relu(inputs, is_training, inner_channel, 1, 1)
dense_b2 = self.conv_bn_relu(dense_b1, is_training, extra_channel, 3, 1)
dense_b3 = self.conv_bn_relu(dense_b2, is_training, extra_channel, 3, 1)
dense_final = tf.concat([inputs, dense_a2, dense_b3], axis=-1)
return dense_final
def transition_block(self, inputs, is_training, is_pooling):
input_channel = inputs.get_shape().as_list()[3]
if is_pooling:
trans_0 = self.conv_bn_relu(inputs, is_training, input_channel, 1, 1)
trans_pool = tf.layers.MaxPooling2D(2, 2)(trans_0)
return trans_pool
else:
trans_0 = self.conv_bn_relu(inputs, is_training, input_channel, 1, 1)
return trans_0
def __call__(self, inputs, is_training):
with tf.variable_scope('stage_0'):
with tf.variable_scope('stem_block'):
stage_0 = self.stem_block(inputs, is_training)
with tf.variable_scope('stage_1'):
stage_1_input = stage_0
scope_name = 'dense_block_%d'
for i in range(3):
with tf.variable_scope(scope_name % i):
stage_1_input = self.dense_block(stage_1_input, is_training, self.inner_channels[0],
self.extra_channel)
with tf.variable_scope('transition_block'):
stage_1 = self.transition_block(stage_1_input, is_training, is_pooling=True)
with tf.variable_scope('stage_2'):
stage_2_input = stage_1
scope_name = 'dense_block_%d'
for i in range(4):
with tf.variable_scope(scope_name % i):
stage_2_input = self.dense_block(stage_2_input, is_training, self.inner_channels[1],
self.extra_channel)
with tf.variable_scope('transition_block'):
stage_2 = self.transition_block(stage_2_input, is_training, is_pooling=True)
with tf.variable_scope('stage_3'):
stage_3_input = stage_2
scope_name = 'dense_block_%d'
for i in range(8):
with tf.variable_scope(scope_name % i):
stage_3_input = self.dense_block(stage_3_input, is_training, self.inner_channels[2],
self.extra_channel)
with tf.variable_scope('transition_block'):
stage_3 = self.transition_block(stage_3_input, is_training, is_pooling=True)
with tf.variable_scope('stage_4'):
stage_4_input = stage_3
scope_name = 'dense_block_%d'
for i in range(6):
with tf.variable_scope(scope_name % i):
stage_4_input = self.dense_block(stage_4_input, is_training, self.inner_channels[3],
self.extra_channel)
with tf.variable_scope('transition_block'):
stage_4 = self.transition_block(stage_4_input, is_training, is_pooling=False)
return stage_3_input, stage_4
class PeleeNetClassify:
def __init__(self, num_classes):
self.backbone = PeleeNet()
self.num_classes = num_classes
def __call__(self, inputs, is_training):
_, feature = self.backbone(inputs, is_training)
pooling = tf.layers.AveragePooling2D(7, 1)(feature)
flatten = tf.layers.Flatten()(pooling)
dense = tf.layers.Dense(self.num_classes)(flatten)
return dense
class PeleeNetSSD:
def __init__(self, num_classes, anchor_depth_per_layer):
self.peleenet = PeleeNet()
self.extra_output_channel = 256
self.num_classes = num_classes
self.anchor_depth_per_layer = anchor_depth_per_layer
def conv_bn_relu(self, inputs, is_training, output_channel, kernel_size, stride, padding='same', use_relu=True):
conv = tf.layers.Conv2D(output_channel, kernel_size, stride, padding, use_bias=False)(inputs)
conv_bn = tf.layers.BatchNormalization()(conv, training=is_training)
if use_relu:
return tf.nn.relu(conv_bn)
else:
return conv_bn
def add_extra(self, inputs, is_training, output_channel):
a2 = self.conv_bn_relu(inputs, is_training, output_channel, 1, 1, use_relu=False)
b2a = self.conv_bn_relu(inputs, is_training, int(output_channel / 2), 1, 1)
b2b = self.conv_bn_relu(b2a, is_training, int(output_channel / 2), 3, 1)
b2c = self.conv_bn_relu(b2b, is_training, output_channel, 1, 1, use_relu=False)
return a2 + b2c
def multibox_layer(self, feature_layers):
locations = []
classes = []
for i, feature in enumerate(feature_layers):
locations.append(
tf.layers.Conv2D(self.anchor_depth_per_layer[i] * 4, kernel_size=3, padding='same',
use_bias=True)(feature))
classes.append(
tf.layers.Conv2D(self.anchor_depth_per_layer[i] * self.num_classes, kernel_size=3,
padding='same', use_bias=True)(feature))
return locations, classes
def __call__(self, inputs, is_training):
feature_layers = []
# backbone
stage3, stage4 = self.peleenet(inputs, is_training)
with tf.variable_scope('extra_pm2'):
pm2_inputs = stage3
pm2_res = self.add_extra(pm2_inputs, is_training, self.extra_output_channel)
feature_layers.append(pm2_res)
feature_layers.append(pm2_res)
with tf.variable_scope('extra_pm3'):
pm3_inputs = stage4
pm3_res = self.add_extra(pm3_inputs, is_training, self.extra_output_channel)
feature_layers.append(pm3_res)
with tf.variable_scope('extra_pm3_to_pm4'):
pm3_to_pm4 = tf.layers.Conv2D(self.extra_output_channel, 1, 1, activation=tf.nn.relu)(pm3_inputs)
pm3_to_pm4 = tf.layers.Conv2D(self.extra_output_channel, 3, 2, padding='same',
activation=tf.nn.relu)(pm3_to_pm4)
with tf.variable_scope('extra_pm4'):
pm4_inputs = pm3_to_pm4
pm4_res = self.add_extra(pm4_inputs, is_training, self.extra_output_channel)
feature_layers.append(pm4_res)
with tf.variable_scope('extra_pm4_to_pm5'):
pm4_to_pm5 = tf.layers.Conv2D(self.extra_output_channel, 1, 1, activation=tf.nn.relu)(pm4_inputs)
pm4_to_pm5 = tf.layers.Conv2D(self.extra_output_channel, 3, 1, activation=tf.nn.relu)(pm4_to_pm5)
with tf.variable_scope('extra_pm5'):
pm5_inputs = pm4_to_pm5
pm5_res = self.add_extra(pm5_inputs, is_training, self.extra_output_channel)
feature_layers.append(pm5_res)
with tf.variable_scope('extra_pm5_to_pm6'):
pm5_to_pm6 = tf.layers.Conv2D(self.extra_output_channel, 1, 1, activation=tf.nn.relu)(pm5_inputs)
pm5_to_pm6 = tf.layers.Conv2D(self.extra_output_channel, 3, 1, activation=tf.nn.relu)(pm5_to_pm6)
with tf.variable_scope('extra_pm6'):
pm6_inputs = pm5_to_pm6
pm6_res = self.add_extra(pm6_inputs, is_training, self.extra_output_channel)
feature_layers.append(pm6_res)
with tf.variable_scope('mutibox_layer'):
locations, classes = self.multibox_layer(feature_layers)
return locations, classes
if __name__ == '__main__':
with tf.Graph().as_default():
sess = tf.Session()
model = PeleeNetSSD(21, [6, 6, 6, 6, 6, 6])
x = tf.ones([8, 304, 304, 3])
y = model(x, True)
init = tf.global_variables_initializer()
sess.run(init)
loc, cla = sess.run(y)
for l in loc:
print(l.shape)
for c in cla:
print(c.shape)
错误
Caused by op 'model/gradients/AddN_449', defined at:
...
File "/home/xxx/xxxxx/PocketFlow/learners/full_precision/learner.py", line 181, in __build
grads = optimizer.compute_gradients(loss, self.trainable_vars)
File "/home/xxx/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/training/optimizer.py", line 519, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops)
File "/home/xxx/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 630, in gradients
gate_gradients, aggregation_method, stop_gradients)
File "/home/xxx/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 740, in _GradientsHelper
aggregation_method)
File "/home/xxx/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 1121, in _AggregatedGrads
out_grads[i] = _MultiDeviceAddN(out_grad, gradient_uid)
File "/home/xxx/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/gradients_impl.py", line 1007, in _MultiDeviceAddN
summands.append(math_ops.add_n(tensors))
File "/home/xxx/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 2173, in add_n
return gen_math_ops.add_n(inputs, name=name)
File "/home/xxx/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 363, in add_n
"AddN", inputs=inputs, name=name)
File "/home/xxx/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/xxx/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
return func(*args, **kwargs)
File "/home/xxx/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
op_def=op_def)
File "/home/xxx/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
self._traceback = tf_stack.extract_stack()
InvalidArgumentError (see above for traceback): Inputs to operation model/gradients/AddN_449 of type AddN must have the same size and shape. Input 0: [3,3,16,16] != input 1: [1,1,32,16]
[[node model/gradients/AddN_449 (defined at /home/xxx/xxxxx/PocketFlow/learners/full_precision/learner.py:181) = AddN[N=2, T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](model/gradients/model/L2Loss_12_grad/mul, model/gradients/model/peleenet-ssd/stage_1/dense_block_0/conv2d_4/Conv2D_grad/Conv2DBackpropFilter, ^model/gradients/model/peleenet-ssd/stage_1/dense_block_0/conv2d_4/Conv2D_grad/Conv2DBackpropInput)]]
相关代码(第181行)
# loss & extra evalution metrics
loss, metrics = self.calc_loss(labels, logits, self.trainable_vars)
if self.enbl_dst:
loss += self.helper_dst.calc_loss(logits, logits_dst)
tf.summary.scalar('loss', loss)
for key, value in metrics.items():
tf.summary.scalar(key, value)
# optimizer & gradients
if is_train:
self.global_step = tf.train.get_or_create_global_step()
lrn_rate, self.nb_iters_train =
self.setup_lrn_rate(self.global_step)
optimizer = tf.train.MomentumOptimizer(lrn_rate, FLAGS.momentum)
if FLAGS.enbl_multi_gpu:
optimizer = mgw.DistributedOptimizer(optimizer)
grads = optimizer.compute_gradients(loss, self.trainable_vars) # Line 181
我不希望有确切的原因,请告诉我可能导致这种结果的原因,我将尝试自己找出答案。非常感谢!