我正在尝试在tensorflow中实现带有区域提议网络的U-Net。我有一个可编译的keras模型,但是当我尝试使用GradientTape()编写训练循环时,出现“ ValueError:没有为任何变量提供渐变:”错误。这似乎是一个由多种原因引起的相当普遍的问题,在这种情况下,我正在努力找出原因。
这里是我定义模型的函数(函数外部定义了各种块):
def get_UNet_RPN(params):
n_filters=params['n_filters']
batchnorm=params['batchnorm']
attention=params['attention']
imh=params['imh']
imw=params['imw']
input_img=Input(shape=(imh,imw,3))
c1 = conv2d_block(input_img, n_filters=n_filters*1, c1=True, kernel_size=3, batchnorm=batchnorm)
p1 = MaxPooling2D((2, 2)) (c1)
#p1 = Dropout(dropout*0.5)(p1)
c2 = conv2d_block(p1, n_filters=n_filters*2, c1=False, kernel_size=3, batchnorm=batchnorm)
p2 = MaxPooling2D((2, 2)) (c2)
#p2 = Dropout(dropout)(p2)
c3 = conv2d_block(p2, n_filters=n_filters*4, c1=False, kernel_size=3, batchnorm=batchnorm)
p3 = MaxPooling2D((2, 2)) (c3)
#p3 = Dropout(dropout)(p3)
c4 = conv2d_block(p3, n_filters=n_filters*8, c1=False, kernel_size=3, batchnorm=batchnorm)
p4 = MaxPooling2D(pool_size=(2, 2)) (c4)
#p4 = Dropout(dropout)(p4)
c5 = conv2d_block(p4, n_filters=n_filters*16, c1=False, kernel_size=3, batchnorm=batchnorm)
# expansive path
u6 = Conv2DTranspose(n_filters*8, (3, 3), strides=(2, 2), padding='same') (c5)
if attention:
c4 = att_block(c4, c5, n_filters=n_filters*8, kernel_size=2, batchnorm=batchnorm)
u6 = concatenate([u6, c4])
#u6 = Dropout(dropout)(u6)
c6 = conv2d_block(u6, n_filters=n_filters*8, c1=False, kernel_size=3, batchnorm=batchnorm)
u7 = Conv2DTranspose(n_filters*4, (3, 3), strides=(2, 2), padding='same') (c6)
if attention:
c3 = att_block(c3, c6, n_filters=n_filters*4, kernel_size=2, batchnorm=batchnorm)
u7 = concatenate([u7, c3])
#u7 = Dropout(dropout)(u7)
c7 = conv2d_block(u7, n_filters=n_filters*4, c1=False, kernel_size=3, batchnorm=batchnorm)
u8 = Conv2DTranspose(n_filters*2, (3, 3), strides=(2, 2), padding='same') (c7)
if attention:
c2 = att_block(c2, c7, n_filters=n_filters*2, kernel_size=2, batchnorm=batchnorm)
u8 = concatenate([u8, c2])
#u8 = Dropout(dropout)(u8)
c8 = conv2d_block(u8, n_filters=n_filters*2, c1=False, kernel_size=3, batchnorm=batchnorm)
u9 = Conv2DTranspose(n_filters*1, (3, 3), strides=(2, 2), padding='same') (c8)
if attention:
c1 = att_block(c1, c8, n_filters=n_filters*1, kernel_size=2, batchnorm=batchnorm)
u9 = concatenate([u9, c1], axis=3)
#u9 = Dropout(dropout)(u9)
c9 = conv2d_block(u9, n_filters=n_filters*1, c1=False, kernel_size=3, batchnorm=batchnorm)
UNet_out = Conv2D(1, (1, 1), activation='sigmoid') (c9)
# Define RPN
scales=params['scales']
ratios=params['ratios']
input_height=params['imh']
input_width=params['imw']
feat_map=p4 #change this to alter where RPN input gets pulled
feat_map_shape=tf.shape(feat_map)
feat_map_shape=tf.cast(feat_map_shape,tf.float64)
num_ref_anchors = scales.shape[0] * ratios.shape[0]
rpn = Conv2D(512, (3, 3),activation="relu", padding="same", name="rpn_conv/3x3")(feat_map)
# This is the score to determine an anchor is foreground vs background
rpn_cls_score = Conv2D(num_ref_anchors, [1, 1], activation='sigmoid',
padding='valid', name='rpn_cls_score')(rpn)
# change it so that the score has 2 as its channel size
rpn_cls_score_reshape = tf.reshape(rpn_cls_score, [-1, 2])
# Now the shape of rpn_cls_score_reshape is (H * W * num_anchors, 4)
rpn_cls_prob = Softmax()(rpn_cls_score_reshape)
# rpn_bbox_pred has shape (?, H, W, num_anchors * 4)
rpn_bbox_pred = Conv2D(num_ref_anchors * 4, [1, 1],
(1, 1), activation="linear", name='rpn_bbox_pred')(rpn)
# Generate segmentation mask
pred_mask = tf.argmax(UNet_out, axis=3)
pred_mask = tf.reshape(pred_mask,[input_height,input_width])
pred_mask = tf.cast(pred_mask,dtype=tf.float32)
model=Model(input_img,[pred_mask,rpn_bbox_pred,rpn_cls_score,rpn_cls_prob,feat_map_shape],name='unet_rpn')
return model
这是训练循环的骨架:
unet_rpn=get_UNet_RPN(params)
unet_rpn.summary()
optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3)
for epoch in range(0,epochs):
#Shuffle training data
np.random.shuffle(train_i)
#Iterate through each image
for i in train_i:
with tf.GradientTape(watch_accessed_variables=False) as tape:
tape.watch(unet_rpn.trainable_variables)
# forward pass through the unet/rpn
y_pred, bbox_pred, cls_pred, cls_prob, feat_map_shape = unet_rpn(i)
# Generate region proposals
...
# Calculate loss
loss=...
grads=tape.gradient(loss,unet_rpn.trainable_weights)
optimizer.apply_gradients(zip(grads,unet_rpn.trainable_variables))
我对文档的细读发现了所有渐变可能都返回“ None”(https://www.tensorflow.org/guide/autodiff)的许多原因,但对我来说都没有意义,因为我的循环基本上遵循了文档中的结构(https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch)。任何帮助将不胜感激!