I am trying to implement a U-Net in Tensorflow, but I am stuck: no matter the learning rate, the loss keeps increasing (significantly). I tried learning rates from 0.1 down to 1.0E-8; the lower the learning rate, the longer it takes for the loss to blow up, but at some point the loss still shoots up to NaN (with LR 1.0E-8 this happens after roughly 600 iterations, while the loss is still high).
My guess is that my gradients are exploding, but I don't know how to fix this (one thing I've considered is clipping the gradients, see the sketch after the code). I noticed that my network does converge when I use sigmoid cross-entropy instead of softmax cross-entropy as the loss.
The input of the CNN is an RGB image of shape [256, 256, 3] (values 0-255), and the labels have shape [256, 256, 3] with 3 classes, where each of the 3 channels contains only 0s and 1s (i.e., a per-pixel one-hot encoding).
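For example, a pixel of class 1 carries the label vector [0, 1, 0]. As a minimal sketch of what I mean (assuming a [256, 256] integer class mask, which is not necessarily how my actual pipeline builds the labels):

import numpy as np

mask = np.random.randint(0, 3, size=(256, 256))  # assumed class-index mask
one_hot = np.eye(3, dtype=np.float32)[mask]      # shape (256, 256, 3), only 0s and 1s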
Here is my code:
import tensorflow as tf

# Inputs: RGB images (values 0-255) and one-hot labels, both [batch, 256, 256, 3]
x_input = tf.placeholder(tf.float32, [None, 256, 256, 3])
y_input = tf.placeholder(tf.float32, [None, 256, 256, 3])
activation = tf.nn.elu
# Contracting path: two 3x3 convs per level; channels double after each 2x2 max-pool
conv1_1 = tf.layers.conv2d(x_input, 64, 3, padding="same", activation=activation)
conv1_2 = tf.layers.conv2d(conv1_1, 64, 3, padding="same", activation=activation)
pool1_1 = tf.layers.max_pooling2d(conv1_2, pool_size=2, strides=2)
conv2_1 = tf.layers.conv2d(pool1_1, 64*2, 3, padding="same", activation=activation)
conv2_2 = tf.layers.conv2d(conv2_1, 64*2, 3, padding="same", activation=activation)
pool2_1 = tf.layers.max_pooling2d(conv2_2, pool_size=2, strides=2)
conv3_1 = tf.layers.conv2d(pool2_1, 64*2*2, 3, padding="same", activation=activation)
conv3_2 = tf.layers.conv2d(conv3_1, 64*2*2, 3, padding="same", activation=activation)
pool3_1 = tf.layers.max_pooling2d(conv3_2, pool_size=2, strides=2)
conv4_1 = tf.layers.conv2d(pool3_1, 64*2*2*2, 3, padding="same", activation=activation)
conv4_2 = tf.layers.conv2d(conv4_1, 64*2*2*2, 3, padding="same", activation=activation)
pool4_1 = tf.layers.max_pooling2d(conv4_2, pool_size=2, strides=2)
conv5_1 = tf.layers.conv2d(pool4_1, 64*2*2*2*2, 3, padding="same", activation=activation)
conv5_2 = tf.layers.conv2d(conv5_1, 64*2*2*2*2, 3, padding="same", activation=activation)
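# Note: the definition of crop_concat was missing from this snippet. Since all
# convs use "same" padding and 256 is divisible by 2**4, the encoder and decoder
# feature maps already have matching spatial sizes, so a minimal stand-in
# (hypothetical, not necessarily the real helper) is a plain channel-wise concat:
def crop_concat(encoder_feat, decoder_feat):
    return tf.concat([encoder_feat, decoder_feat], axis=3)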
# Expanding path: 2x2 transposed convs upsample; skip connections concat encoder features
deconv6_1 = tf.layers.conv2d_transpose(conv5_2, 64*2*2*2, 2, strides=2, padding="valid", activation=activation)
cc6_1 = crop_concat(conv4_2, deconv6_1)
conv6_1 = tf.layers.conv2d(cc6_1, 64*2*2*2, 3, padding="same", activation=activation)
conv6_2 = tf.layers.conv2d(conv6_1, 64*2*2*2, 3, padding="same", activation=activation)
deconv7_1 = tf.layers.conv2d_transpose(conv6_2, 64*2*2, 2, strides=2, padding="valid", activation=activation)
cc7_1 = crop_concat(conv3_2, deconv7_1)
conv7_1 = tf.layers.conv2d(cc7_1, 64*2*2, 3, padding="same", activation=activation)
conv7_2 = tf.layers.conv2d(conv7_1, 64*2*2, 3, padding="same", activation=activation)
deconv8_1 = tf.layers.conv2d_transpose(conv7_2, 64*2, 2, strides=2, padding="valid", activation=activation)
cc8_1 = crop_concat(conv2_2, deconv8_1)
conv8_1 = tf.layers.conv2d(cc8_1, 64*2, 3, padding="same", activation=activation)
conv8_2 = tf.layers.conv2d(conv8_1, 64*2, 3, padding="same", activation=activation)
deconv9_1 = tf.layers.conv2d_transpose(conv8_2, 64, 2, strides=2, padding="valid", activation=activation)
cc9_1 = crop_concat(conv1_2, deconv9_1)
conv9_1 = tf.layers.conv2d(cc9_1, 64, 3, padding="same", activation=activation)
conv9_2 = tf.layers.conv2d(conv9_1, 64, 3, padding="same", activation=activation)
# Final 1x1 conv: map 64 feature channels to 3 per-pixel class logits (no activation)
logits = tf.layers.conv2d(conv9_2, 3, 1, padding="same", activation=None)
# ------------------------------------------------------
# Flatten to per-pixel vectors of shape [batch*256*256, 3] for the loss
flat_logits = tf.reshape(logits, [-1, 3])
flat_labels = tf.reshape(y_input, [-1, 3])
#loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=flat_labels, logits=flat_logits) # seems to work
loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=flat_labels, logits=flat_logits)
loss = tf.reduce_mean(loss)
optimizer = tf.train.AdamOptimizer(0.0001).minimize(loss)
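Since I suspect exploding gradients, one thing I have considered (but not verified) is clipping the global gradient norm instead of calling minimize() directly. This is only a sketch, and the clip_norm of 5.0 is an untuned guess on my part:

# Hypothetical replacement for the minimize() call above: clip the global
# gradient norm before applying the updates (clip_norm=5.0 is an untuned guess).
opt = tf.train.AdamOptimizer(0.0001)
grads, variables = zip(*opt.compute_gradients(loss))
clipped_grads, _ = tf.clip_by_global_norm(grads, clip_norm=5.0)
train_op = opt.apply_gradients(zip(clipped_grads, variables))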