我有一个ConvNet模型。它为所有前向传播情况输出完全相同的值。
最初,由于 dropout 设为 1 且学习率为零,评估并不是在训练期间进行的。这让我以为是自己恢复(restore)模型的方式不正确;不过,我决定在训练期间去掉 dropout 并将学习率设为零来进行测试。当我输出 softmax 值时,每一个输出都是相同的常数。
然后我检查了通过 TensorBoard 显示的图像和标签,每个图像和标签看起来都在变化,因此输入数据确实是在不断变化的。
所以问题不在于输入,而在于前向传播本身,但我无法看到它出错的地方。
def weight_variable(shape, stddev=None):
    """Create a trainable weight variable initialised from a normal distribution.

    Args:
        shape: Sequence of ints giving the variable shape. The last entry is
            treated as the output dimension; the product of all preceding
            entries is treated as the fan-in.
        stddev: Optional standard deviation of the initial values. When
            omitted, ``sqrt(2 / fan_in)`` is used (He initialisation for
            ReLU layers).

    Returns:
        A ``tf.Variable`` created under the ``/gpu:0`` device scope.
    """
    if stddev is None:
        # A single tiny fixed stddev (previously 0.00125 for every layer)
        # lets the constant 0.1 biases dominate the pre-activations, so after
        # a few layers the outputs barely depend on the input. Scaling by
        # fan-in keeps the activation variance roughly stable across layers.
        fan_in = 1
        for dim in shape[:-1]:
            fan_in *= dim
        # Guard against a zero-sized dimension producing a division by zero.
        stddev = (2.0 / max(fan_in, 1)) ** 0.5
    with tf.device('/gpu:0'):
        initial = tf.random_normal(shape, stddev=stddev)
        return tf.Variable(initial)
def bias_variable(shape):
    """Create a bias variable of the given shape with every entry set to 0.1.

    The constant and the variable are both built under the ``/cpu:0``
    device scope.
    """
    with tf.device('/cpu:0'):
        return tf.Variable(tf.constant(0.1, shape=shape))
def conv(images, W):
    """Convolve `images` with filter `W` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(images, W, strides=unit_strides, padding='SAME')
def forward_propagation(images, dropout_value2):
    """Build the forward pass: five conv stages followed by five dense layers.

    Each conv stage is conv -> ReLU -> 2x2 max-pool (stride 2) -> local
    response normalisation. The input is reshaped to ``[-1, 800, 800, 3]``;
    five stride-2 'SAME' pools reduce 800 to 25, hence the ``25 * 25 * 512``
    flattened size.

    Args:
        images: Tensor that can be reshaped to ``[-1, 800, 800, 3]``.
        dropout_value2: Passed as the second argument to ``tf.nn.dropout``,
            applied to the last hidden layer (presumably the keep
            probability -- confirm against the TensorFlow version in use).

    Returns:
        The un-activated output of the final dense layer (4 columns).
    """
    # (filter shape, name given to the LRN op) for each conv stage, in order.
    conv_stages = (
        ([8, 8, 3, 16], 'norm1'),
        ([3, 3, 16, 64], 'norm2'),
        ([3, 3, 64, 128], 'norm3'),
        ([3, 3, 128, 256], 'norm4'),
        ([3, 3, 256, 512], 'norm5'),
    )
    # Weight shapes of the ReLU dense layers that follow the first dense layer.
    hidden_dense_shapes = ([256, 256], [256, 100], [100, 50])
    flat_size = 25 * 25 * 512
    pool_window = [1, 2, 2, 1]

    with tf.device('/gpu:0'):
        features = None
        for kernel_shape, norm_name in conv_stages:
            kernel = weight_variable(kernel_shape)
            offset = bias_variable(kernel_shape[-1:])
            if features is None:
                # The input reshape is created after the first stage's
                # variables so op creation order is unchanged.
                features = tf.reshape(images, [-1, 800, 800, 3])
            activated = tf.nn.relu(conv(features, kernel) + offset)
            _activation_summary(activated)
            pooled = tf.nn.max_pool(
                activated, ksize=pool_window, strides=pool_window,
                padding='SAME')
            features = tf.nn.lrn(
                pooled, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                name=norm_name)

        # First dense layer: the only hidden dense layer that gets a summary.
        dense_weights = weight_variable([flat_size, 256])
        dense_offset = bias_variable([256])
        flattened = tf.reshape(features, [-1, flat_size])
        hidden = tf.nn.relu(tf.matmul(flattened, dense_weights) + dense_offset)
        _activation_summary(hidden)

        for layer_shape in hidden_dense_shapes:
            layer_weights = weight_variable(layer_shape)
            layer_offset = bias_variable(layer_shape[-1:])
            hidden = tf.nn.relu(tf.matmul(hidden, layer_weights) + layer_offset)

        # Output layer: dropout on the last hidden layer, then a linear map.
        output_weights = weight_variable([50, 4])
        output_offset = bias_variable([4])
        kept = tf.nn.dropout(hidden, dropout_value2)
        logits = tf.matmul(kept, output_weights) + output_offset
        _activation_summary(logits)
        return logits
def error(forward_propagation_results, labels):
    """Compute the mean sparse softmax cross-entropy loss.

    Args:
        forward_propagation_results: Tensor passed as ``logits`` to the
            cross-entropy op.
        labels: Class-index labels; cast to ``tf.int64`` before use.

    Returns:
        Scalar tensor holding the mean cross-entropy. The same tensor is
        added to the 'losses' collection and recorded as the 'LOSS' scalar
        summary.
    """
    with tf.device('/cpu:0'):
        int_labels = tf.cast(labels, tf.int64)
        per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=int_labels, logits=forward_propagation_results)
        cost = tf.reduce_mean(per_example_loss)
        tf.add_to_collection('losses', cost)
        tf.summary.scalar('LOSS', cost)
        return cost
def train(cost, learning_rate=0.01):
    """Build the Adam training op that minimises `cost`.

    Args:
        cost: Scalar loss tensor to minimise.
        learning_rate: Step size handed to ``tf.train.AdamOptimizer``.
            Defaults to 0.01, the value that was previously hard-coded, so
            existing callers are unaffected; exposing it lets experiments
            (e.g. a zero or smaller rate) run without editing this function.

    Returns:
        The op returned by ``AdamOptimizer.minimize``, created under the
        ``/gpu:0`` device scope.
    """
    with tf.device('/gpu:0'):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        return optimizer.minimize(cost)