Neural network outputs the same values for all inputs

Date: 2017-04-16 02:01:08

Tags: tensorflow neural-network conv-neural-network

I have a ConvNet model, and it outputs exactly the same values for every forward-propagation case.

Initially, the evaluations were not done during training, since dropout was set to 1 and there was no learning rate. That led me to believe I was restoring the model incorrectly, so I decided to test during training instead, by removing dropout and setting the learning rate to zero. When I output the softmax values, every one of them is constant.

I then inspected the images and labels displayed through TensorBoard, and each image and label appears to change from step to step, so the data being fed in is definitely varying.

So the problem is not with the input but with the forward propagation itself, yet I cannot see where it goes wrong.
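To make the symptom concrete, here is roughly how I inspect the outputs (a minimal sketch, not my full pipeline; it uses the forward_propagation function listed below, with a keep probability of 1.0 fed in to disable dropout):

import numpy as np
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 800, 800, 3])
keep_prob = tf.placeholder(tf.float32)
logits = forward_propagation(images, keep_prob)  # defined below
probabilities = tf.nn.softmax(logits)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(2):
        batch = np.random.rand(4, 800, 800, 3).astype(np.float32)
        print(sess.run(probabilities, {images: batch, keep_prob: 1.0}))
        # every row comes out identical, across batches and across runs

The model code itself is below.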

import tensorflow as tf

def weight_variable(shape):
    # Weights: small-stddev Gaussian initialisation, pinned to the GPU.
    with tf.device('/gpu:0'):
        initial = tf.random_normal(shape, stddev=0.00125)
        return tf.Variable(initial)

def bias_variable(shape):
    # Biases: constant 0.1 initial value, kept on the CPU.
    with tf.device('/cpu:0'):
        initial = tf.constant(0.1, shape = shape)
        return tf.Variable(initial)

def conv(images, W):
    # Stride-1 convolution with SAME padding (spatial size preserved).
    return tf.nn.conv2d(images, W, strides = [1, 1, 1, 1], padding = 'SAME')

def forward_propagation(images, dropout_value2):
    # Five conv -> ReLU -> max-pool -> LRN blocks followed by four dense
    # layers down to 4 raw logits. _activation_summary is my TensorBoard
    # summary helper, defined elsewhere in my code.
    with tf.device('/gpu:0'):
        conv1_feature = weight_variable([8, 8, 3, 16])
        conv1_bias = bias_variable([16])
        image_matrix = tf.reshape(images, [-1, 800, 800, 3])
        conv1_result = tf.nn.relu(conv(image_matrix, conv1_feature) + conv1_bias)
        _activation_summary(conv1_result)

        conv1_pool = tf.nn.max_pool(conv1_result, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')
        norm1 = tf.nn.lrn(conv1_pool, 4, bias = 1.0, alpha = 0.001 / 9.0, beta = 0.75, name = 'norm1')

        conv2_feature = weight_variable([3, 3, 16, 64])
        conv2_bias = bias_variable([64])
        conv2_result = tf.nn.relu(conv(norm1, conv2_feature) + conv2_bias)
        _activation_summary(conv2_result)

        conv2_pool = tf.nn.max_pool(conv2_result, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')
        norm2 = tf.nn.lrn(conv2_pool, 4, bias = 1.0, alpha = 0.001 / 9.0, beta = 0.75, name = 'norm2')

        conv3_feature = weight_variable([3, 3, 64, 128])
        conv3_bias = bias_variable([128])
        conv3_result = tf.nn.relu(conv(norm2, conv3_feature) + conv3_bias)
        _activation_summary(conv3_result)

        conv3_pool = tf.nn.max_pool(conv3_result, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')
        norm3 = tf.nn.lrn(conv3_pool, 4, bias = 1.0, alpha = 0.001 / 9.0, beta = 0.75, name = 'norm3')

        conv4_feature = weight_variable([3, 3, 128, 256])
        conv4_bias = bias_variable([256])
        conv4_result = tf.nn.relu(conv(norm3, conv4_feature) + conv4_bias)
        _activation_summary(conv4_result)
        conv4_pool = tf.nn.max_pool(conv4_result, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')
        norm4 = tf.nn.lrn(conv4_pool, 4, bias = 1.0, alpha = 0.001 / 9.0, beta = 0.75, name = 'norm4')

        conv5_feature = weight_variable([3, 3, 256, 512])
        conv5_bias = bias_variable([512])
        conv5_result = tf.nn.relu(conv(norm4, conv5_feature) + conv5_bias)
        _activation_summary(conv5_result)
        conv5_pool = tf.nn.max_pool(conv5_result, ksize = [1, 2, 2, 1], strides = [1, 2, 2, 1], padding = 'SAME')
        norm5 = tf.nn.lrn(conv5_pool, 4, bias = 1.0, alpha = 0.001 / 9.0, beta = 0.75, name = 'norm5')

        perceptron1_weight = weight_variable([25 * 25 * 512, 256])
        perceptron1_bias = bias_variable([256])
        # After five stride-2 pools the 800x800 input is 25x25 (800 / 2**5 = 25).
        flatten_dense_connect = tf.reshape(norm5, [-1, 25 * 25 * 512])
        compute_perceptron1_layer = tf.nn.relu(tf.matmul(flatten_dense_connect, perceptron1_weight) + perceptron1_bias)
        _activation_summary(compute_perceptron1_layer)

        perceptron2_weight = weight_variable([256, 256])
        perceptron2_bias = bias_variable([256])

        compute_perceptron2_layer = tf.nn.relu(tf.matmul(compute_perceptron1_layer, perceptron2_weight) + perceptron2_bias)
        perceptron3_weight = weight_variable([256, 100])
        perceptron3_bias = bias_variable([100])

        compute_perceptron3_layer = tf.nn.relu(tf.matmul(compute_perceptron2_layer, perceptron3_weight) + perceptron3_bias)

        perceptron4_weight = weight_variable([100, 50])
        perceptron4_bias = bias_variable([50])

        compute_perceptron5_layer = tf.nn.relu(tf.matmul(compute_perceptron3_layer, perceptron4_weight) + perceptron4_bias)

        perceptron5_weight = weight_variable([50, 4])
        perceptron5_bias = bias_variable([4])

        dropout = tf.nn.dropout(compute_perceptron5_layer, dropout_value2)
        result1 = tf.matmul(dropout, perceptron5_weight) + perceptron5_bias
        _activation_summary(result1)
        return result1
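
As a quick sanity check that the graph wires up and returns one 4-way logit vector per image, I run a throwaway snippet like this (dropout disabled with a keep probability of 1.0):

logits = forward_propagation(tf.zeros([1, 800, 800, 3]), 1.0)
print(logits.get_shape())  # expect (1, 4)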

def error(forward_propagation_results, labels):
    with tf.device('/cpu:0'):
        labels = tf.cast(labels, tf.int64)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=forward_propagation_results, labels=labels)
        cost = tf.reduce_mean(cross_entropy)

        tf.add_to_collection('losses', cost)
        tf.summary.scalar('LOSS', cost)
        return cost
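
Note that sparse_softmax_cross_entropy_with_logits applies the softmax internally, so the raw logits from result1 go straight in, and the labels are integer class indices (0 through 3 for 4 classes), not one-hot vectors. A minimal illustration of the expected shapes:

labels = tf.constant([0, 3, 1], dtype=tf.int64)  # class indices, not one-hot
logits = tf.random_normal([3, 4])                # batch of 3 examples, 4 classes
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)
# loss has shape (3,): one cross-entropy value per example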

def train(cost):
    with tf.device('/gpu:0'):
        train_loss = tf.train.AdamOptimizer(learning_rate = 0.01).minimize(cost)
        return train_loss
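
For what it is worth, 0.01 is ten times Adam's default learning rate of 0.001, and with ReLU everywhere a few large early updates could in principle push units into a permanently dead region. To rule that out I also monitor the per-variable gradients, roughly like this (a debugging sketch, not part of the training code above):

optimizer = tf.train.AdamOptimizer(learning_rate=0.001)  # Adam's default
grads_and_vars = optimizer.compute_gradients(cost)
for grad, var in grads_and_vars:
    if grad is not None:
        tf.summary.histogram(var.op.name + '/gradient', grad)
train_op = optimizer.apply_gradients(grads_and_vars)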

0 Answers:

No answers yet