Switching from tf.nn.* to tf.layers.* broke the model

Time: 2017-11-22 10:35:47

Tags: python tensorflow

I'm exploring TensorFlow modules with a convnet on the notMNIST Assignment 4 of the Udacity Deep Learning course.

My initial model uses tf.nn.conv2d and tf.nn.max_pool and gives about 90% validation accuracy.

The initial model, tf.nn.*:

batch_size = 32
patch_size = 5
depth = 16
num_hidden = 64
seed=4242

tf.reset_default_graph()

graph = tf.Graph()

with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)


    def conv_relu_maxpool(inputs, kernel_shape, bias_shape, bias_init_v=0.0):
        weights = tf.get_variable("weights", shape = kernel_shape,
                                 initializer = tf.truncated_normal_initializer(stddev=0.1, seed=seed))
        biases = tf.get_variable("biases", shape = bias_shape,
                                initializer = tf.constant_initializer(bias_init_v))
        conv = tf.nn.conv2d(inputs, weights, [1,1,1,1], padding='SAME')
        hidden = tf.nn.relu(conv + biases)
        maxpool = tf.nn.max_pool(hidden, [1,2,2,1], [1,2,2,1], 'SAME')

        return maxpool


    def dense(inputs, units, activation=None):
        return tf.layers.dense(
            inputs,
            units=units,
            activation=activation,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.05, seed=seed),
            bias_initializer=tf.constant_initializer(0.1),
            name="Dense"
        )


    # Model.
    def model(data, keep_prob=1):
        with tf.variable_scope("conv1"):
            layer1 = conv_relu_maxpool(data, [patch_size, patch_size, num_channels, depth], [depth], 0.01)

        with tf.variable_scope("conv2"):
            layer2 = conv_relu_maxpool(layer1, [patch_size, patch_size, depth, depth], [depth], 1.0)
        shape = layer2.get_shape().as_list()
        reshape = tf.reshape(layer2, [shape[0], shape[1] * shape[2] * shape[3]])

        with tf.variable_scope("dense1"):
            dense1 = dense( reshape, 64, activation=tf.nn.relu)
        dropout1 = tf.nn.dropout( dense1, keep_prob=keep_prob, seed=seed)
        with tf.variable_scope("dense2"):
            dense2 = dense( dense1, 32, activation=tf.nn.relu)
        dropout2 = tf.nn.dropout( dense2, keep_prob=keep_prob, seed=seed)
        with tf.variable_scope("dense3"):
            dense3 = dense( dropout2, num_labels)

        return dense3


    with tf.variable_scope("model"):
        # Training computation.
        logits = model(tf_train_dataset, keep_prob=0.7)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

        # Optimizer.
        global_step = tf.Variable(0)  # count the number of steps taken.
        learning_rate = tf.train.exponential_decay(0.05, global_step, 1001, 0.8)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)

    with tf.variable_scope("model", reuse=True):
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))

    with tf.variable_scope("model", reuse=True):
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

I rewrote it with tf.layers.conv2d and tf.layers.max_pooling2d, followed by tf.layers.dense and tf.layers.dropout. Now the model fails to train: it runs suspiciously fast and gives 10% (random-choice) validation accuracy. I can't figure out what's causing it.

The graph:

batch_size = 32
patch_size = 5
depth = 16
num_hidden = 64
seed=4242

graph = tf.Graph()
tf.reset_default_graph()
with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)


    # Model.
    def model(data, keep_prob=0.5, training=False):
        conv_1 = tf.layers.conv2d(
            data, filters=depth, kernel_size=1, padding='SAME',
            activation=tf.nn.relu,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
            bias_initializer=tf.constant_initializer(0.0),
            name="Conv_1"
        )
        pool_1 = tf.layers.max_pooling2d( conv_1, pool_size=patch_size, strides=2, padding='SAME')

        conv_2 = tf.layers.conv2d(
            pool_1, filters=depth, kernel_size=1, padding='SAME',
            activation=tf.nn.relu,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=2*seed),
            bias_initializer=tf.constant_initializer(1.0),
            name="Conv_2"
        )
        pool_2 = tf.layers.max_pooling2d( conv_2, pool_size=patch_size, strides=2, padding='SAME')

        shape = pool_2.get_shape().as_list()
        reshape = tf.reshape(pool_2, [shape[0], shape[1] * shape[2] * shape[3]])

        dense_1 = tf.layers.dense(
            reshape, units=num_hidden, activation=tf.nn.relu,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
            bias_initializer=tf.constant_initializer(1.0)
        )

        dropout_1 = tf.layers.dropout( dense_1, rate=(1.0-keep_prob), seed=seed, training=training)

        dense_2 = tf.layers.dense(
            dropout_1, units=num_labels, activation=None,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
            bias_initializer=tf.constant_initializer(1.0)
        )

        return dense_2


    with tf.variable_scope("model"):
        # Training computation.
        logits = model(tf_train_dataset, training=True)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

        # Optimizer.
        global_step = tf.Variable(0, name="globalStep", trainable=False)  # count the number of steps taken.
        learning_rate = tf.train.exponential_decay(0.05, global_step, 1001, 0.8)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)

    with tf.variable_scope("model", reuse=True):
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))

    with tf.variable_scope("model", reuse=True):
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

Training and evaluation:

num_steps = 1001

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')

    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
            print('Step {:4d} rate {:.4f} '.format(step, learning_rate.eval()), end='')
            accPred = accuracy(predictions, batch_labels)
            accValid = accuracy(valid_prediction.eval(), valid_labels)
            print('Loss {:2.3f}  Batch acc.: {:02.2f}%  Validation acc.: {:2.2f}%'.format(l,accPred,accValid))

    print('\nTest accuracy: {:.2f}%'.format(accuracy(test_prediction.eval(), test_labels)))
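
For completeness, accuracy here is the usual helper from the assignment notebook; roughly, it computes the percentage of samples whose argmax prediction matches the one-hot label (the exact version in the notebook may differ slightly):

import numpy as np

def accuracy(predictions, labels):
    # Percentage of samples where the predicted class (argmax of softmax output)
    # matches the index of the one-hot label.
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])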

The loss freezes at 2.303 (roughly -ln(0.1), i.e. the cross-entropy of a uniform guess over 10 classes) and the validation accuracy stays at 10%:

Initialized
Step    0 rate 0.0500 Loss 4.903  Batch acc.: 18.75%  Validation acc.: 9.98%
Step   50 rate 0.0494 Loss 2.301  Batch acc.: 15.62%  Validation acc.: 10.00%
Step  100 rate 0.0489 Loss 2.302  Batch acc.: 12.50%  Validation acc.: 10.00%
Step  150 rate 0.0483 Loss 2.303  Batch acc.: 6.25%  Validation acc.: 10.00%
Step  200 rate 0.0478 Loss 2.306  Batch acc.: 0.00%  Validation acc.: 10.00%
Step  250 rate 0.0473 Loss 2.306  Batch acc.: 3.12%  Validation acc.: 10.00%

Any suggestions as to what might be going wrong?

The full notebook with the problematic version is on github. The problem cells are at the bottom.

1 answer:

Answer 0 (score: 2):

I see two strange things:

1. Your second model has a different structure from the first one (the first has three dense layers, the second only two).
2. Probably connected to 1., you are missing a linear layer at the end of the network (your output is dense_2, which has a relu activation).

So my guess is that you forgot the last piece of the network:

    dense_3 = tf.layers.dense(
                dense_2, 
                units=num_labels, 
                activation=None, #<<<<<<<<<<<<<<<<<<<<<
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
                bias_initializer=tf.constant_initializer(1.0)
            )
    return dense_3
    

Alternatively, remove the activation function from dense_2.
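
As a minimal sketch of that alternative (assuming dense_2 currently ends the network with a relu; only the activation changes, the rest stays as in the question):

    dense_2 = tf.layers.dense(
        dropout_1, units=num_labels,
        activation=None,  # logits layer: no non-linearity here
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
        bias_initializer=tf.constant_initializer(1.0)
    )
    return dense_2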

UPDATE:

In the notebook, I see that you define the convolution + pooling layers as follows:

    conv_1 = tf.layers.conv2d(
        data, filters=depth, kernel_size=1, padding='SAME',
        activation=tf.nn.relu,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
        bias_initializer=tf.constant_initializer(0.0),
        name="Conv_1"
    )
    pool_1 = tf.layers.max_pooling2d( conv_1, pool_size=patch_size, strides=2, padding='SAME')
    

If I compare this with the way you defined the layers before:

    def conv_relu_maxpool(inputs, kernel_shape, bias_shape, bias_init_v=0.0):
        weights = tf.get_variable("weights", shape = kernel_shape,
                                  initializer = tf.truncated_normal_initializer(stddev=0.1, seed=seed))
        biases = tf.get_variable("biases", shape = bias_shape,
                                 initializer = tf.constant_initializer(bias_init_v))
        conv = tf.nn.conv2d(inputs, weights, [1,1,1,1], padding='SAME')
        hidden = tf.nn.relu(conv + biases)
        maxpool = tf.nn.max_pool(hidden, [1,2,2,1], [1,2,2,1], 'SAME')
    
        return maxpool
    conv_relu_maxpool(data, [patch_size, patch_size, num_channels, depth], [depth], 0.01)
    

I see two mistakes there:

1. You are using a 1 x 1 kernel in the convolution (kernel_size=1 in the call to conv2d) instead of patch_size x patch_size.
2. You are max-pooling over 5 x 5 windows (pool_size=patch_size in the call to max_pooling2d) instead of 2 x 2 (see the corrected sketch below).

Note that there may be more differences between the two architectures; these are just the ones I spotted right away.
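
A minimal sketch of what the corrected block might look like, assuming the intent is to mirror the original tf.nn.* layer (the initializers are kept exactly as in the question):

    conv_1 = tf.layers.conv2d(
        data, filters=depth,
        kernel_size=patch_size,  # patch_size x patch_size kernel, not 1 x 1
        padding='SAME',
        activation=tf.nn.relu,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
        bias_initializer=tf.constant_initializer(0.0),
        name="Conv_1"
    )
    # 2 x 2 window with stride 2, matching the original tf.nn.max_pool call
    pool_1 = tf.layers.max_pooling2d(conv_1, pool_size=2, strides=2, padding='SAME')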