Switching from tf.nn.* to tf.layers.* broke the model

Time: 2017-11-22 10:35:47

Tags: python tensorflow

I'm exploring TensorFlow modules with a convnet on the notMNIST Assignment 4 of the Udacity Deep Learning course.

My initial model uses tf.nn.conv2d and tf.nn.max_pool and gives about 90% validation accuracy.

The initial model, tf.nn.*:

batch_size = 32
patch_size = 5
depth = 16
num_hidden = 64
seed=4242

tf.reset_default_graph()

graph = tf.Graph()

with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)


    def conv_relu_maxpool(inputs, kernel_shape, bias_shape, bias_init_v=0.0):
        weights = tf.get_variable("weights", shape = kernel_shape,
                                 initializer = tf.truncated_normal_initializer(stddev=0.1, seed=seed))
        biases = tf.get_variable("biases", shape = bias_shape,
                                initializer = tf.constant_initializer(bias_init_v))
        conv = tf.nn.conv2d(inputs, weights, [1,1,1,1], padding='SAME')
        hidden = tf.nn.relu(conv + biases)
        maxpool = tf.nn.max_pool(hidden, [1,2,2,1], [1,2,2,1], 'SAME')

        return maxpool


    def dense(inputs, units, activation=None):
        return tf.layers.dense(
            inputs,
            units=units,
            activation=activation,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.05, seed=seed),
            bias_initializer=tf.constant_initializer(0.1),
            name="Dense"
        )


    # Model.
    def model(data, keep_prob=1):
        with tf.variable_scope("conv1"):
            layer1 = conv_relu_maxpool(data, [patch_size, patch_size, num_channels, depth], [depth], 0.01)

        with tf.variable_scope("conv2"):
            layer2 = conv_relu_maxpool(layer1, [patch_size, patch_size, depth, depth], [depth], 1.0)
        shape = layer2.get_shape().as_list()
        reshape = tf.reshape(layer2, [shape[0], shape[1] * shape[2] * shape[3]])

        with tf.variable_scope("dense1"):
            dense1 = dense( reshape, 64, activation=tf.nn.relu)
        dropout1 = tf.nn.dropout( dense1, keep_prob=keep_prob, seed=seed)
        with tf.variable_scope("dense2"):
            dense2 = dense( dense1, 32, activation=tf.nn.relu)
        dropout2 = tf.nn.dropout( dense2, keep_prob=keep_prob, seed=seed)
        with tf.variable_scope("dense3"):
            dense3 = dense( dropout2, num_labels)

        return dense3


    with tf.variable_scope("model"):
        # Training computation.
        logits = model(tf_train_dataset, keep_prob=0.7)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

        # Optimizer.
        global_step = tf.Variable(0)  # count the number of steps taken.
        learning_rate = tf.train.exponential_decay(0.05, global_step, 1001, 0.8)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)

    with tf.variable_scope("model", reuse=True):
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))

    with tf.variable_scope("model", reuse=True):
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

I rewrote it with tf.layers.conv2d and tf.layers.max_pooling2d, followed by tf.layers.dense and tf.layers.dropout. Now the model fails to train: it runs suspiciously fast and gives 10% (random-choice) validation accuracy. I can't figure out what's causing it.

The graph:

batch_size = 32
patch_size = 5
depth = 16
num_hidden = 64
seed=4242

graph = tf.Graph()
tf.reset_default_graph()
with graph.as_default():

    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)


    # Model.
    def model(data, keep_prob=0.5, training=False):
        conv_1 = tf.layers.conv2d(
            data, filters=depth, kernel_size=1, padding='SAME',
            activation=tf.nn.relu,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
            bias_initializer=tf.constant_initializer(0.0),
            name="Conv_1"
        )
        pool_1 = tf.layers.max_pooling2d( conv_1, pool_size=patch_size, strides=2, padding='SAME')

        conv_2 = tf.layers.conv2d(
            pool_1, filters=depth, kernel_size=1, padding='SAME',
            activation=tf.nn.relu,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=2*seed),
            bias_initializer=tf.constant_initializer(1.0),
            name="Conv_2"
        )
        pool_2 = tf.layers.max_pooling2d( conv_2, pool_size=patch_size, strides=2, padding='SAME')

        shape = pool_2.get_shape().as_list()
        reshape = tf.reshape(pool_2, [shape[0], shape[1] * shape[2] * shape[3]])

        dense_1 = tf.layers.dense(
            reshape, units=num_hidden, activation=tf.nn.relu,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
            bias_initializer=tf.constant_initializer(1.0)
        )

        dropout_1 = tf.layers.dropout( dense_1, rate=(1.0-keep_prob), seed=seed, training=training)

        dense_2 = tf.layers.dense(
            dropout_1, units=num_labels, activation=None,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
            bias_initializer=tf.constant_initializer(1.0)
        )

        return dense_2


    with tf.variable_scope("model"):
        # Training computation.
        logits = model(tf_train_dataset, training=True)
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))

        # Optimizer.
        global_step = tf.Variable(0, name="globalStep", trainable=False)  # count the number of steps taken.
        learning_rate = tf.train.exponential_decay(0.05, global_step, 1001, 0.8)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

        # Predictions for the training, validation, and test data.
        train_prediction = tf.nn.softmax(logits)

    with tf.variable_scope("model", reuse=True):
        valid_prediction = tf.nn.softmax(model(tf_valid_dataset))

    with tf.variable_scope("model", reuse=True):
        test_prediction = tf.nn.softmax(model(tf_test_dataset))

Training and evaluation:

num_steps = 1001

with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')

    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset: batch_data, tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
            print('Step {:4d} rate {:.4f} '.format(step, learning_rate.eval()), end='')
            accPred = accuracy(predictions, batch_labels)
            accValid = accuracy(valid_prediction.eval(), valid_labels)
            print('Loss {:2.3f}  Batch acc.: {:02.2f}%  Validation acc.: {:2.2f}%'.format(l,accPred,accValid))

    print('\nTest accuracy: {:.2f}%'.format(accuracy(test_prediction.eval(), test_labels)))
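
For completeness, accuracy here is the usual helper from the assignment notebook; roughly, it computes the percentage of samples whose argmax prediction matches the one-hot label (the exact version in the notebook may differ slightly):

import numpy as np

def accuracy(predictions, labels):
    # Percentage of samples where the predicted class (argmax of softmax output)
    # matches the index of the one-hot label.
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
            / predictions.shape[0])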

The loss freezes at 2.303 (roughly -ln(0.1), i.e. the cross-entropy of a uniform guess over 10 classes) and the validation accuracy stays at 10%:

Initialized
Step    0 rate 0.0500 Loss 4.903  Batch acc.: 18.75%  Validation acc.: 9.98%
Step   50 rate 0.0494 Loss 2.301  Batch acc.: 15.62%  Validation acc.: 10.00%
Step  100 rate 0.0489 Loss 2.302  Batch acc.: 12.50%  Validation acc.: 10.00%
Step  150 rate 0.0483 Loss 2.303  Batch acc.: 6.25%  Validation acc.: 10.00%
Step  200 rate 0.0478 Loss 2.306  Batch acc.: 0.00%  Validation acc.: 10.00%
Step  250 rate 0.0473 Loss 2.306  Batch acc.: 3.12%  Validation acc.: 10.00%

Any suggestions as to what might be going wrong?

The full notebook with the problematic version is on github. The problem cells are at the bottom.

1 answer:

Answer 0 (score: 2):

I see two strange things:

1. Your second model has a different structure from the first one (the first has three dense layers, the second only two).
2. Probably connected to 1., you are missing a linear layer at the end of the network (your output is dense_2, which has a relu activation).

So my guess is that you forgot the last piece of the network:

    dense_3 = tf.layers.dense(
                dense_2, 
                units=num_labels, 
                activation=None, #<<<<<<<<<<<<<<<<<<<<<
                kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
                bias_initializer=tf.constant_initializer(1.0)
            )
    return dense_3
    

Alternatively, remove the activation function from dense_2.
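
As a minimal sketch of that alternative (assuming dense_2 currently ends the network with a relu; only the activation changes, the rest stays as in the question):

    dense_2 = tf.layers.dense(
        dropout_1, units=num_labels,
        activation=None,  # logits layer: no non-linearity here
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
        bias_initializer=tf.constant_initializer(1.0)
    )
    return dense_2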

UPDATE:

In the notebook, I see that you define the convolution + pooling layers as follows:

    conv_1 = tf.layers.conv2d(
        data, filters=depth, kernel_size=1, padding='SAME',
        activation=tf.nn.relu,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
        bias_initializer=tf.constant_initializer(0.0),
        name="Conv_1"
    )
    pool_1 = tf.layers.max_pooling2d( conv_1, pool_size=patch_size, strides=2, padding='SAME')
    

If I compare this with the way you defined the layers before:

    def conv_relu_maxpool(inputs, kernel_shape, bias_shape, bias_init_v=0.0):
        weights = tf.get_variable("weights", shape = kernel_shape,
                                  initializer = tf.truncated_normal_initializer(stddev=0.1, seed=seed))
        biases = tf.get_variable("biases", shape = bias_shape,
                                 initializer = tf.constant_initializer(bias_init_v))
        conv = tf.nn.conv2d(inputs, weights, [1,1,1,1], padding='SAME')
        hidden = tf.nn.relu(conv + biases)
        maxpool = tf.nn.max_pool(hidden, [1,2,2,1], [1,2,2,1], 'SAME')
    
        return maxpool
    conv_relu_maxpool(data, [patch_size, patch_size, num_channels, depth], [depth], 0.01)
    

I see two mistakes there:

1. You are using a 1 x 1 kernel in the convolution (kernel_size=1 in the call to conv2d) instead of patch_size x patch_size.
2. You are max-pooling over 5 x 5 windows (pool_size=patch_size in the call to max_pooling2d) instead of 2 x 2 (see the corrected sketch below).

Note that there may be more differences between the two architectures; these are just the ones I spotted right away.
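
A minimal sketch of what the corrected block might look like, assuming the intent is to mirror the original tf.nn.* layer (the initializers are kept exactly as in the question):

    conv_1 = tf.layers.conv2d(
        data, filters=depth,
        kernel_size=patch_size,  # patch_size x patch_size kernel, not 1 x 1
        padding='SAME',
        activation=tf.nn.relu,
        kernel_initializer=tf.truncated_normal_initializer(stddev=0.1, seed=seed),
        bias_initializer=tf.constant_initializer(0.0),
        name="Conv_1"
    )
    # 2 x 2 window with stride 2, matching the original tf.nn.max_pool call
    pool_1 = tf.layers.max_pooling2d(conv_1, pool_size=2, strides=2, padding='SAME')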