GradientDescentOptimizer gives much lower accuracy (~0.10) than AdamOptimizer (~0.95) for a convolutional neural network in TensorFlow

Asked: 2018-02-07 17:14:24

Tags: tensorflow convolution mnist cost-based-optimizer

I am building a convolutional neural network to classify the MNIST data, using 2 convolutional layers and 2 fully connected layers.

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
def _net_params():
    weights = {
        'conv1': tf.Variable(tf.random_normal([5, 5, 1, 32])),
        'conv2': tf.Variable(tf.random_normal([5, 5, 32, 64])),
        'fc1': tf.Variable(tf.random_normal([7 * 7 * 64, 1024])),
        'fc2': tf.Variable(tf.random_normal([1024, 10])),
    }
    biases = {
        'conv1': tf.Variable(tf.random_normal([32]), dtype=tf.float32),
        'conv2': tf.Variable(tf.random_normal([64]), dtype=tf.float32),
        'fc1': tf.Variable(tf.random_normal([1024]), dtype=tf.float32),
        'fc2': tf.Variable(tf.random_normal([10]), dtype=tf.float32),
    }
    return weights, biases

def _fc_layer(inputs, weights, biases):
    return tf.add(tf.matmul(inputs, weights), biases)

def _conv_layer(inputs, weights, biases, stride=1, padding='SAME'):
    layer = tf.nn.conv2d(input=inputs,filter=weights,
                     strides=[1, stride, stride, 1],padding=padding)
    layer = tf.nn.bias_add(layer, biases)
    return tf.nn.relu(layer)

def pool_layer(inputs):
    pool = tf.nn.max_pool(inputs, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding="SAME")
    return pool

def conv_net(x):
    weights, biases = _net_params()
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    # Conv layers
    conv1 = _conv_layer(x, weights['conv1'], biases['conv1'])
    pool1 = pool_layer(conv1)
    conv2 = _conv_layer(pool1, weights['conv2'], biases['conv2'])
    pool2 = pool_layer(conv2)
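    # After two 2x2 max-pools the 28x28 input is reduced to 7x7 (28 -> 14 -> 7),
    # and conv2 outputs 64 feature maps, hence the 7 * 7 * 64 flattened size below.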
    flattened = tf.reshape(pool2, [-1, 7 * 7 * 64])
    fc1 = _fc_layer(flattened, weights['fc1'], biases['fc1'])
    fc1 = tf.nn.relu(fc1)
    fc2 = _fc_layer(fc1, weights['fc2'], biases['fc2'])
    return fc2

def _training():
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])
    learning_rate_ = tf.placeholder(tf.float32)
    pred = conv_net(x)
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y_))
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate_).minimize(cost)
    # optimizer = tf.train.GradientDescentOptimizer(
    #     learning_rate=learning_rate_).minimize(cost)
    correct = tf.equal(tf.argmax(pred, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    return x, y_, learning_rate_, optimizer, cost, accuracy


def main():
    mnist = input_data.read_data_sets('tmp/data', one_hot=True)
    n_epochs = 3
    batch_size = 200
    learning_rate = 0.005
    x, y_, learning_rate_, optimizer, cost, accuracy = _training()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        current_epoch = 0
        while current_epoch < n_epochs:
            current_epoch += 1
            print('epoch %s' % (current_epoch,))
            current_batch = 1
            while current_batch * batch_size <= len(mnist.train.images):
                current_batch += 1
                batch_x, batch_y = mnist.train.next_batch(batch_size)
                sess.run(fetches=optimizer, feed_dict={
                    x: batch_x, y_: batch_y, learning_rate_: learning_rate})
                if current_batch % 75 == 0:
                    loss, acc = sess.run([cost, accuracy], feed_dict={
                        x: batch_x, y_: batch_y, learning_rate_: 0.})
                    print('  batch %s: batch_loss=%s, training_accuracy=%s'
                          % (current_batch, loss, acc))
        print('Training complete!')
        print('Final accuracy is %s' % sess.run(accuracy, feed_dict={
            x: mnist.test.images, y_: mnist.test.labels, learning_rate_: 0.}))

if __name__ == '__main__':
    main()

(Some indentation may have been mangled when pasting this code into Stack Overflow.)

When I use AdamOptimizer, my accuracy is above 95%. [screenshot: Accuracy for AdamOptimizer]

But when I use GradientDescentOptimizer, my accuracy stays at about 10%. [screenshot: accuracy for GradientDescentOptimizer]

Do you know why I get this much lower accuracy, and how I can fix it if I want to use GradientDescentOptimizer?
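For reference, the only line I change between the two runs is the optimizer; the learning rate fed through learning_rate_ (0.005) and the tf.random_normal initialization stay the same:

optimizer = tf.train.AdamOptimizer(
    learning_rate=learning_rate_).minimize(cost)

# optimizer = tf.train.GradientDescentOptimizer(
#     learning_rate=learning_rate_).minimize(cost)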

Thanks

0 Answers:

No answers yet.