MNIST classification: low accuracy (10%) and no change in loss

Time: 2017-11-16 03:42:01

Tags: tensorflow deep-learning classification tensorboard mnist

I am learning TensorFlow and tried applying it on the MNIST database. My question is (see the attached image):

  • What could cause this behaviour in the accuracy (improving, then degrading!) and the loss (barely changing!)?
  • The accuracy is not just low - it hovers around 10%, no better than random guessing over 10 classes.

[Image: Accuracy / Loss - TensorBoard]

This is despite:

  • a 5-layer network (including the output layer), with 200/100/60/30/10 neurons respectively
  • a learning rate of 0.1 (which I believe is fairly high) - is the network just not learning?

Full code: https://github.com/vibhorj/tf > mnist-2.py

1) Here is how the layers are defined:

K,L,M,N=200,100,60,30
""" Layer 1 """
with tf.name_scope('L1'):
    w1 = tf.Variable(initial_value = tf.truncated_normal([28*28,K],mean=0,stddev=0.1), name = 'w1')
    b1 = tf.Variable(initial_value = tf.truncated_normal([K],mean=0,stddev=0.1), name = 'b1')
""" Layer 2 """
with tf.name_scope('L2'):
    w2 = tf.Variable(initial_value =tf.truncated_normal([K,L],mean=0,stddev=0.1), name = 'w2')
    b2 = tf.Variable(initial_value = tf.truncated_normal([L],mean=0,stddev=0.1), name = 'b2')
""" Layer 3 """
with tf.name_scope('L3'):
    w3 = tf.Variable(initial_value = tf.truncated_normal([L,M],mean=0,stddev=0.1), name = 'w3')
    b3 = tf.Variable(initial_value = tf.truncated_normal([M],mean=0,stddev=0.1), name = 'b3')
""" Layer 4 """
with tf.name_scope('L4'):
    w4 = tf.Variable(initial_value = tf.truncated_normal([M,N],mean=0,stddev=0.1), name = 'w4')
    b4 = tf.Variable(initial_value = tf.truncated_normal([N],mean=0,stddev=0.1), name = 'b4')
""" Layer output """
with tf.name_scope('L_out'):
    w_out = tf.Variable(initial_value = tf.truncated_normal([N,10],mean=0,stddev=0.1), name = 'w_out')
    b_out = tf.Variable(initial_value = tf.truncated_normal([10],mean=0,stddev=0.1), name = 'b_out')
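
For context, the placeholders X and Y referenced throughout are defined in the full code linked above, not shown here; given the weight shapes, they are presumably something along these lines:

X = tf.placeholder(tf.float32, [None, 28*28], name='X')  # flattened 28x28 pixel images
Y = tf.placeholder(tf.float32, [None, 10], name='Y')     # one-hot labels for the 10 digit classes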

2) The loss function:

Y1 = tf.nn.sigmoid(tf.add(tf.matmul(X,w1),b1), name='Y1')
Y2 = tf.nn.sigmoid(tf.add(tf.matmul(Y1,w2),b2), name='Y2')
Y3 = tf.nn.sigmoid(tf.add(tf.matmul(Y2,w3),b3), name='Y3')
Y4 = tf.nn.sigmoid(tf.add(tf.matmul(Y3,w4),b4), name='Y4')

Y_pred_logits = tf.add(tf.matmul(Y4, w_out),b_out,name='logits')
Y_pred_prob = tf.nn.softmax(Y_pred_logits, name='probs')

error = -tf.matmul(Y
              , tf.reshape(tf.log(Y_pred_prob),[10,-1]), name ='err')
loss = tf.reduce_mean(error, name = 'loss')

3) The optimization function:

opt = tf.train.GradientDescentOptimizer(0.1)
grads_and_vars = opt.compute_gradients(loss)
ctr = tf.Variable(0.0, name='ctr')
z = opt.apply_gradients(grads_and_vars, global_step=ctr)  
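
For reference, the explicit compute_gradients / apply_gradients pair above is equivalent to the usual single call:

# minimize() computes and applies the gradients internally
z = opt.minimize(loss, global_step=ctr)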

4) The TensorBoard code:

evt_file = tf.summary.FileWriter('/Users/vibhorj/python/-tf/g_mnist')
evt_file.add_graph(tf.get_default_graph())

s1 = tf.summary.scalar(name='accuracy', tensor=accuracy)
s2 = tf.summary.scalar(name='loss', tensor=loss)
m1 = tf.summary.merge([s1,s2])
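
The accuracy tensor summarized here is also defined only in the full code; for one-hot labels, a typical definition would be:

# fraction of examples whose arg-max prediction matches the arg-max label
correct = tf.equal(tf.argmax(Y_pred_logits, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')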

5) Running the session (the test data is mnist.test.images & mnist.test.labels):

with tf.Session() as sess:
    sess.run(tf.variables_initializer(tf.global_variables()))
    for i in range(300):
       """ calc. accuracy on test data - TENSORBOARD before iteration beings """
       summary = sess.run(m1, feed_dict=test_data)
       evt_file.add_summary(summary, sess.run(ctr))
       evt_file.flush()

       """ fetch train data """        
       a_train, b_train = mnist.train.next_batch(batch_size=100)
       train_data = {X: a_train , Y: b_train}

       """ train """
       sess.run(z, feed_dict = train_data)

Thanks for taking the time to share any insight. I am completely clueless (I have even tried initializing w & b with random_normal, and learning rates of [0.1, 0.01, 0.001]).

Cheers!

1 Answer:

Answer 0 (score: 1)

Consider:

  1. Initializing the biases to zero
  2. Using ReLU units instead of sigmoid - avoids saturation
  3. Using the Adam optimizer - faster learning
  4. I also feel your network is pretty big; you could go with a smaller network.

    K,L,M,N=200,100,60,30
    """ Layer 1 """
    with tf.name_scope('L1'):
        w1 = tf.Variable(initial_value = tf.truncated_normal([28*28,K],mean=0,stddev=0.1), name = 'w1')
        b1 = tf.Variable(tf.zeros([K]), name='b1')  # zero-init, still trainable (a bare tf.zeros would be a non-trainable constant)
    """ Layer 2 """
    with tf.name_scope('L2'):
        w2 = tf.Variable(initial_value =tf.truncated_normal([K,L],mean=0,stddev=0.1), name = 'w2')
        b2 = tf.Variable(tf.zeros([L]), name='b2')
    """ Layer 3 """
    with tf.name_scope('L3'):
        w3 = tf.Variable(initial_value = tf.truncated_normal([L,M],mean=0,stddev=0.1), name = 'w3')
        b3 = tf.Variable(tf.zeros([M]), name='b3')
    """ Layer 4 """
    with tf.name_scope('L4'):
        w4 = tf.Variable(initial_value = tf.truncated_normal([M,N],mean=0,stddev=0.1), name = 'w4')
        b4 = tf.Variable(tf.zeros([N]), name='b4')
    """ Layer output """
    with tf.name_scope('L_out'):
        w_out = tf.Variable(initial_value = tf.truncated_normal([N,10],mean=0,stddev=0.1), name = 'w_out')
        b_out = tf.Variable(tf.zeros([10]), name='b_out')
    
    
    Y1 = tf.nn.relu(tf.add(tf.matmul(X,w1),b1), name='Y1')
    Y2 = tf.nn.relu(tf.add(tf.matmul(Y1,w2),b2), name='Y2')
    Y3 = tf.nn.relu(tf.add(tf.matmul(Y2,w3),b3), name='Y3')
    Y4 = tf.nn.relu(tf.add(tf.matmul(Y3,w4),b4), name='Y4')
    
    Y_pred_logits = tf.add(tf.matmul(Y4, w_out),b_out,name='logits')
    
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=Y_pred_logits, name='xentropy'))
    opt = tf.train.GradientDescentOptimizer(0.01)  # per point 3, tf.train.AdamOptimizer(0.001) should learn faster still
    ctr = tf.Variable(0.0, name='ctr', trainable=False)
    train_op = opt.minimize(loss, global_step=ctr)  # minimize() computes and applies the gradients in one step
    
    for v in tf.trainable_variables():
      print(v.op.name)
    
    with tf.Session() as sess:
        sess.run(tf.variables_initializer(tf.global_variables()))
        for i in range(3000):
           """ calc. accuracy on test data - TENSORBOARD before iteration beings """
           #summary = sess.run(m1, feed_dict=test_data)
           #evt_file.add_summary(summary, sess.run(ctr))
           #evt_file.flush()
    
           """ fetch train data """
           a_train, b_train = mnist.train.next_batch(batch_size=100)
           train_data = {X: a_train , Y: b_train}
    
           """ train """
           l = sess.run(loss, feed_dict = train_data)
           print(l)
           sess.run(train_op, feed_dict = train_data)
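
As an aside, it is worth spelling out the core bug in the original loss: tf.reshape(tf.log(Y_pred_prob), [10,-1]) reinterprets the [batch, 10] matrix row-major rather than transposing it, and the subsequent tf.matmul(Y, ...) then yields a [batch, batch] matrix that pairs every label with every example's (scrambled) prediction, so its mean carries almost no useful gradient signal - which matches the flat loss curve. If you prefer a hand-rolled cross-entropy over softmax_cross_entropy_with_logits, the row-wise form would look like this sketch (the small epsilon is my addition, to guard against log(0)):

    # element-wise product + per-row sum pairs each example with its own
    # one-hot label; Y and Y_pred_prob both have shape [batch, 10]
    error = -tf.reduce_sum(Y * tf.log(Y_pred_prob + 1e-10), axis=1, name='err')
    loss = tf.reduce_mean(error, name='loss')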