我想用 TensorFlow 训练一个神经网络(NN)来预测某个数字的平方根(sqrt)。以下是我的代码,但损失无法降到 0,预测结果也不正确,请问是什么问题?
#!/usr/bin/env python
# Question code: a 1-100-50-1 fully-connected network trained to approximate
# sqrt(x) on random inputs in [0, 1).
# NOTE(review): the output layer is wrapped in ReLU (see `y` below); if the
# pre-activation goes negative the output saturates at 0 and the gradient dies,
# which matches the all-zero predictions in the log.
import numpy as np
import tensorflow as tf

if __name__ == '__main__':
    dimension = 1

    # Mini-batch of scalar inputs, shape (batch, 1).
    X = tf.placeholder(tf.float32, [None, dimension])

    # Hidden layer 1: 1 -> 100, ReLU activation.
    W = tf.Variable(tf.random_normal([dimension, 100], stddev=0.01))
    b = tf.Variable(tf.zeros([100]))
    h1 = tf.nn.relu(tf.matmul(X, W) + b)

    # Hidden layer 2: 100 -> 50, ReLU activation.
    W2 = tf.Variable(tf.random_normal([100, 50], stddev=0.01))
    b2 = tf.Variable(tf.zeros([50]))
    h2 = tf.nn.relu(tf.matmul(h1, W2) + b2)

    # Output layer: 50 -> 1. The ReLU here is the defect being asked about.
    W3 = tf.Variable(tf.random_normal([50, 1], stddev=0.01))
    b3 = tf.Variable(tf.zeros([1]))
    y = tf.nn.relu(tf.matmul(h2, W3) + b3)

    # Targets and mean-squared-error loss, plain SGD with lr = 0.01.
    Y = tf.placeholder(tf.float32, [None, dimension])
    cost = tf.reduce_mean(tf.pow(y - Y, 2))
    optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(cost)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        # Train on a fresh random batch of 1000 samples each epoch.
        for epoch in range(1000):
            sx = np.random.rand(1000, 1)
            sy = np.sqrt(sx)
            sess.run(optimizer, feed_dict={X: sx, Y: sy})
            c = sess.run(cost, feed_dict={X: sx, Y: sy})
            print("Epoch:", '%04d' % (epoch + 1), "cost=", "%.03f" % c)

        # Evaluate the loss on a larger held-out random sample.
        sx = np.random.rand(10000, 1)
        sy = np.sqrt(sx)
        tc = sess.run(cost, feed_dict={X: sx, Y: sy})
        print("Testing cost=", tc)

        # Spot-check two fixed inputs against their true square roots.
        sx = np.array([[0.01], [0.5]])
        sy = np.sqrt(sx)
        print(sy)
        print(sess.run(y, feed_dict={X: sx, Y: sy}))
        print(sess.run(cost, feed_dict={X: sx, Y: sy}))
这是输出,它无法得到正确的结果:
...
('Epoch:', '0999', 'cost=', '0.502')
('Epoch:', '1000', 'cost=', '0.499')
('Testing cost=', 0.49828479)
[[ 0.1 ]
[ 0.70710678]]
[[ 0.]
[ 0.]]
0.255
答案(得分:0):

我认为有几点需要理解:
1. 避免在最后一层使用 ReLU,因为它可能使梯度变为零。
2. 神经网络很难对任意随机值做外推。损失可能会下降,但它未必能在新数据上给出正确的结果。
3. 你必须选择一个数据子集(我取了区间 [1, 50] 内的整数),可以观察到它在该子集上能正确地训练和预测,但对子集之外的数据泛化得并不好。
# Answer code: same 1-100-50-1 network, but with a *linear* output layer
# (no ReLU on `y`), a smaller learning rate, and integer training data drawn
# from [0, cap) so the network has a bounded subset to fit.
import numpy as np
import tensorflow as tf

if __name__ == '__main__':
    dimension = 1

    # Mini-batch of scalar inputs, shape (batch, 1).
    X = tf.placeholder(tf.float32, [None, dimension])

    # Hidden layer 1: 1 -> 100, ReLU activation.
    W = tf.Variable(tf.random_normal([dimension, 100], stddev=0.01))
    b = tf.Variable(tf.zeros([100]))
    h1 = tf.nn.relu(tf.matmul(X, W) + b)

    # Hidden layer 2: 100 -> 50, ReLU activation.
    W2 = tf.Variable(tf.random_normal([100, 50], stddev=0.01))
    b2 = tf.Variable(tf.zeros([50]))
    h2 = tf.nn.relu(tf.matmul(h1, W2) + b2)

    # Output layer: 50 -> 1, linear — the fix for the dead-ReLU output.
    W3 = tf.Variable(tf.random_normal([50, 1], stddev=0.01))
    b3 = tf.Variable(tf.zeros([1]))
    y = (tf.matmul(h2, W3) + b3)

    # Targets and MSE loss; SGD with the smaller lr = 0.001.
    Y = tf.placeholder(tf.float32, [None, dimension])
    cost = tf.reduce_mean(tf.squared_difference(y, Y))
    optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(cost)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        cap = 50

        # Train on batches of 100 random integers in [0, cap);
        # log the loss every 100 epochs.
        for epoch in range(2000):
            sx = np.random.randint(cap, size=(100, 1))
            #sx = np.random.rand(100,1)
            sy = np.sqrt(sx)
            op, c = sess.run([optimizer, cost], feed_dict={X: sx, Y: sy})
            if epoch % 100 == 0:
                print("Epoch:", '%04d' % (epoch + 1), "cost=", "%.03f" % c)

        # Evaluate on 10 fresh integers from the same range.
        #sx = np.random.rand(10,1)
        sx = np.random.randint(cap, size=(10, 1))
        sy = np.sqrt(sx)
        print("Input")
        print(sx)
        print("Expected Output")
        print(sy)
        print("Predicted Output")
        print(sess.run(y, feed_dict={X: sx, Y: sy}))
        print("Error")
        print(sess.run(cost, feed_dict={X: sx, Y: sy}))
输出日志:
('Epoch:', '0001', 'cost=', '25.258')
('Epoch:', '0101', 'cost=', '0.428')
('Epoch:', '0201', 'cost=', '0.452')
('Epoch:', '0301', 'cost=', '0.456')
('Epoch:', '0401', 'cost=', '0.320')
('Epoch:', '0501', 'cost=', '0.306')
('Epoch:', '0601', 'cost=', '0.312')
('Epoch:', '0701', 'cost=', '0.321')
('Epoch:', '0801', 'cost=', '0.268')
('Epoch:', '0901', 'cost=', '0.228')
('Epoch:', '1001', 'cost=', '0.264')
('Epoch:', '1101', 'cost=', '0.246')
('Epoch:', '1201', 'cost=', '0.241')
('Epoch:', '1301', 'cost=', '0.251')
('Epoch:', '1401', 'cost=', '0.141')
('Epoch:', '1501', 'cost=', '0.218')
('Epoch:', '1601', 'cost=', '0.213')
('Epoch:', '1701', 'cost=', '0.146')
('Epoch:', '1801', 'cost=', '0.186')
('Epoch:', '1901', 'cost=', '0.176')
Input
[[29]
[39]
[10]
[ 2]
[ 2]
[17]
[ 4]
[26]
[ 3]
[31]]
Expected Output
[[ 5.38516481]
[ 6.244998 ]
[ 3.16227766]
[ 1.41421356]
[ 1.41421356]
[ 4.12310563]
[ 2. ]
[ 5.09901951]
[ 1.73205081]
[ 5.56776436]]
Predicted Output
[[ 5.11237049]
[ 6.35184956]
[ 2.75735927]
[ 1.76557863]
[ 1.76557863]
[ 3.62499475]
[ 2.01356125]
[ 4.74052668]
[ 1.88956988]
[ 5.36026621]]
Error
0.0941391