I'm trying to train a neural network to predict the sum of two numbers, but I don't understand what's wrong with my model. The model consists of 2 inputs, 2 hidden layers, and 1 output layer. Every 1000 iterations I print a test prediction, and the results keep getting smaller and smaller.
import numpy as np
import tensorflow as tf
input_size = 2
hidden_size = 3
out_size = 1
def generate_test_data():
    inp = 0.5 * np.random.rand(10, 2)
    oup = np.zeros((10, 1))
    for idx, val in enumerate(inp):
        oup[idx] = np.array([val[0] + val[1]])
    return inp, oup

def create_network():
    x = tf.placeholder(tf.float32, [None, input_size])
    w01 = tf.Variable(tf.truncated_normal([input_size, hidden_size], stddev=0.1))
    y1 = tf.sigmoid(tf.matmul(tf.sigmoid(x), w01))
    w12 = tf.Variable(tf.truncated_normal([hidden_size, out_size], stddev=0.1))
    y2 = tf.sigmoid(tf.matmul(y1, w12))
    y_ = tf.placeholder(tf.float32, [None, out_size])
    return x, y_, y2

def train(x, y_, y2):
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y2)
    )
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    # Train
    for i in range(100000):
        batch_xs, batch_ys = generate_test_data()
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
        # Test
        if i % 1000 == 0:
            out_batch = sess.run(y2, {x: batch_xs})
            inx = 0
            print(batch_xs[inx][0], " + ", batch_xs[inx][1], " = ", out_batch[inx][0])
(x, y_, y2) = create_network()
train(x, y_, y2)
Output every 1000 iterations:
0.37301352864927173 + 0.28949461772342683 = 0.49111518
0.050899466843458474 + 0.006174158992116541 = 0.0025260744
0.3974852369427063 + 0.22402098418952499 = 0.00090828544
0.15735921047969498 + 0.39645077887600294 = 0.0005903727
0.23560825884336228 + 0.29010766384718145 = 0.0004317883
0.4250063393420791 + 0.24181166029062096 = 0.00031525563
...and so on, getting smaller and smaller.
Answer 0 (score: 3)
Cross-entropy loss is meant for classification problems, whereas your task is clearly regression. The computed cross_entropy value is meaningless, hence the results you see.
Change your loss to:
cross_entropy = tf.reduce_mean(
    tf.nn.l2_loss(y_ - y2)
)
...and you will see much more sensible results.
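To make the degeneracy concrete: with out_size = 1 the softmax is computed over a single logit, so it always evaluates to exactly 1, and the cross-entropy value no longer tells you anything about how close the prediction is to the target sum. A minimal NumPy sketch (separate from the model above; the 0.66 label is just an illustrative sum of two numbers below 1):

import numpy as np

def softmax(z):
    e = np.exp(z - np.max(z))
    return e / e.sum()

# With out_size = 1 the softmax is taken over a single logit, so it returns
# exactly 1.0 no matter what the network predicts.
for logit in (-5.0, 0.1, 0.73, 42.0):
    p = softmax(np.array([logit]))
    # Cross-entropy against a fractional label such as 0.66 is then
    # -0.66 * log(1.0) = 0: the loss value carries no information about
    # how far the prediction is from the true sum.
    loss = -(0.66 * np.log(p)).sum()
    print(logit, p[0], loss)

With tf.nn.l2_loss the objective is the actual squared distance to the target, so gradient descent has something meaningful to minimize.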
Answer 1 (score: 0)
Maxim, thank you very much. It works now.
import numpy as np
import tensorflow as tf
input_size = 2
hidden_size = 3
out_size = 1
def generate_test_data():
    inp = 0.5 * np.random.rand(10, 2)
    oup = np.zeros((10, 1))
    for idx, val in enumerate(inp):
        oup[idx] = np.array([val[0] + val[1]])
    return inp, oup

def create_network():
    x = tf.placeholder(tf.float32, [None, input_size])
    w01 = tf.Variable(tf.truncated_normal([input_size, hidden_size], stddev=0.1))
    y1 = tf.matmul(x, w01)
    w12 = tf.Variable(tf.truncated_normal([hidden_size, out_size], stddev=0.1))
    y2 = tf.matmul(y1, w12)
    y_ = tf.placeholder(tf.float32, [None, out_size])
    return x, y_, y2

def train(x, y_, y2):
    cross_entropy = tf.reduce_mean(
        tf.nn.l2_loss(y_ - y2)
    )
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
    sess = tf.InteractiveSession()
    tf.global_variables_initializer().run()
    # Train
    for i in range(100000):
        batch_xs, batch_ys = generate_test_data()
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
        # Test
        if i % 2000 == 0:
            out_batch = sess.run(y2, {x: batch_xs})
            inx = 0
            print(batch_xs[inx][0], " + ", batch_xs[inx][1], " = ", out_batch[inx][0], "|", batch_xs[inx][0] + batch_xs[inx][1])
(x, y_, y2) = create_network()
train(x, y_, y2)
Answer 2 (score: 0)
If you reframe it as a classification problem, in which each predicted digit is a value from "0123456789", then you can use cross-entropy as the loss. For reference, see the Keras Addition RNN Example.
But as Maxim said, it should not be used for a regression problem.
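For illustration, here is a minimal sketch of that classification framing (not the referenced Keras example itself; the single-digit inputs and the 19 sum classes are assumptions made here for brevity):

import numpy as np
import tensorflow as tf

# Assumed setup: inputs are integer digits 0-9 and the sum (0-18) is treated
# as one of 19 classes, so cross-entropy is a valid loss here.
num_classes = 19

def generate_classification_data(n=256):
    digits = np.random.randint(0, 10, size=(n, 2)).astype(np.float32)
    sums = digits.sum(axis=1).astype(np.int32)  # class index 0..18
    return digits, sums

model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation="relu", input_shape=(2,)),
    tf.keras.layers.Dense(num_classes),  # logits over the 19 possible sums
])
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)

x_train, y_train = generate_classification_data()
model.fit(x_train, y_train, epochs=50, verbose=0)

x_test, y_test = generate_classification_data(10)
predictions = model.predict(x_test).argmax(axis=1)
print(list(zip(y_test.tolist(), predictions.tolist())))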