What is causing this neural network's learning to stall?

Time: 2017-01-15 23:04:32

Tags: machine-learning neural-network

I am trying to see how accurately a neural network can approximate a simple function, for example a scalar-valued polynomial in several variables. So my idea was this:

  1. Fix a polynomial in several variables, say f(x_1,..,x_n).
  2. Generate 50000 vectors of length n using numpy.random; these will serve as the training data.
  3. Evaluate f(x) at these points; the values will serve as the labels.
  4. Make the test data and labels in the same way.
  5. Write a neural network and see how accurately it can approximate f(x) on the test set.
  6. Here is the sample neural network I implemented in tensorflow:

    import tensorflow as tf
    import numpy as np
    
    input_vector_length = int(10) 
    output_vector_length = int(1)
    train_data_size = int(50000)
    test_data_size = int(10000)
    train_input_domain = [-10, 10]  #Each component in an input vector is between -10 and 10
    test_input_domain = [-10, 10]
    iterations = 20000
    batch_size = 200
    regularizer = 0.01
    sess = tf.Session()
    
    x = tf.placeholder(tf.float32, shape=[None, input_vector_length], name="x")
    y = tf.placeholder(tf.float32, shape =[None, output_vector_length], name="y")
    
    # f(x) = s + 0.25*s^2 + 0.025*s^3, where s is the sum of the components of x
    function = tf.reduce_sum(x, 1) + 0.25*tf.pow(tf.reduce_sum(x,1), 2) + 0.025*tf.pow(tf.reduce_sum(x,1), 3)
    
    #make train data input
    train_input = (train_input_domain[1]-train_input_domain[0])*np.random.rand(train_data_size, input_vector_length) + train_input_domain[0]
    
    #make train data label
    train_label = sess.run(function, feed_dict = {x : train_input})
    train_label = train_label.reshape(train_data_size, output_vector_length)
    
    #make test data input
    test_input = (test_input_domain[1]-test_input_domain[0])*np.random.rand(test_data_size, input_vector_length) + test_input_domain[0]
    
    #make test data label
    test_label = sess.run(function, feed_dict = {x : test_input})
    test_label = test_label.reshape(test_data_size, output_vector_length)
    
    def weight_variables(shape, name):
        initial = 10*tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)
    def bias_variables(shape, name):
        initial = 10*tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)
    def take_this_batch(data, batch_index=[]):
        A = []
        for i in range(len(batch_index)):
            A.append(data[i])
        return A
    
    W_0 = weight_variables(shape=[input_vector_length, 10], name="W_0")
    B_0 = bias_variables(shape=[10], name="B_0")
    y_1 = tf.sigmoid(tf.matmul(x, W_0) + B_0)
    
    
    W_1 = weight_variables(shape=[10, 20], name="W_1")
    B_1 = bias_variables(shape=[20], name="B_1")
    y_2 = tf.sigmoid(tf.matmul(y_1, W_1) + B_1)
    
    W_2 = weight_variables(shape=[20,40], name="W_2")
    B_2 = bias_variables(shape=[40], name="B_2")
    y_3 = tf.sigmoid(tf.matmul(y_2, W_2) + B_2)
    
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    y_drop = tf.nn.dropout(y_3, keep_prob)
    
    W_output = weight_variables(shape=[40, output_vector_length], name="W_output")
    B_output = bias_variables(shape=[output_vector_length], name="B_output")
    y_output = tf.matmul(y_drop, W_output) + B_output
    
    
    weight_sum = tf.reduce_sum(tf.square(W_0)) + tf.reduce_sum(tf.square(W_1)) + tf.reduce_sum(tf.square(W_2)) + tf.reduce_sum(tf.square(W_output))
    cost = tf.reduce_mean(tf.square(y - y_output)) + regularizer*(weight_sum)
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cost)
    error = cost
    
    sess.run(tf.initialize_all_variables())
    with sess.as_default():
        for step in range(iterations):
            batch_index = np.random.randint(low=0, high=train_data_size, size=batch_size)
            batch_input = take_this_batch(train_input, batch_index)
            batch_label = take_this_batch(train_label, batch_index)
            train_step.run(feed_dict = {x : batch_input, y:batch_label, keep_prob:0.5})
            if step % 1000 == 0:
                current_error = error.eval(feed_dict = {x:batch_input, y:batch_label, keep_prob:1.0})
                print("step %d, Current error is %f" % (step,current_error))
    
    
        print(error.eval(feed_dict={x:test_input, y:test_label, keep_prob:1.0}))
    

Simply put, the performance of this neural network is appalling! The network has three hidden layers of sizes 10, 20 and 40. The input layer has size 10 and the output layer has size 1. I used a simple L^2 cost function, regularized by the squared weights with regularization constant 0.01.
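In symbols, the objective minimized by the code above is

    cost = mean((y - y_output)^2) + 0.01 * sum_i ||W_i||^2

where the sum runs over the weight matrices of all four layers and ||W_i||^2 denotes the sum of the squared entries of W_i.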

During the training phase, I noticed that the error seems to get stuck and refuses to decrease. I am wondering what could be going wrong? Thank you very much for reading this long question; any suggestions are appreciated.

1 answer:

Answer 0 (score: 0)

Since you are using sigmoid as the activation function in the hidden layers, the values of these neurons are squashed into the range (0, 1). It would therefore be better to normalize the input data for this network.
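A minimal sketch of what that normalization could look like, reusing `train_input` and `test_input` from the question (the helper names `col_min` and `col_max` are just illustrative) and assuming simple min-max scaling of each input component to [0, 1]; the scaling statistics come from the training set only, and the labels stay as computed from the raw inputs:

    # Illustrative normalization step (not in the original code):
    # rescale each input component to [0, 1] using training-set statistics,
    # so the inputs match the (0, 1) range of the sigmoid activations.
    col_min = train_input.min(axis=0)
    col_max = train_input.max(axis=0)
    train_input = (train_input - col_min) / (col_max - col_min)
    test_input = (test_input - col_min) / (col_max - col_min)

With inputs on this scale, the sigmoid units are far less likely to saturate at the start of training, which is one common cause of a stalled error.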