This is code written with Keras to regress the sine function. It works perfectly.
import numpy as np
from keras.layers import Dense, Activation
from keras.models import Sequential
import matplotlib.pyplot as plt
import math
import time

x = np.arange(0, math.pi*2*2, 0.1)
y = np.sin(x)

model = Sequential([Dense(10, input_shape=(1,)), Activation('tanh'),
                    Dense(3), Activation('tanh'),
                    Dense(1)])
model.compile(loss='mean_squared_error', optimizer='SGD', metrics=['mean_squared_error'])

t1 = time.time()
for i in range(40):
    model.fit(x, y, epochs=1000, batch_size=len(x), verbose=0)
    predictions = model.predict(x)
    mse = np.mean(np.square(predictions.flatten() - y))  # flatten so the shapes align
    print(i, " ", mse, " t: ", time.time() - t1)
    plt.clf()  # redraw the figure each iteration (plt.hold is deprecated)
    plt.plot(x, y, 'b', x, predictions, 'r--')
    plt.ylabel('Y / Predicted Value')
    plt.xlabel('X Value')
    plt.title("%d  Loss: %f  t: %.1f s" % (i, mse, time.time() - t1))
    plt.pause(0.001)
plt.savefig("fig2.png")
plt.show()
I tried to write the same thing with the lower-level API to understand how the neural network works. Here is my code for regressing the sine function with TensorFlow:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math

# Model input and output
x = tf.placeholder(tf.float32, [None, 1])
y = tf.placeholder(tf.float32, [None, 1])

# training data
x_plot = np.arange(0, math.pi*2*2, 0.1)
x_train = x_plot.reshape(-1, 1)
y_train_tf = tf.sin(x)

# Model parameters
W1 = tf.Variable(tf.ones([1,10])*.3, dtype=tf.float32)
b1 = tf.Variable(tf.ones([10])*(-.3), dtype=tf.float32)
W2 = tf.Variable(tf.ones([10,3])*.3, dtype=tf.float32)
b2 = tf.Variable(tf.ones([3])*(-.3), dtype=tf.float32)
W3 = tf.Variable(tf.ones([3,1])*.3, dtype=tf.float32)
b3 = tf.Variable(tf.ones([1])*(-.3), dtype=tf.float32)

layer1 = tf.tanh(tf.multiply(x, W1) + b1)  # broadcasts [None,1]*[1,10] -> [None,10]
layer2 = tf.tanh(tf.matmul(layer1, W2) + b2)
linear_model = tf.reduce_sum(tf.matmul(layer2, W3), 1, keepdims=True) + b3

# loss
loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of the squares

# optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  # initialize the variables

fig, ax = plt.subplots()
for i in range(40000):
    y_train = sess.run(y_train_tf, {x: x_train})  # this could be dropped; the next line would then only need x_train, no y_train
    f_predict, _ = sess.run([linear_model, train], feed_dict={x: x_train, y: y_train})
    curr_layer1, curr_layer2, curr_W1, curr_b1, curr_W2, curr_b2, curr_W3, curr_b3, curr_loss = sess.run(
        [layer1, layer2, W1, b1, W2, b2, W3, b3, loss], {x: x_train, y: y_train})
    if i % 1000 == 999:
        print("step ", i)
        print("W1: %s b1: %s" % (curr_W1, curr_b1))
        print("W2: %s b2: %s" % (curr_W2, curr_b2))
        print("W3: %s b3: %s" % (curr_W3, curr_b3))
        print("layer1: %s layer2: %s" % (curr_layer1, curr_layer2))
        print("linear_model: %s loss: %s" % (f_predict, curr_loss))
        print(" ")
        y_plot = y_train.reshape(1, -1)[0]
        pred_plot = f_predict.reshape(1, -1)[0]
        ax.clear()  # redraw the figure each report (plt.hold is deprecated)
        ax.plot(x_plot, y_plot)
        ax.plot(x_plot, pred_plot, 'o-')
        ax.set(xlabel='X Value', ylabel='Y / Predicted Value',
               title="%d  Loss: %s" % (i, curr_loss))
        plt.pause(0.001)
fig.savefig("fig1.png")
plt.show()
But it doesn't work, and I can't figure out where the difference is. The learning rate in the Keras code defaults to 0.01. The optimizer is the same. The network is the same. I don't know where my mistake is.
Answer (score: 2)
Here is the answer! I had forgotten to start from proper random weights: tf.random_normal([1,10], stddev=0.03). With the constant initialization above, all units in a layer compute the same output and receive the same gradient, so they never differentiate and the network can't fit the curve.
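(For context: the Keras version trains out of the box because Dense initializes its kernel with the glorot_uniform initializer by default; only the hand-written TensorFlow version started from constants. A minimal check of that default, assuming the Keras 2.x API:)

from keras.layers import Dense
layer = Dense(10, input_shape=(1,))
print(layer.kernel_initializer)  # a glorot_uniform (VarianceScaling) instance by default

The corrected TensorFlow code, with random initialization and a TensorBoard summary added: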
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math
# Model input and output
x = tf.placeholder(tf.float32, [None, 1])
# training data
x_plot = np.arange(0, math.pi*2*2, 0.1)
x_train = x_plot.reshape(-1, 1)
y_train_tf = tf.sin(x)
# Model parameters
W1 = tf.Variable(tf.random_normal([1,10], stddev=0.03), dtype=tf.float32, name='W1')
b1 = tf.Variable(tf.random_normal([10], stddev=0.03), dtype=tf.float32, name='b1')
W2 = tf.Variable(tf.random_normal([10,3], stddev=0.03), dtype=tf.float32, name='W2')
b2 = tf.Variable(tf.random_normal([3], stddev=0.03), dtype=tf.float32, name='b2')
W3 = tf.Variable(tf.random_normal([3,1], stddev=0.03), dtype=tf.float32, name='W3')
b3 = tf.Variable(tf.random_normal([1], stddev=0.03), dtype=tf.float32, name='b3')
layer1 = tf.tanh(tf.multiply(x,W1) + b1)
layer2 = tf.tanh(tf.matmul(layer1, W2) + b2)
linear_model = tf.reduce_sum(tf.matmul(layer2, W3) + b3, 1, keepdims=True)
# loss
#loss = tf.reduce_sum(tf.square(linear_model - y_train_tf)) # sum of the squares
loss = tf.losses.mean_squared_error(y_train_tf,linear_model)
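# (note: unlike the original reduce_sum loss, mean_squared_error averages over
# the batch, which scales the gradients down by the number of samples and
# matches Keras's 'mean_squared_error' loss)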
tf.summary.scalar('loss', loss)
# optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
# Merge all the summaries
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('train_tensorboard',sess.graph)
sess.run(init)  # initialize the variables
fig, ax = plt.subplots()
for i in range(40000):
    summary, f_predict, _ = sess.run([merged, linear_model, train], feed_dict={x: x_train})
    y_train, curr_layer1, curr_layer2, curr_W1, curr_b1, curr_W2, curr_b2, curr_W3, curr_b3, curr_loss = sess.run(
        [y_train_tf, layer1, layer2, W1, b1, W2, b2, W3, b3, loss], {x: x_train})
    train_writer.add_summary(summary, i)
    if i % 1000 == 999:
        print("step ", i)
        print("W1: %s b1: %s" % (curr_W1, curr_b1))
        print("W2: %s b2: %s" % (curr_W2, curr_b2))
        print("W3: %s b3: %s" % (curr_W3, curr_b3))
        print("layer1: %s layer2: %s" % (curr_layer1, curr_layer2))
        print("linear_model: %s loss: %s" % (f_predict, curr_loss))
        print(" ")
        y_plot = y_train.reshape(1, -1)[0]
        pred_plot = f_predict.reshape(1, -1)[0]
        ax.clear()  # redraw the figure each report (plt.hold is deprecated)
        ax.plot(x_plot, y_plot)
        ax.plot(x_plot, pred_plot, 'g--')
        ax.set(xlabel='X Value', ylabel='Y / Predicted Value',
               title="%d  Loss: %s" % (i, curr_loss))
        plt.pause(0.001)
fig.savefig("fig1.png")
plt.show()
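Why the constant initialization stalls training: when every weight in a layer starts at the same value, every hidden unit computes the same activation and receives the same gradient, so the units can never become different from one another and the layer collapses to a single effective unit. Here is a minimal NumPy sketch of one such gradient step (the 1-to-4 tanh layer and the upstream gradient are made up for illustration):

import numpy as np
rng = np.random.RandomState(0)
xb = rng.randn(8, 1)                  # toy input batch
W = np.full((1, 4), 0.3)              # constant init, as in the question
b = np.full(4, -0.3)
h = np.tanh(xb.dot(W) + b)            # all 4 columns of h are identical
g = h - 0.5                           # placeholder upstream gradient
dW = xb.T.dot(g * (1 - h ** 2))       # gradient of the tanh layer w.r.t. W
print(np.ptp(h, axis=1))              # all zeros: the units agree exactly
print(dW)                             # all entries equal: the units stay identical

Random starting values such as tf.random_normal([1,10], stddev=0.03) break this symmetry, which is exactly what Keras's default initializer does automatically.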