This is code written with Keras to regress the sine function. It works perfectly.
import numpy as np
from keras.layers import Dense, Activation
from keras.models import Sequential
import matplotlib.pyplot as plt
import math
import time

x = np.arange(0, math.pi*2*2, 0.1)
y = np.sin(x)

model = Sequential([Dense(10, input_shape=(1,)), Activation('tanh'),
                    Dense(3), Activation('tanh'),
                    Dense(1)])
model.compile(loss='mean_squared_error', optimizer='SGD', metrics=['mean_squared_error'])

t1 = time.time()
for i in range(40):
    model.fit(x, y, epochs=1000, batch_size=len(x), verbose=0)
    predictions = model.predict(x)
    mse = np.mean(np.square(predictions.flatten() - y))  # flatten so the shapes align
    print(i, " ", mse, " t: ", time.time() - t1)
    plt.clf()  # redraw the figure each iteration (plt.hold is deprecated)
    plt.plot(x, y, 'b', x, predictions, 'r--')
    plt.ylabel('Y / Predicted Value')
    plt.xlabel('X Value')
    plt.title("%d  Loss: %f  t: %.1f s" % (i, mse, time.time() - t1))
    plt.pause(0.001)
plt.savefig("fig2.png")
plt.show()
I tried to write the same thing with the lower-level API to understand how the neural network works. Here is my code for regressing the sine function with TensorFlow:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math

# Model input and output
x = tf.placeholder(tf.float32, [None, 1])
y = tf.placeholder(tf.float32, [None, 1])

# training data
x_plot = np.arange(0, math.pi*2*2, 0.1)
x_train = x_plot.reshape(-1, 1)
y_train_tf = tf.sin(x)

# Model parameters
W1 = tf.Variable(tf.ones([1,10])*.3, dtype=tf.float32)
b1 = tf.Variable(tf.ones([10])*(-.3), dtype=tf.float32)
W2 = tf.Variable(tf.ones([10,3])*.3, dtype=tf.float32)
b2 = tf.Variable(tf.ones([3])*(-.3), dtype=tf.float32)
W3 = tf.Variable(tf.ones([3,1])*.3, dtype=tf.float32)
b3 = tf.Variable(tf.ones([1])*(-.3), dtype=tf.float32)

layer1 = tf.tanh(tf.multiply(x, W1) + b1)  # broadcasts [None,1]*[1,10] -> [None,10]
layer2 = tf.tanh(tf.matmul(layer1, W2) + b2)
linear_model = tf.reduce_sum(tf.matmul(layer2, W3), 1, keepdims=True) + b3

# loss
loss = tf.reduce_sum(tf.square(linear_model - y))  # sum of the squares

# optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)

# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)  # initialize the variables

fig, ax = plt.subplots()
for i in range(40000):
    y_train = sess.run(y_train_tf, {x: x_train})  # this could be dropped; the next line would then only need x_train, no y_train
    f_predict, _ = sess.run([linear_model, train], feed_dict={x: x_train, y: y_train})
    curr_layer1, curr_layer2, curr_W1, curr_b1, curr_W2, curr_b2, curr_W3, curr_b3, curr_loss = sess.run(
        [layer1, layer2, W1, b1, W2, b2, W3, b3, loss], {x: x_train, y: y_train})
    if i % 1000 == 999:
        print("step ", i)
        print("W1: %s b1: %s" % (curr_W1, curr_b1))
        print("W2: %s b2: %s" % (curr_W2, curr_b2))
        print("W3: %s b3: %s" % (curr_W3, curr_b3))
        print("layer1: %s layer2: %s" % (curr_layer1, curr_layer2))
        print("linear_model: %s loss: %s" % (f_predict, curr_loss))
        print(" ")
        y_plot = y_train.reshape(1, -1)[0]
        pred_plot = f_predict.reshape(1, -1)[0]
        ax.clear()  # redraw the figure each report (plt.hold is deprecated)
        ax.plot(x_plot, y_plot)
        ax.plot(x_plot, pred_plot, 'o-')
        ax.set(xlabel='X Value', ylabel='Y / Predicted Value',
               title="%d  Loss: %s" % (i, curr_loss))
        plt.pause(0.001)
fig.savefig("fig1.png")
plt.show()
But it doesn't work, and I can't figure out where the difference is. The learning rate in the Keras code defaults to 0.01. The optimizer is the same. The network is the same. I don't know where my mistake is.
Answer (score: 2)
Here is the answer! I had forgotten to start from proper random weights: tf.random_normal([1,10], stddev=0.03). With the constant initialization above, all units in a layer compute the same output and receive the same gradient, so they never differentiate and the network can't fit the curve.
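(For context: the Keras version trains out of the box because Dense initializes its kernel with the glorot_uniform initializer by default; only the hand-written TensorFlow version started from constants. A minimal check of that default, assuming the Keras 2.x API:)

from keras.layers import Dense
layer = Dense(10, input_shape=(1,))
print(layer.kernel_initializer)  # a glorot_uniform (VarianceScaling) instance by default

The corrected TensorFlow code, with random initialization and a TensorBoard summary added: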
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import math
# Model input and output
x = tf.placeholder(tf.float32, [None, 1])
# training data
x_plot = np.arange(0, math.pi*2*2, 0.1)
x_train = x_plot.reshape(-1, 1)
y_train_tf = tf.sin(x)
# Model parameters
W1 = tf.Variable(tf.random_normal([1,10], stddev=0.03), dtype=tf.float32, name='W1')
b1 = tf.Variable(tf.random_normal([10], stddev=0.03), dtype=tf.float32, name='b1')
W2 = tf.Variable(tf.random_normal([10,3], stddev=0.03), dtype=tf.float32, name='W2')
b2 = tf.Variable(tf.random_normal([3], stddev=0.03), dtype=tf.float32, name='b2')
W3 = tf.Variable(tf.random_normal([3,1], stddev=0.03), dtype=tf.float32, name='W3')
b3 = tf.Variable(tf.random_normal([1], stddev=0.03), dtype=tf.float32, name='b3')
layer1 = tf.tanh(tf.multiply(x,W1) + b1)
layer2 = tf.tanh(tf.matmul(layer1, W2) + b2)
linear_model = tf.reduce_sum(tf.matmul(layer2, W3) + b3, 1, keepdims=True)
# loss
#loss = tf.reduce_sum(tf.square(linear_model - y_train_tf)) # sum of the squares
loss = tf.losses.mean_squared_error(y_train_tf,linear_model)
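# (note: unlike the original reduce_sum loss, mean_squared_error averages over
# the batch, which scales the gradients down by the number of samples and
# matches Keras's 'mean_squared_error' loss)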
tf.summary.scalar('loss', loss)
# optimizer
optimizer = tf.train.GradientDescentOptimizer(0.01)
train = optimizer.minimize(loss)
# training loop
init = tf.global_variables_initializer()
sess = tf.Session()
# Merge all the summaries
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('train_tensorboard',sess.graph)
sess.run(init)  # initialize the variables
fig, ax = plt.subplots()
for i in range(40000):
    summary, f_predict, _ = sess.run([merged, linear_model, train], feed_dict={x: x_train})
    y_train, curr_layer1, curr_layer2, curr_W1, curr_b1, curr_W2, curr_b2, curr_W3, curr_b3, curr_loss = sess.run(
        [y_train_tf, layer1, layer2, W1, b1, W2, b2, W3, b3, loss], {x: x_train})
    train_writer.add_summary(summary, i)
    if i % 1000 == 999:
        print("step ", i)
        print("W1: %s b1: %s" % (curr_W1, curr_b1))
        print("W2: %s b2: %s" % (curr_W2, curr_b2))
        print("W3: %s b3: %s" % (curr_W3, curr_b3))
        print("layer1: %s layer2: %s" % (curr_layer1, curr_layer2))
        print("linear_model: %s loss: %s" % (f_predict, curr_loss))
        print(" ")
        y_plot = y_train.reshape(1, -1)[0]
        pred_plot = f_predict.reshape(1, -1)[0]
        ax.clear()  # redraw the figure each report (plt.hold is deprecated)
        ax.plot(x_plot, y_plot)
        ax.plot(x_plot, pred_plot, 'g--')
        ax.set(xlabel='X Value', ylabel='Y / Predicted Value',
               title="%d  Loss: %s" % (i, curr_loss))
        plt.pause(0.001)
fig.savefig("fig1.png")
plt.show()
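Why the constant initialization stalls training: when every weight in a layer starts at the same value, every hidden unit computes the same activation and receives the same gradient, so the units can never become different from one another and the layer collapses to a single effective unit. Here is a minimal NumPy sketch of one such gradient step (the 1-to-4 tanh layer and the upstream gradient are made up for illustration):

import numpy as np
rng = np.random.RandomState(0)
xb = rng.randn(8, 1)                  # toy input batch
W = np.full((1, 4), 0.3)              # constant init, as in the question
b = np.full(4, -0.3)
h = np.tanh(xb.dot(W) + b)            # all 4 columns of h are identical
g = h - 0.5                           # placeholder upstream gradient
dW = xb.T.dot(g * (1 - h ** 2))       # gradient of the tanh layer w.r.t. W
print(np.ptp(h, axis=1))              # all zeros: the units agree exactly
print(dW)                             # all entries equal: the units stay identical

Random starting values such as tf.random_normal([1,10], stddev=0.03) break this symmetry, which is exactly what Keras's default initializer does automatically.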