I built a simple neural network with TensorFlow to learn a quadratic function. Surprisingly, the tf.layers.dense version performs worse than the network I wired up by hand, and I can't see why. I'd be grateful if someone could point out what I'm missing. Here is the code:
Data generation
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
def f(x):
    return x**2 - 6*x + 9

def data_generator_const_sigma(x, sigma, samples):
    # draw `samples` noisy observations of f(x) with constant noise sigma
    return np.random.normal(f(x), sigma, samples)
x_vals = np.arange(1,5.2,0.2)
x_arr = np.array([])
y_arr = np.array([])
samples = 50
for x in x_vals:
    x_arr = np.append(x_arr, np.full(samples, x))
    y_arr = np.append(y_arr, data_generator_const_sigma(x, 0.1, samples))
x_arr, y_arr = shuffle(x_arr, y_arr)
x_test = np.arange(1.1,5.1,0.2)
plt.figure(figsize=(7,7))
plt.grid(True)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.scatter(x_arr,y_arr)
plt.plot(x_vals, list(map(f, x_vals)), c='m')  # list() so matplotlib gets a sequence under Python 3
Output: (scatter plot of the noisy samples together with the true quadratic curve)
Low-level API:
def nn_layer(prev_layer, weights, bias, activation=None):
    # fully-connected layer: prev_layer @ weights + bias, with an optional activation
    layer = tf.add(tf.matmul(prev_layer, weights), bias)
    if activation == 'tanh':
        layer = tf.nn.tanh(layer)
    elif activation == 'relu':
        layer = tf.nn.relu(layer)
    return layer
hidden_layers_size = [12,12]
epochs=1000
batch_size=50
learning_rate=0.0003
display_step=50
w = dict()
b = dict()
tf.reset_default_graph()
L = len(hidden_layers_size) + 2
_ls = [1]+hidden_layers_size+[1]
for i in range(1,L):
    w[i] = tf.get_variable(name='w'+str(i), shape=[_ls[i-1],_ls[i]],
                           initializer=tf.contrib.layers.xavier_initializer())
    b[i] = tf.get_variable(name='b'+str(i), shape=_ls[i],
                           initializer=tf.zeros_initializer())
x = tf.placeholder(name='x',shape=(None,1),dtype=tf.float32)
y = tf.placeholder(name='y',shape=(None,1),dtype=tf.float32)
layer = x
for i in range(1,L):
    if i != L-1:
        layer = nn_layer(layer,w[i],b[i],'tanh')
    else:
        layer = nn_layer(layer,w[i],b[i])
output = layer
cost = tf.reduce_mean(tf.losses.mean_squared_error(labels=y,predictions=output))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
batch_num = int(len(x_arr) / batch_size)
x_batches = np.array_split(x_arr, batch_num)
y_batches = np.array_split(y_arr, batch_num)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        avg_cost = 0
        for i in range(batch_num):
            x_batch = np.expand_dims(x_batches[i],axis=1)
            y_batch = np.expand_dims(y_batches[i],axis=1)
            _, c = sess.run([optimizer,cost], feed_dict={x:x_batch, y:y_batch})
            avg_cost += c/batch_num
        if epoch % display_step == 0:
            print('Epoch {0} | cost = {1:.4f}'.format(epoch,avg_cost))
    y_pred = sess.run(output,feed_dict={x:np.expand_dims(x_test,axis=1)})
plt.figure(figsize=(7,7))
plt.grid(True)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.scatter(x_arr,y_arr,c='b')
plt.scatter(x_test,y_pred,c='r')
plt.plot(x_vals, list(map(f, x_vals)), c='m')
plt.show()
Output:
Epoch 0 | cost = 4.5745
Epoch 50 | cost = 1.7379
Epoch 100 | cost = 1.1629
Epoch 150 | cost = 0.4951
Epoch 200 | cost = 0.2626
Epoch 250 | cost = 0.1826
Epoch 300 | cost = 0.1479
Epoch 350 | cost = 0.1325
Epoch 400 | cost = 0.1233
Epoch 450 | cost = 0.1171
Epoch 500 | cost = 0.1128
Epoch 550 | cost = 0.1100
Epoch 600 | cost = 0.1080
Epoch 650 | cost = 0.1067
Epoch 700 | cost = 0.1058
Epoch 750 | cost = 0.1052
Epoch 800 | cost = 0.1048
Epoch 850 | cost = 0.1044
Epoch 900 | cost = 0.1042
Epoch 950 | cost = 0.1040
Using tf.layers.dense:
hidden_layers_size = [12,12]
epochs=1000
batch_size=50
learning_rate=0.0003
display_step=50
tf.reset_default_graph()
x = tf.placeholder(name='x',shape=(None,1),dtype=tf.float32)
y = tf.placeholder(name='y',shape=(None,1),dtype=tf.float32)
layer = tf.layers.dense(inputs=x, units=1, activation=tf.nn.tanh,
                        kernel_initializer=tf.contrib.layers.xavier_initializer())
for l in hidden_layers_size:
    layer = tf.layers.dense(inputs=layer, units=l, activation=tf.nn.tanh,
                            kernel_initializer=tf.contrib.layers.xavier_initializer())
output = tf.layers.dense(inputs=layer, units=1,
                         kernel_initializer=tf.contrib.layers.xavier_initializer())
cost = tf.reduce_mean(tf.losses.mean_squared_error(labels=y,predictions=output))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
batch_num = int(len(x_arr) / batch_size)
x_batches = np.array_split(x_arr, batch_num)
y_batches = np.array_split(y_arr, batch_num)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        avg_cost = 0
        for i in range(batch_num):
            x_batch = np.expand_dims(x_batches[i],axis=1)
            y_batch = np.expand_dims(y_batches[i],axis=1)
            _, c = sess.run([optimizer,cost], feed_dict={x:x_batch, y:y_batch})
            avg_cost += c/batch_num
        if epoch % display_step == 0:
            print('Epoch {0} | cost = {1:.4f}'.format(epoch,avg_cost))
    y_pred = sess.run(output,feed_dict={x:np.expand_dims(x_test,axis=1)})
plt.figure(figsize=(7,7))
plt.grid(True)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.scatter(x_arr,y_arr,c='b')
plt.scatter(x_test,y_pred,c='r')
plt.plot(x_vals, list(map(f, x_vals)), c='m')
plt.show()
Output:
Epoch 0 | cost = 3.3261
Epoch 50 | cost = 1.9012
Epoch 100 | cost = 1.8725
Epoch 150 | cost = 1.8572
Epoch 200 | cost = 1.7526
Epoch 250 | cost = 1.4249
Epoch 300 | cost = 1.3330
Epoch 350 | cost = 1.3043
Epoch 400 | cost = 1.2935
Epoch 450 | cost = 1.2893
Epoch 500 | cost = 1.2877
Epoch 550 | cost = 1.2868
Epoch 600 | cost = 1.2861
Epoch 650 | cost = 1.2854
Epoch 700 | cost = 1.2847
Epoch 750 | cost = 1.2838
Epoch 800 | cost = 1.2828
Epoch 850 | cost = 1.2814
Epoch 900 | cost = 1.2798
Epoch 950 | cost = 1.2780
Answer 0 (score: 0)
Found the problem: in the second version I accidentally fed the input through an extra 1-unit tanh layer. That layer squashes the scalar input through a single-unit bottleneck before it ever reaches the hidden layers, which is why the tf.layers.dense version trains so poorly. This:
layer = tf.layers.dense(inputs=x, units=1, activation=tf.nn.tanh,
                        kernel_initializer=tf.contrib.layers.xavier_initializer())
should be replaced with:
layer = x
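
For clarity, here is a minimal sketch of the corrected layer construction (same hyperparameters and initializers as in the question; the training loop is unchanged). It simply mirrors the question's tf.layers.dense code with the offending 1-unit layer removed:

x = tf.placeholder(name='x', shape=(None,1), dtype=tf.float32)
y = tf.placeholder(name='y', shape=(None,1), dtype=tf.float32)
layer = x  # feed the raw input straight into the first hidden layer
for l in hidden_layers_size:
    layer = tf.layers.dense(inputs=layer, units=l, activation=tf.nn.tanh,
                            kernel_initializer=tf.contrib.layers.xavier_initializer())
output = tf.layers.dense(inputs=layer, units=1,
                         kernel_initializer=tf.contrib.layers.xavier_initializer())

With this change the tf.layers.dense model should match the hand-written one, since both now build the same 1-12-12-1 architecture.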