I built a simple neural network with TensorFlow to learn a quadratic function. Surprisingly, the tf.layers.dense version performs worse than the network I wired up by hand, and I can't see why. I'd be grateful if someone could point out what I'm missing. Here is the code:
Data generation
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
def f(x):
    return x**2 - 6*x + 9

def data_generator_const_sigma(x, sigma, samples):
    # draw `samples` noisy observations of f(x) with constant noise sigma
    return np.random.normal(f(x), sigma, samples)
x_vals = np.arange(1,5.2,0.2)
x_arr = np.array([])
y_arr = np.array([])
samples = 50
for x in x_vals:
    x_arr = np.append(x_arr, np.full(samples, x))
    y_arr = np.append(y_arr, data_generator_const_sigma(x, 0.1, samples))
x_arr, y_arr = shuffle(x_arr, y_arr)
x_test = np.arange(1.1,5.1,0.2)
plt.figure(figsize=(7,7))
plt.grid(True)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.scatter(x_arr,y_arr)
plt.plot(x_vals, list(map(f, x_vals)), c='m')  # list() so matplotlib gets a sequence under Python 3
Output: (scatter plot of the noisy samples together with the true quadratic curve)
Low-level API:
def nn_layer(prev_layer, weights, bias, activation=None):
    # fully-connected layer: prev_layer @ weights + bias, with an optional activation
    layer = tf.add(tf.matmul(prev_layer, weights), bias)
    if activation == 'tanh':
        layer = tf.nn.tanh(layer)
    elif activation == 'relu':
        layer = tf.nn.relu(layer)
    return layer
hidden_layers_size = [12,12]
epochs=1000
batch_size=50
learning_rate=0.0003
display_step=50
w = dict()
b = dict()
tf.reset_default_graph()
L = len(hidden_layers_size) + 2
_ls = [1]+hidden_layers_size+[1]
for i in range(1,L):
    w[i] = tf.get_variable(name='w'+str(i), shape=[_ls[i-1],_ls[i]],
                           initializer=tf.contrib.layers.xavier_initializer())
    b[i] = tf.get_variable(name='b'+str(i), shape=_ls[i],
                           initializer=tf.zeros_initializer())
x = tf.placeholder(name='x',shape=(None,1),dtype=tf.float32)
y = tf.placeholder(name='y',shape=(None,1),dtype=tf.float32)
layer = x
for i in range(1,L):
    if i != L-1:
        layer = nn_layer(layer,w[i],b[i],'tanh')
    else:
        layer = nn_layer(layer,w[i],b[i])
output = layer
cost = tf.reduce_mean(tf.losses.mean_squared_error(labels=y,predictions=output))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
batch_num = int(len(x_arr) / batch_size)
x_batches = np.array_split(x_arr, batch_num)
y_batches = np.array_split(y_arr, batch_num)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        avg_cost = 0
        for i in range(batch_num):
            x_batch = np.expand_dims(x_batches[i],axis=1)
            y_batch = np.expand_dims(y_batches[i],axis=1)
            _, c = sess.run([optimizer,cost], feed_dict={x:x_batch, y:y_batch})
            avg_cost += c/batch_num
        if epoch % display_step == 0:
            print('Epoch {0} | cost = {1:.4f}'.format(epoch,avg_cost))
    y_pred = sess.run(output,feed_dict={x:np.expand_dims(x_test,axis=1)})
plt.figure(figsize=(7,7))
plt.grid(True)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.scatter(x_arr,y_arr,c='b')
plt.scatter(x_test,y_pred,c='r')
plt.plot(x_vals, list(map(f, x_vals)), c='m')
plt.show()
Output:
Epoch 0 | cost = 4.5745
Epoch 50 | cost = 1.7379
Epoch 100 | cost = 1.1629
Epoch 150 | cost = 0.4951
Epoch 200 | cost = 0.2626
Epoch 250 | cost = 0.1826
Epoch 300 | cost = 0.1479
Epoch 350 | cost = 0.1325
Epoch 400 | cost = 0.1233
Epoch 450 | cost = 0.1171
Epoch 500 | cost = 0.1128
Epoch 550 | cost = 0.1100
Epoch 600 | cost = 0.1080
Epoch 650 | cost = 0.1067
Epoch 700 | cost = 0.1058
Epoch 750 | cost = 0.1052
Epoch 800 | cost = 0.1048
Epoch 850 | cost = 0.1044
Epoch 900 | cost = 0.1042
Epoch 950 | cost = 0.1040
Using tf.layers.dense:
hidden_layers_size = [12,12]
epochs=1000
batch_size=50
learning_rate=0.0003
display_step=50
tf.reset_default_graph()
x = tf.placeholder(name='x',shape=(None,1),dtype=tf.float32)
y = tf.placeholder(name='y',shape=(None,1),dtype=tf.float32)
layer = tf.layers.dense(inputs=x, units=1, activation=tf.nn.tanh,
                        kernel_initializer=tf.contrib.layers.xavier_initializer())
for l in hidden_layers_size:
    layer = tf.layers.dense(inputs=layer, units=l, activation=tf.nn.tanh,
                            kernel_initializer=tf.contrib.layers.xavier_initializer())
output = tf.layers.dense(inputs=layer, units=1,
                         kernel_initializer=tf.contrib.layers.xavier_initializer())
cost = tf.reduce_mean(tf.losses.mean_squared_error(labels=y,predictions=output))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
batch_num = int(len(x_arr) / batch_size)
x_batches = np.array_split(x_arr, batch_num)
y_batches = np.array_split(y_arr, batch_num)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(epochs):
        avg_cost = 0
        for i in range(batch_num):
            x_batch = np.expand_dims(x_batches[i],axis=1)
            y_batch = np.expand_dims(y_batches[i],axis=1)
            _, c = sess.run([optimizer,cost], feed_dict={x:x_batch, y:y_batch})
            avg_cost += c/batch_num
        if epoch % display_step == 0:
            print('Epoch {0} | cost = {1:.4f}'.format(epoch,avg_cost))
    y_pred = sess.run(output,feed_dict={x:np.expand_dims(x_test,axis=1)})
plt.figure(figsize=(7,7))
plt.grid(True)
plt.xlabel('x')
plt.ylabel('f(x)')
plt.scatter(x_arr,y_arr,c='b')
plt.scatter(x_test,y_pred,c='r')
plt.plot(x_vals, list(map(f, x_vals)), c='m')
plt.show()
Output:
Epoch 0 | cost = 3.3261
Epoch 50 | cost = 1.9012
Epoch 100 | cost = 1.8725
Epoch 150 | cost = 1.8572
Epoch 200 | cost = 1.7526
Epoch 250 | cost = 1.4249
Epoch 300 | cost = 1.3330
Epoch 350 | cost = 1.3043
Epoch 400 | cost = 1.2935
Epoch 450 | cost = 1.2893
Epoch 500 | cost = 1.2877
Epoch 550 | cost = 1.2868
Epoch 600 | cost = 1.2861
Epoch 650 | cost = 1.2854
Epoch 700 | cost = 1.2847
Epoch 750 | cost = 1.2838
Epoch 800 | cost = 1.2828
Epoch 850 | cost = 1.2814
Epoch 900 | cost = 1.2798
Epoch 950 | cost = 1.2780
Answer 0 (score: 0)
Found the problem: in the second version I accidentally fed the input through an extra 1-unit tanh layer. That layer squashes the scalar input through a single-unit bottleneck before it ever reaches the hidden layers, which is why the tf.layers.dense version trains so poorly. This:
layer = tf.layers.dense(inputs=x, units=1, activation=tf.nn.tanh,
                        kernel_initializer=tf.contrib.layers.xavier_initializer())
should be replaced with:
layer = x
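
For clarity, here is a minimal sketch of the corrected layer construction (same hyperparameters and initializers as in the question; the training loop is unchanged). It simply mirrors the question's tf.layers.dense code with the offending 1-unit layer removed:

x = tf.placeholder(name='x', shape=(None,1), dtype=tf.float32)
y = tf.placeholder(name='y', shape=(None,1), dtype=tf.float32)
layer = x  # feed the raw input straight into the first hidden layer
for l in hidden_layers_size:
    layer = tf.layers.dense(inputs=layer, units=l, activation=tf.nn.tanh,
                            kernel_initializer=tf.contrib.layers.xavier_initializer())
output = tf.layers.dense(inputs=layer, units=1,
                         kernel_initializer=tf.contrib.layers.xavier_initializer())

With this change the tf.layers.dense model should match the hand-written one, since both now build the same 1-12-12-1 architecture.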