这是我第一次使用Tensorflow。这是用于回归的基本MLP的实现。代码从标准MNIST分类器修改:
我只更改了输入,输出,超参数,成本函数
cost = tf.reduce_mean(tf.square(pred-y))
并在out_layer之后添加了
out = tf.sigmoid(out_layer)
我培训4440输入数据,包含5个功能并测试2956个数据。在第3纪元之后,训练集的所有值都相同。问题是,对于测试集,我得到了相同的预测值。
Training started...
Epoch 1
Loss= 0.001181 , y_pred= 0.485037 , y_actual= 0.450664
Loss= 0.014749 , y_pred= 0.206193 , y_actual= 0.32764
Loss= 0.000000 , y_pred= 0.323003 , y_actual= 0.323016
Loss= 0.028031 , y_pred= 0.276502 , y_actual= 0.109078
Loss= 0.024109 , y_pred= 0.283097 , y_actual= 0.127827
Loss= 0.000688 , y_pred= 0.222412 , y_actual= 0.196174
Loss= 0.022695 , y_pred= 0.285257 , y_actual= 0.13461
Loss= 0.043803 , y_pred= 0.228042 , y_actual= 0.437334
Loss= 0.002999 , y_pred= 0.251055 , y_actual= 0.30582
Epoch 2
Loss= 0.041213 , y_pred= 0.247654 , y_actual= 0.450664
Loss= 0.005612 , y_pred= 0.252729 , y_actual= 0.32764
Loss= 0.001075 , y_pred= 0.29023 , y_actual= 0.323016
Loss= 0.018882 , y_pred= 0.246489 , y_actual= 0.109078
Loss= 0.018060 , y_pred= 0.262215 , y_actual= 0.127827
Loss= 0.001204 , y_pred= 0.23087 , y_actual= 0.196174
Loss= 0.018622 , y_pred= 0.271072 , y_actual= 0.13461
Loss= 0.038593 , y_pred= 0.240883 , y_actual= 0.437334
Loss= 0.002938 , y_pred= 0.251615 , y_actual= 0.30582
Epoch 3
Loss= 0.041822 , y_pred= 0.24616 , y_actual= 0.450664
Loss= 0.005700 , y_pred= 0.252141 , y_actual= 0.32764
Loss= 0.001073 , y_pred= 0.29026 , y_actual= 0.323016
Loss= 0.018882 , y_pred= 0.24649 , y_actual= 0.109078
Loss= 0.018059 , y_pred= 0.26221 , y_actual= 0.127827
Loss= 0.001203 , y_pred= 0.230861 , y_actual= 0.196174
Loss= 0.018622 , y_pred= 0.271074 , y_actual= 0.13461
Loss= 0.038595 , y_pred= 0.240879 , y_actual= 0.437334
Loss= 0.002938 , y_pred= 0.251613 , y_actual= 0.30582
Epoch 4
Loss= 0.041822 , y_pred= 0.24616 , y_actual= 0.450664
Loss= 0.005700 , y_pred= 0.252141 , y_actual= 0.32764
Loss= 0.001073 , y_pred= 0.29026 , y_actual= 0.323016
Loss= 0.018882 , y_pred= 0.24649 , y_actual= 0.109078
Loss= 0.018059 , y_pred= 0.26221 , y_actual= 0.127827
Loss= 0.001203 , y_pred= 0.23086 , y_actual= 0.196174
Loss= 0.018623 , y_pred= 0.271074 , y_actual= 0.13461
Loss= 0.038595 , y_pred= 0.240879 , y_actual= 0.437334
Loss= 0.002938 , y_pred= 0.251613 , y_actual= 0.30582
Training Finished!
Testing started...
Loss= 0.010336 , y_pred= 0.246348 , y_actual= 0.348012
Loss= 0.123387 , y_pred= 0.246348 , y_actual= 0.597613
Loss= 0.005033 , y_pred= 0.246348 , y_actual= 0.175401
Loss= 0.022147 , y_pred= 0.246348 , y_actual= 0.0975305
Loss= 0.004484 , y_pred= 0.246348 , y_actual= 0.313307
Loss= 0.010506 , y_pred= 0.246348 , y_actual= 0.348845
Loss= 0.000052 , y_pred= 0.246348 , y_actual= 0.239131
我已经尝试了描述相同问题的各个帖子提供的所有可能的解决方案。就像数据被混洗和归一化一样,y和pred的维度是相同的。
1)TensorFlow always converging to same output for all items after training
2)MLP in tensorflow for regression... not converging
3)tensorflow deep neural network for regression always predict same results in one batch
这是代码。非常感谢。
# In[67]:
import tensorflow as tf
import numpy as np
# In[68]:
# Parameters
learning_rate = 0.01
epoch = 1
dropout = 0.75
# Network Parameters
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_hidden_3 = 256
n_hidden_4 = 256
n_input = 5
n_val = 1
train_set = 4440
# tf Graph input
x = tf.placeholder("float", [None, n_input], name = "x")
y = tf.placeholder("float", [None, n_val], name = "y")
# keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# In[69]:
# Create model
def multilayer_perceptron(x, weights, biases):
# Hidden layer with RELU activation
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
layer_1 = tf.nn.relu(layer_1)
# Hidden layer with RELU activation
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
layer_2 = tf.nn.relu(layer_2)
# Hidden layer with RELU activation
layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
layer_3 = tf.nn.relu(layer_3)
# Hidden layer with RELU activation
layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
layer_4 = tf.nn.relu(layer_4)
# Output layer with linear activation
out_layer = tf.matmul(layer_4, weights['out']) + biases['out']
out = tf.sigmoid(out_layer)
return out
# In[70]:
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], mean=0.0, stddev=0.01 ,dtype=tf.float32, name = "h1")),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], mean=0.0, stddev=0.01 ,dtype=tf.float32, name = "h2")),
'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "h3")),
'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "h4")),
'out': tf.Variable(tf.random_normal([n_hidden_4, n_val], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "out"))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "b1")),
'b2': tf.Variable(tf.random_normal([n_hidden_2], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "b2")),
'b3': tf.Variable(tf.random_normal([n_hidden_3], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "b3")),
'b4': tf.Variable(tf.random_normal([n_hidden_4], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "b4")),
'out': tf.Variable(tf.random_normal([n_val], mean=0.0, stddev=0.01 ,dtype=tf.float32,name = "out"))
}
# Construct model
pred = multilayer_perceptron(x, weights, biases)
# pred = tf.transpose(pred)
# Define loss and optimizer
cost = tf.reduce_mean(tf.square(pred-y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# In[71]:
# Launch the graph
with tf.Session() as sess:
sess.run(init)
# Training
print "Training started...\n"
for ep in range(1,epoch+1):
print "Epoch",ep
print
num = 0
with open('norm_rand_feature_y.csv') as f:
for line in f:
data = line.split(",")
x_temp = data[0:5]
y_temp = data[5]
x_temp = np.asarray(x_temp)
x_temp = x_temp.reshape(1,x_temp.shape[0])
x_temp = x_temp.astype(np.float32)
y_temp = np.asarray(y_temp)
y_temp = y_temp.reshape(1,1)
y_temp = y_temp.astype(np.float32)
sess.run(optimizer, feed_dict={x: x_temp, y: y_temp})
loss,y_pre = sess.run([cost,pred], feed_dict={x: x_temp,
y: y_temp})
# print tuple(pred.get_shape().as_list())
# print y.shape
if num%500 == 0:
print "Loss= " + "{:.6f}".format(loss), ", y_pred=",y_pre[0][0], ", y_actual=",y_temp[0][0]
num = num+1
if num == train_set:
break
# variables_names =[v.name for v in tf.trainable_variables()]
# values = sess.run(variables_names)
# for k,v in zip(variables_names, values):
# print(k, v)
# print sess.run("h1", feed_dict={x: x_temp,y: y_temp, keep_prob:1.0})
print "Training Finished!\n"
#Testing
y_value = list()
y_actual = list()
error = 0
num=0
print "Testing started...\n"
with open('norm_rand_feature_y.csv') as f:
for j in range(train_set):
f.next()
for line in f:
data = line.split(",")
x_temp = data[0:5]
y_temp = float(data[5])
x_temp = np.asarray(x_temp)
x_temp = x_temp.astype(np.float32)
x_temp = x_temp.reshape(1,x_temp.shape[0])
y_temp = np.asarray(y_temp)
y_temp = y_temp.reshape(1,1)
y_temp = y_temp.astype(np.float32)
loss = sess.run(cost, feed_dict={x: x_temp, y:y_temp})
y_pred = sess.run(pred, feed_dict={x: x_temp})
print "Loss= " + "{:.6f}".format(loss), ", y_pred=",y_pre[0][0], ", y_actual=",y_temp[0][0]
y_value.append(y_pred[0][0])
y_actual.append(y_temp)
error = error + abs(y_pred[0][0] - y_temp)
# num = num+1
# if num == 100:
# break
print
print "Testing Finished!\n"
error = error/(7396-train_set+1)
print "Total error:",error[0][0]
y_row = zip(y_value,y_actual)
np.savetxt("test_y_mlp.csv", y_row, delimiter=",")
答案 0 :(得分:0)
我会先尝试一下: