I built a regression neural network to predict insurance premiums. The loss decreases to values close to zero, but it converges to a different final value every time I train a new model from scratch.
...
Epoch: 30/30 Train Loss: 0.1665
Epoch: 30/30 Validation Loss: 1.2689
...
Epoch: 30/30 Train Loss: 4.6166
Epoch: 30/30 Validation Loss: 4.4621
...
Epoch: 30/30 Train Loss: 1.3190
Epoch: 30/30 Validation Loss: 1.3700
Is it correct that the loss converges to a different value on each run? The predicted values are produced after the loss output.
Model code:
#################
#TRAIN AND TEST SET
#################
from sklearn.model_selection import train_test_split
train_data, test_data = train_test_split(df, test_size=0.25)
train_id = train_data["ID"]
test_id = test_data["ID"]
##################
#TENSORFLOW MODEL
##################
import tensorflow as tf
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
############################
#Split training data into train and validation sets
############################
def split_valid_test_data(data, fraction=(1 - 0.8)):
    # target: column 8 of the frame; use the passed-in `data` (not the global train_data)
    # and .values in place of the deprecated .as_matrix()
    data_y = data[[data.columns[8]]].values
    data_x = data.drop(["Prime"], axis=1)
    train_x, valid_x, train_y, valid_y = train_test_split(data_x, data_y, test_size=fraction)
    return train_x.values, train_y, valid_x.values, valid_y
train_x, train_y, valid_x, valid_y = split_valid_test_data(train_data)
print("train_x:{}".format(train_x.shape))
print("train_y:{}".format(train_y.shape))
print("train_y content:{}".format(train_y[:3]))
print("valid_x:{}".format(valid_x.shape))
print("valid_y:{}".format(valid_y.shape))
##########
#Parameters
##########
learning_rate = 0.001
training_epochs = 30
batch_size = 100
display_step = 1
total_len = train_x.shape[0]
# Network Parameters
n_hidden_1 = 16  # 1st layer number of features
n_hidden_2 = 14  # 2nd layer number of features
n_hidden_3 = 12
n_hidden_4 = 10
n_input = train_x.shape[1]
n_classes = 1
###############
#tf Graph input
###############
x = tf.placeholder("float", [None, train_x.shape[1]])
y = tf.placeholder("float", [None, n_classes])
#################
#Model
#################
def multilayer_perceptron(x, weights, biases):
    # Hidden layer with ReLU activation
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with ReLU activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Hidden layer with ReLU activation
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    layer_3 = tf.nn.relu(layer_3)
    # Hidden layer with ReLU activation
    layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
    layer_4 = tf.nn.relu(layer_4)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_4, weights['out']) + biases['out']
    return out_layer
##################################
#weight & bias
##################################
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1, dtype=tf.float32)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, 0.1, dtype=tf.float32)),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], 0, 0.1, dtype=tf.float32)),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], 0, 0.1, dtype=tf.float32)),
    'out': tf.Variable(tf.random_normal([n_hidden_4, n_classes], 0, 0.1, dtype=tf.float32))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1, dtype=tf.float32)),
    'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, 0.1, dtype=tf.float32)),
    'b3': tf.Variable(tf.random_normal([n_hidden_3], 0, 0.1, dtype=tf.float32)),
    'b4': tf.Variable(tf.random_normal([n_hidden_4], 0, 0.1, dtype=tf.float32)),
    'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1, dtype=tf.float32))
}
pred = multilayer_perceptron(x, weights, biases)
##############################################
# loss and optimizer
###############################################
###########
#MSE
###########
cost = tf.reduce_mean(tf.square(pred-y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
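# Note: with n_classes = 1 the argmax of both pred and y is always index 0, so this
# 'accuracy' is constantly 1.0 and carries no information for a regression model.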
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
########
#batch
########
def get_batch(data_x, data_y, batch_size):
    batch_n = len(data_x) // batch_size
    for i in range(batch_n):
        batch_x = data_x[i*batch_size:(i+1)*batch_size]
        batch_y = data_y[i*batch_size:(i+1)*batch_size]
        yield batch_x, batch_y
train_collect = 50
train_print=train_collect*2
x_collect = []
train_loss_collect = []
train_acc_collect = []
valid_loss_collect = []
valid_acc_collect = []
saver = tf.train.Saver()
with tf.Session() as sess:
    # the initializer re-samples every tf.random_normal initializer, so each new run
    # starts from different weights (and can converge to a different final loss)
    sess.run(tf.global_variables_initializer())
    iteration = 0
    for e in range(training_epochs):
        for batch_x, batch_y in get_batch(train_x, train_y, batch_size):
            iteration += 1
            # NOTE: batch_x/batch_y are never fed; every step trains on the full training set
            feed = {x: train_x,
                    y: train_y}
            sess.run(optimizer, feed_dict=feed)
            train_loss, train_acc = sess.run([cost, accuracy], feed_dict=feed)
            prediction_train = sess.run([pred], feed_dict=feed)
            if iteration % train_collect == 0:
                x_collect.append(e)
                train_loss_collect.append(train_loss)
                train_acc_collect.append(train_acc)
                if iteration % train_print == 0:
                    print("Epoch: {}/{}".format(e + 1, training_epochs),
                          "Train Loss: {:.4f}".format(train_loss),
                          "Train Acc: {:.4f}".format(train_acc))
                feed = {x: valid_x,
                        y: valid_y}
                val_loss, val_acc = sess.run([cost, accuracy], feed_dict=feed)
                valid_loss_collect.append(val_loss)
                valid_acc_collect.append(val_acc)
                if iteration % train_print == 0:
                    print("Epoch: {}/{}".format(e + 1, training_epochs),
                          "Validation Loss: {:.4f}".format(val_loss),
                          "Validation Acc: {:.4f}".format(val_acc))
    saver.save(sess, "./prova_habitas.ckpt")
Answer 0 (score: 0):
Sometimes neural networks are very sensitive to their initialization.
Try reducing the scale of the variables' initial values, for example:
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0,0.1, dtype=tf.float32) * 0.001)
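For illustration, here is a minimal sketch of the same idea applied explicitly, assuming the TF 1.x graph from the question; the stddev of 0.01, the seed value 42 and the stand-in layer sizes are arbitrary choices for this example, not taken from the original code. Fixing the graph-level seed makes repeated runs start from identical weights, which separates initialization variance from everything else:

import tensorflow as tf

tf.set_random_seed(42)  # graph-level seed: repeated runs draw the same initial weights

n_input, n_hidden_1 = 20, 16  # stand-in sizes; reuse the values defined in the question

# A smaller stddev (0.01 instead of 0.1) keeps the starting weights closer to zero,
# which is the effect the *0.001 multiplication above is aiming for.
w_h1 = tf.Variable(tf.random_normal([n_input, n_hidden_1],
                                    mean=0.0, stddev=0.01,
                                    seed=42, dtype=tf.float32))
b_h1 = tf.Variable(tf.zeros([n_hidden_1], dtype=tf.float32))  # biases can simply start at zero

Note that even with identical initial weights the runs in the question would still differ, because train_test_split reshuffles the data on every run unless random_state is set.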