I have built a neural network with two hidden layers. For the two hidden layers I use the ReLU activation, and for the last layer I use the sigmoid function. When I run the model the loss decreases (as expected), but the accuracy stays at zero.
Epoch: 9/150 Train Loss: 6.1869 Train Acc: 0.0005
Epoch: 9/150 Validation Loss: 6.4013 Validation Acc: 0.0000
Epoch: 17/150 Train Loss: 3.5452 Train Acc: 0.0005
Epoch: 17/150 Validation Loss: 3.7929 Validation Acc: 0.0000
Epoch: 25/150 Train Loss: 2.1594 Train Acc: 0.0005
Epoch: 25/150 Validation Loss: 2.2964 Validation Acc: 0.0000
Epoch: 34/150 Train Loss: 1.4753 Train Acc: 0.0005
Epoch: 34/150 Validation Loss: 1.5603 Validation Acc: 0.0000
Epoch: 42/150 Train Loss: 1.1325 Train Acc: 0.0005
Epoch: 42/150 Validation Loss: 1.2386 Validation Acc: 0.0000
Epoch: 50/150 Train Loss: 0.9314 Train Acc: 0.0005
Epoch: 50/150 Validation Loss: 1.0469 Validation Acc: 0.0000
Epoch: 59/150 Train Loss: 0.8146 Train Acc: 0.0005
Epoch: 59/150 Validation Loss: 0.9405 Validation Acc: 0.0000
Epoch: 67/150 Train Loss: 0.7348 Train Acc: 0.0005
Epoch: 67/150 Validation Loss: 0.8703 Validation Acc: 0.0000
Epoch: 75/150 Train Loss: 0.6712 Train Acc: 0.0005
Epoch: 75/150 Validation Loss: 0.8055 Validation Acc: 0.0000
Epoch: 84/150 Train Loss: 0.6200 Train Acc: 0.0005
Epoch: 84/150 Validation Loss: 0.7562 Validation Acc: 0.0000
Epoch: 92/150 Train Loss: 0.5753 Train Acc: 0.0005
Epoch: 92/150 Validation Loss: 0.7161 Validation Acc: 0.0000
Epoch: 100/150 Train Loss: 0.5385 Train Acc: 0.0005
Epoch: 100/150 Validation Loss: 0.6819 Validation Acc: 0.0000
Epoch: 109/150 Train Loss: 0.5085 Train Acc: 0.0005
Epoch: 109/150 Validation Loss: 0.6436 Validation Acc: 0.0000
Epoch: 117/150 Train Loss: 0.4857 Train Acc: 0.0005
Epoch: 117/150 Validation Loss: 0.6200 Validation Acc: 0.0000
Epoch: 125/150 Train Loss: 0.4664 Train Acc: 0.0005
Epoch: 125/150 Validation Loss: 0.5994 Validation Acc: 0.0000
Epoch: 134/150 Train Loss: 0.4504 Train Acc: 0.0005
Epoch: 134/150 Validation Loss: 0.5788 Validation Acc: 0.0000
Epoch: 142/150 Train Loss: 0.4378 Train Acc: 0.0005
Epoch: 142/150 Validation Loss: 0.5631 Validation Acc: 0.0000
Epoch: 150/150 Train Loss: 0.4283 Train Acc: 0.0005
Epoch: 150/150 Validation Loss: 0.5510 Validation Acc: 0.0000
'./prova.ckpt'
I suspect that the ReLU function is killing the gradients, driving them to zero. Could this be the reason my accuracy stays at zero?
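To test this hypothesis I can measure how many ReLU units are actually dead (always zero) on the training data. This is only a minimal sketch, meant to be run inside the training session, and it assumes the tensor names 'layer_1_relu:0' and 'vars:0' that I give to the two hidden layers in the code further below:

import numpy as np

# Fetch the two hidden-layer activations by name and count the units that
# output zero for every example ("dead" ReLU units).
graph = tf.get_default_graph()
h1 = graph.get_tensor_by_name('layer_1_relu:0')
h2 = graph.get_tensor_by_name('vars:0')
a1, a2 = sess.run([h1, h2], feed_dict={pred1.inputs: train_x})
for name, act in [("layer 1", a1), ("layer 2", a2)]:
    dead_fraction = np.mean(np.all(act == 0, axis=0))
    print("{}: {:.1%} of units never activate".format(name, dead_fraction))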
I have also tried changing the activation functions, using different combinations: 1. only sigmoid, 2. only softmax, 3. ReLU and softmax. But the situation did not change.
To build the neural network I followed the Titanic example on Kaggle: https://www.kaggle.com/linxinzhe/tensorflow-deep-learning-to-solve-titanic
import tensorflow as tf
from sklearn.model_selection import train_test_split

def split_valid_test_data(data, fraction=(1 - 0.8)):
    # Column 25 ("Premio") is the target; the remaining columns are the features.
    data_y = train_data.as_matrix(columns=[train_data.columns[25]])
    data_x = data.drop(["Premio"], axis=1)
    train_x, valid_x, train_y, valid_y = train_test_split(data_x, data_y, test_size=fraction)
    return train_x.values, train_y, valid_x, valid_y

train_x, train_y, valid_x, valid_y = split_valid_test_data(train_data)
print("train_x:{}".format(train_x.shape))
print("train_y:{}".format(train_y.shape))
print("train_y content:{}".format(train_y[:3]))
print("valid_x:{}".format(valid_x.shape))
print("valid_y:{}".format(valid_y.shape))
# 1st layer number of features (neurons)
n_hidden_1 = 50
# 2nd layer number of features (neurons)
n_hidden_2 = 50
##########################
# Neural Network
##########################
from collections import namedtuple

def multilayer_perceptron():
    tf.reset_default_graph()
    inputs = tf.placeholder(tf.float32, shape=[None, train_x.shape[1]], name='inputs')
    y = tf.placeholder(tf.float32, shape=[None, 1], name='y')

    weights = {
        'h1': tf.Variable(tf.random_normal([train_x.shape[1], n_hidden_1])),
        'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
        'out': tf.Variable(tf.random_normal([n_hidden_2, 1]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1])),
        'b2': tf.Variable(tf.random_normal([n_hidden_2])),
        'out': tf.Variable(tf.random_normal([1]))
    }

    # Hidden layer with 50 neurons and ReLU activation
    layer_1 = tf.add(tf.matmul(inputs, weights['h1']), biases['b1'], name='Layer_1_mat')
    layer_1 = tf.nn.relu(layer_1, name='layer_1_relu')
    # Hidden layer with ReLU activation
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'], name='Layer_2_mat')
    layer_2 = tf.nn.relu(layer_2, name='vars')
    # Output layer with linear activation (the sigmoid is applied in the loss and in 'predicted')
    out_layer = tf.matmul(layer_2, weights['out'], name='out_layer') + biases['out']

    learning_rate = tf.placeholder(tf.float32, name='learning_rate')
    is_training = tf.Variable(True, dtype=tf.bool)

    # Sigmoid cross-entropy computed on the logits
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=out_layer, name='cross_entropy')
    cost = tf.reduce_mean(cross_entropy, name='cost')

    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Accuracy: round the sigmoid output and compare it with the labels
    predicted = tf.nn.sigmoid(out_layer, name='predicted')
    correct_pred = tf.equal(tf.round(predicted), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

    # Export the nodes
    export_nodes = ['inputs', 'y', 'learning_rate', 'is_training', 'out_layer',
                    'cost', 'optimizer', 'predicted', 'accuracy']
    Graph = namedtuple('Graph', export_nodes)
    local_dict = locals()
    graph = Graph(*[local_dict[each] for each in export_nodes])

    return graph
pred1 = multilayer_perceptron()
#tf.add_to_collection('pred_func', pred1)
############################
#Batch
#############################
def get_batch(data_x, data_y, batch_size=300):
    batch_n = len(data_x) // batch_size
    for i in range(batch_n):
        batch_x = data_x[i*batch_size:(i+1)*batch_size]
        batch_y = data_y[i*batch_size:(i+1)*batch_size]
        yield batch_x, batch_y
epochs = 150
train_collect = 50
train_print=train_collect*2
learning_rate_value = 0.5 #0.0001
batch_size=150
x_collect = []
train_loss_collect = []
train_acc_collect = []
valid_loss_collect = []
valid_acc_collect = []
train_predict = train_data.drop(["Premio"], axis=1)
saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    iteration = 0

    for e in range(epochs):
        for batch_x, batch_y in get_batch(train_x, train_y, batch_size):
            iteration += 1
            feed = {pred1.inputs: train_x,
                    pred1.y: train_y,
                    pred1.learning_rate: learning_rate_value,
                    pred1.is_training: True
                    }

            train_loss, _, train_acc = sess.run([pred1.cost, pred1.optimizer, pred1.accuracy], feed_dict=feed)

            if iteration % train_collect == 0:
                x_collect.append(e)
                train_loss_collect.append(train_loss)
                train_acc_collect.append(train_acc)

                if iteration % train_print == 0:
                    print("Epoch: {}/{}".format(e + 1, epochs),
                          "Train Loss: {:.4f}".format(train_loss),
                          "Train Acc: {:.4f}".format(train_acc))

                feed = {pred1.inputs: valid_x,
                        pred1.y: valid_y,
                        pred1.is_training: False
                        }
                val_loss, val_acc = sess.run([pred1.cost, pred1.accuracy], feed_dict=feed)
                valid_loss_collect.append(val_loss)
                valid_acc_collect.append(val_acc)

                if iteration % train_print == 0:
                    print("Epoch: {}/{}".format(e + 1, epochs),
                          "Validation Loss: {:.4f}".format(val_loss),
                          "Validation Acc: {:.4f}".format(val_acc))

    saver.save(sess, "./prova.ckpt")
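To see what the network actually predicts after training, I can restore the saved checkpoint and compare the rounded sigmoid outputs with the validation labels. A minimal sketch, reusing the pred1 graph and the valid_x / valid_y arrays defined above:

# Restore the trained weights and inspect a few predictions next to the labels.
with tf.Session() as sess:
    saver.restore(sess, "./prova.ckpt")
    probs = sess.run(pred1.predicted, feed_dict={pred1.inputs: valid_x})
    print("sigmoid outputs:", probs[:5].ravel())
    print("rounded outputs:", probs[:5].ravel().round())
    print("labels:         ", valid_y[:5].ravel())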
train_data.columns[25]
is the variable I am predicting, Premio.
I have a dataset with 56 attributes (including the dependent variable Premio). To encode the DataFrame I used one-hot and binary encoding techniques, and for the numerical variables I used MinMax scaling.
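For reference, the preprocessing is roughly the following. This is only a sketch: the column names below are hypothetical placeholders, not my real 56 attributes.

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Hypothetical column names, used only to illustrate the encoding steps.
categorical_cols = ["regione", "tipo_polizza"]
numerical_cols = ["eta", "massimale"]

# One-hot encode the categorical attributes and scale the numerical ones to [0, 1];
# "Premio" is left untouched as the target column.
train_data = pd.get_dummies(train_data, columns=categorical_cols)
train_data[numerical_cols] = MinMaxScaler().fit_transform(train_data[numerical_cols])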