我是机器学习和尝试TensorFlow的新手。为了我的学习,我试图构建一个二进制分类器以在经典的《泰坦尼克号》生存数据上运行。我有几个问题需要解决,我们将不胜感激。
代码如下,我省略了数据格式化功能。
# Split the data into X and y.
# Read data
df_train = pd.read_csv("train.csv", index_col=0)
df = format_data(df_train)
df_X = df.drop(labels="Survived", axis=1)
df_X = df_X.astype('float32') # Float32 data type in all the columns.
df_y = pd.DataFrame(data=df.Survived, columns=["Survived"]) # Float32 datatype in all the columns.
df_y = df_y.astype('float32')
X_train, X_test, y_train, y_test = train_test_split(df_X, df_y, test_size=0.3, random_state=100)
# Tensorflow graph construction phase.
tf.reset_default_graph() # Reset default graph if you want to execute this cell multiple times.
n_inputs = df_X.shape[1]
n_hidden1 = 10
n_hidden2 = 5
n_outputs = df_y.shape[1]
X = tf.placeholder(tf.float32, shape=(None,n_inputs), name="X")
y = tf.placeholder(tf.float32, shape=(None), name="y")
# Define network architecture
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu,
kernel_initializer = tf.contrib.layers.xavier_initializer(),
kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1.0),
name="hidden1")
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu,
kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=1.0),
name="hidden2")
logits = tf.layers.dense(hidden1, n_outputs, name="outputs")
with tf.name_scope("loss"):
loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=y)
with tf.name_scope("train"):
learning_rate = 0.01
trainer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
with tf.name_scope("eval"):
# Define the accuracy
#The default threshold is 0.5, rounded off directly
prediction = tf.round(tf.sigmoid(logits))
# Bool into float32 type
correct = tf.cast(tf.equal(prediction, y), dtype=tf.float32)
# Average
accuracy = tf.reduce_mean(correct)
# Using tf.metrics.accuracy method to calculate accuracy
acc, acc_update = tf.metrics.accuracy(labels=y, predictions=prediction)
# Assign variable initializer
init = tf.global_variables_initializer()
init_local = tf.local_variables_initializer()
# Assign model saver
saver = tf.train.Saver()
# TensorFlow execution phase.
n_epochs = 1000
with tf.Session() as sess:
init.run()
init_local.run()
for epoch in range(n_epochs):
sess.run(trainer, feed_dict={X:X_train, y:y_train})
acc_train = accuracy.eval(feed_dict={X:X_train, y:y_train})
acc_test = accuracy.eval(feed_dict={X:X_test, y:y_test})
acc1_train = acc_update.eval(feed_dict={X:X_train, y:y_train})
acc1_test = acc_update.eval(feed_dict={X:X_test, y:y_test})
print("Training and test accuracy at", epoch, "epoch:", acc_train, acc_test)
print("Training and test accuracy at", epoch, "epoch:", acc1_train, acc1_test)
save_path = saver.save(sess, "./titanic.ckpt")
这是我使用的两种精度方法的输出。
Training and test accuracy at 999 epoch: 0.9069021 0.8507463
Training and test accuracy at 999 epoch: 0.8738184 0.8738114