After fitting a logistic regression in TensorFlow, I am trying to get the accuracy on the test data. I use:
# parameters
learning_rate = 0.01
training_epochs = 5
batch_size = 8192
display_step = 1
reset_graph()
X = tf.placeholder(tf.float32, shape=(None, X_train.shape[1]), name="X")
y = tf.placeholder(tf.float32, shape=(None, y_train.shape[1]), name="y")
# set model weights
weights = tf.Variable(tf.random_normal([X_train.shape[1], y_train.shape[1]], stddev=0.1), name="weights")
biases = tf.Variable(tf.ones(shape=y_train.shape[1]), name="biases")
# construct model
y_pred = tf.nn.sigmoid(tf.add(tf.matmul(X, weights), biases))
# minimize error using cross entropy
cost = tf.reduce_mean(-(y*tf.log(y_pred) + (1 - y)*tf.log(1 - y_pred)))
# gradient descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
# check accuracy
correct_prediction = tf.equal(y, y_pred)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# variables initializing
init = tf.global_variables_initializer()
# starting session
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(training_epochs):
        for batch_i, (start, end) in enumerate(split(0, X_train.shape[0], batch_size)):
            x_batch, y_true_batch = X_train[start:end].toarray(), y_train[start:end]
            feed_dict_train = {X: x_batch, y: y_true_batch}
            sess.run(optimizer, feed_dict=feed_dict_train)
        # display logs per epoch step
        if (epoch + 1) % display_step == 0:
            cost_step, acc_step = sess.run([cost, accuracy], feed_dict={X: X_test.toarray(), y: y_test})
            print("Epoch:", '%04d' % (epoch + 1), "cost =", "{:.4f}".format(cost_step), "accuracy =", "{:.4f}".format(acc_step))
    print(accuracy.eval(feed_dict={X: X_test.toarray(), y: y_test}))
    predicts = sess.run(y_pred, feed_dict={X: X_test.toarray()})
    print(y_test.reshape(1, -1))
    print(predicts)
But it always returns an accuracy of 0. Maybe there is some mistake in how I define the variables or the accuracy. How can I fix this and get the real accuracy?
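My own guess is that the problem is in correct_prediction: tf.equal(y, y_pred) tests the 0/1 labels against the raw sigmoid outputs for exact equality, which can never be true. A minimal sketch of what I think the accuracy node should look like instead (the 0.5 cutoff is my own assumption):

# threshold the sigmoid output before comparing against the 0/1 labels
predicted_class = tf.cast(y_pred > 0.5, tf.float32)  # assumed 0.5 decision threshold
correct_prediction = tf.equal(y, predicted_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

Is this the right way to define it?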
I checked the test labels and the test predictions, and they look like this:
test - [[0 0 1 ... 1 1 1]]
test_predict - [[0.74700934]
 [0.59065396]
 [0.7749347 ]
 ...
 [0.6533902 ]
 [0.72667533]
 [0.67722285]]
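As an aside, I am also not sure whether taking tf.log of the sigmoid output directly is numerically safe; I have seen tf.nn.sigmoid_cross_entropy_with_logits suggested for this, roughly like the sketch below, but that seems to be a separate issue from the accuracy being 0.

# keep the raw logits and let TF compute a numerically stable cross entropy
logits = tf.add(tf.matmul(X, weights), biases)
y_pred = tf.nn.sigmoid(logits)
cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=logits))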