我正在尝试使用Tensorflow为Kaggle Facial Keypoints Detection competition构建一个基本的Softmax回归模型。
我参照Tensorflow初学者MNIST教程中的Softmax回归模型示例来构建模型,并按照Daniel Nouri的blog来整理数据。
我面临的问题是脚本输出的准确率始终为0.0。
我也参考过类似的问题,但都没有解决:
注意:我会删除数据集中含有缺失值的所有行。
import random
import numpy as np
import pandas as pd
from sklearn import cross_validation
import tensorflow as tf
# Load the training table and keep only the rows without missing values.
data = pd.read_csv("../data/training.csv").dropna()

# Each 'Image' cell is a space-separated string of pixel values; parse every
# cell into a numpy array.
data['Image'] = data['Image'].apply(
    lambda pixels: np.fromstring(pixels, sep=' '))

# Stack the per-row pixel arrays into a single float32 matrix, then rescale
# from [0, 255] to [0.0, 1.0].
X = np.vstack(data['Image'].values).astype(np.float32)
X = X / 255.0
print("X.shape == {}; X.min == {:.3f}; X.max == {:.3f}".format(
    X.shape, X.min(), X.max()))

# All columns except the last one are used as regression targets.
y = data[data.columns[:-1]].values
y = ((y - 48) / 48).astype(np.float32)  # scale target coordinates to [-1, 1]
print("y.shape == {}; y.min == {:.3f}; y.max == {:.3f}".format(
    y.shape, y.min(), y.max()))

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.2, random_state=42)

# Module-level state read and updated by next_batch() while serving batches.
num_examples = X_train.shape[0]
index_in_epoch = 0
epochs_completed = 0
def next_batch(batch_size):
    """Return the next ``batch_size`` training rows as ``(inputs, targets)``.

    Walks through the module-level ``X_train`` / ``y_train`` arrays using the
    cursor ``index_in_epoch``. When the requested window would run past
    ``num_examples``, the epoch counter is incremented, both arrays are
    reshuffled with one shared permutation, and the batch is taken from the
    start of the reshuffled data (the unread tail of the old epoch is
    skipped).

    Args:
        batch_size: Number of rows to return.

    Returns:
        A tuple ``(X_batch, y_batch)`` of slices of the training arrays.

    Raises:
        AssertionError: On epoch rollover, if ``batch_size`` exceeds
            ``num_examples``.
    """
    global X_train, y_train, index_in_epoch, epochs_completed

    lo = index_in_epoch
    index_in_epoch = hi = lo + batch_size

    if hi > num_examples:
        # The current epoch cannot supply a full batch: count it as finished.
        epochs_completed += 1

        # Apply one permutation to both arrays so rows stay paired.
        order = np.arange(num_examples)
        np.random.shuffle(order)
        X_train, y_train = X_train[order], y_train[order]

        # Restart the cursor at the head of the reshuffled data.
        lo = 0
        index_in_epoch = hi = batch_size
        assert batch_size <= num_examples

    return X_train[lo:hi], y_train[lo:hi]
def tf_softmax(output=True):
    """Train a single-layer softmax model and print its test-set accuracy.

    Builds a graph that maps 9216 inputs through one dense layer and a
    softmax to 30 outputs, runs 1000 Adagrad steps (learning rate 0.01) on
    batches of 100 rows from ``next_batch``, and finally prints the fraction
    of ``X_test`` rows whose predicted argmax equals the argmax of the
    corresponding ``y_test`` row.

    Args:
        output: Not used by the body; kept so the call signature is unchanged.
    """
    # Seeds only the stdlib ``random`` module.
    random.seed(42)

    n_inputs = 9216
    n_outputs = 30

    # Model: softmax(x . W + b), with zero-initialised parameters.
    x = tf.placeholder(tf.float32, [None, n_inputs])
    weights = tf.Variable(tf.zeros([n_inputs, n_outputs]))
    bias = tf.Variable(tf.zeros([n_outputs]))
    y = tf.nn.softmax(tf.matmul(x, weights) + bias)

    # Placeholder for the target values.
    y_ = tf.placeholder(tf.float32, [None, n_outputs])

    # NOTE(review): tf.log(y) is applied to the raw softmax output with no
    # epsilon, so an output of exactly 0 would give log(0) = -inf.
    loss = -tf.reduce_mean(y_ * tf.log(y))
    # Log the loss value every time it is evaluated.
    loss = tf.Print(loss, [loss], "CrossE")
    train_step = tf.train.AdagradOptimizer(0.01).minimize(loss)

    sess = tf.Session()
    sess.run(tf.initialize_all_variables())

    for _ in range(1000):
        batch_xs, batch_ys = next_batch(100)
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

    # Accuracy: share of rows where the argmax of prediction and target agree.
    hits = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(hits, "float"))
    print(sess.run(accuracy, feed_dict={x: X_test, y_: y_test}))
# Run the training routine only when this file is executed as a script.
if __name__ == "__main__":
    tf_softmax()
终端日志:
X.shape == (2140, 9216); X.min == 0.000; X.max == 1.000
y.shape == (2140, 30); y.min == -0.920; y.max == 0.996
I tensorflow/core/common_runtime/local_device.cc:40] Local device intra op parallelism threads: 8
I tensorflow/core/common_runtime/direct_session.cc:58] Direct session inter op parallelism threads: 8
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-0.026783023]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-0.746333]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-1.5569628]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-2.3121257]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-3.1212008]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-3.6717496]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-4.5370874]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-5.3017216]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-6.0262742]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-6.9303608]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-7.3321404]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-8.2394152]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[-inf]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[nan]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[nan]
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[nan]
.....
.....
.....
I tensorflow/core/kernels/logging_ops.cc:79] CrossE[nan]
0.0
编辑:
问题似乎出在代价函数(交叉熵)上,将其改为如下形式后问题得以解决:
# Adding 1e-10 inside tf.log keeps its argument away from exactly 0
# (assuming y is a softmax output, i.e. non-negative), so the loss no
# longer evaluates log(0) = -inf. This version also sums the terms with
# reduce_sum.
cross_entropy = -tf.reduce_sum(y_*tf.log(y + 1e-10))