I wrote a neural network in TensorFlow for XOR inputs. I used one hidden layer with 2 units and softmax classification. The input has the form <1, x_1, x_2, zero, one>, where the leading 1 is the bias and the last two columns (zero, one) are the one-hot label for x_1 XOR x_2.
The accuracy always stays around 0.5. What is going wrong? Is the network architecture wrong, or is it something in the code?
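To make the encoding concrete, here is a minimal sketch of the four noise-free XOR patterns in that layout (illustrative only; in the actual code below the 0/1 inputs are sampled with tf.random_normal around 0 and 1):

import numpy as np

# <bias, x_1, x_2, zero, one>: the last two columns are the one-hot label,
# zero=1 when x_1 XOR x_2 is 0 and one=1 when it is 1.
xor_patterns = np.array([
    [1., 0., 0., 1., 0.],  # 0 XOR 0 = 0
    [1., 0., 1., 0., 1.],  # 0 XOR 1 = 1
    [1., 1., 0., 0., 1.],  # 1 XOR 0 = 1
    [1., 1., 1., 1., 0.],  # 1 XOR 1 = 0
])

The full code: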
import tensorflow as tf
import numpy as np
from random import randint
DEBUG=True
def init_weights(shape):
    return tf.Variable(tf.random_normal(shape, stddev=0.01))
def model(X, weight_hidden, weight_output):
    # [1,3] x [3,n_hiddent_units] = [1,n_hiddent_units]
    hiddern_units_output = tf.nn.sigmoid(tf.matmul(X, weight_hidden))
    # [1,n_hiddent_units] x [n_hiddent_units, 2] = [1,2]
    return hiddern_units_output
    #return tf.matmul(hiddern_units_output, weight_output)
def getHiddenLayerOutput(X, weight_hidden):
    hiddern_units_output = tf.nn.sigmoid(tf.matmul(X, weight_hidden))
    return hiddern_units_output
total_inputs = 100
zeros = tf.zeros([total_inputs,1])
ones = tf.ones([total_inputs,1])
around_zeros = tf.random_normal([total_inputs,1], mean=0, stddev=0.01)
around_ones = tf.random_normal([total_inputs,1], mean=1, stddev=0.01)
batch_size = 10
n_hiddent_units = 2
X = tf.placeholder("float", [None, 3])
Y = tf.placeholder("float", [None, 2])
weight_hidden = init_weights([3, n_hiddent_units])
weight_output = init_weights([n_hiddent_units, 2])
hiddern_units_output = getHiddenLayerOutput(X, weight_hidden)
py_x = model(X, weight_hidden, weight_output)
#cost = tf.square(Y - py_x)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y))
train_op = tf.train.GradientDescentOptimizer(0.05).minimize(cost)
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    trX_0_0 = sess.run(tf.concat([ones, around_zeros, around_zeros, ones, zeros], axis=1))
    trX_0_1 = sess.run(tf.concat([ones, around_zeros, around_ones, zeros, ones], axis=1))
    trX_1_0 = sess.run(tf.concat([ones, around_ones, around_zeros, zeros, ones], axis=1))
    trX_1_1 = sess.run(tf.concat([ones, around_ones, around_ones, ones, zeros], axis=1))
    trX = sess.run(tf.concat([trX_0_0, trX_0_1, trX_1_0, trX_1_1], axis=0))
    trX = sess.run(tf.random_shuffle(trX))
    print(trX)

    for i in range(10):
        for start, end in zip(range(0, len(trX), batch_size), range(batch_size, len(trX) + 1, batch_size)):
            trY = tf.identity(trX[start:end,3:5])
            trY = sess.run(tf.reshape(trY,[batch_size, 2]))
            sess.run(train_op, feed_dict={ X: trX[start:end,0:3], Y: trY })
        start_index = randint(0, (total_inputs*4)-batch_size)
        y_0 = sess.run(py_x, feed_dict={X: trX[start_index:start_index+batch_size,0:3]})
        print("iteration :", i, " accuracy :", np.mean(np.absolute(trX[start_index:start_index+batch_size,3:5]-y_0)), "\n")
See the comments section for the updated code.
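For context, a minimal sketch of one possible fix, assuming the problem is that model() returns the hidden-layer activations instead of the output-layer logits, and that accuracy is better measured with argmax over the one-hot labels than with the mean absolute error printed above:

def model(X, weight_hidden, weight_output):
    # hidden layer: [batch, 3] x [3, n_hiddent_units] -> [batch, n_hiddent_units]
    hiddern_units_output = tf.nn.sigmoid(tf.matmul(X, weight_hidden))
    # output layer: [batch, n_hiddent_units] x [n_hiddent_units, 2] -> [batch, 2] logits
    return tf.matmul(hiddern_units_output, weight_output)

# argmax-based accuracy for one-hot labels,
# evaluated with e.g. sess.run(accuracy, feed_dict={X: batch_x, Y: batch_y})
correct_prediction = tf.equal(tf.argmax(py_x, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))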