有两件事我无法理解。 以下代码应该学习简单的OR逻辑功能:
import numpy as np
import tensorflow as tf
from datetime import datetime
# Build a timestamped TensorBoard log directory, e.g. "tf_logs/run-20240101123000/".
now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
root_logdir = "tf_logs"
logdir = f"{root_logdir}/run-{now}/"
def neuron_layer(X, n_neurons, name, activation=None):
    """Build one fully connected layer: activation(X @ W + b).

    Args:
        X: input tensor of shape (batch, n_inputs).
        n_neurons: number of output units.
        name: name scope for the layer's ops and variables.
        activation: optional elementwise activation; identity when None.

    Returns:
        The layer's output tensor of shape (batch, n_neurons).
    """
    with tf.name_scope(name):
        fan_in = int(X.get_shape()[1])
        # NOTE(review): 2/sqrt(fan_in) looks like He-style initialisation — confirm intent.
        weight_init = tf.truncated_normal((fan_in, n_neurons),
                                          stddev=2 / np.sqrt(fan_in))
        W = tf.Variable(weight_init, name="kernel")
        # One bias per neuron, broadcast across the batch dimension.
        b = tf.Variable(tf.zeros([n_neurons]), name="bias")
        Z = tf.matmul(X, W) + b
        return Z if activation is None else activation(Z)
n_INPUTS = 2

# OR truth table: four 2-bit inputs and their single-bit targets.
X_OR = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y_OR = np.array([[0], [1], [1], [1]])
X_train, y_train = X_OR, y_OR

with tf.name_scope('input'):
    X = tf.placeholder(tf.float32, [None, n_INPUTS], name="X-inputs")
    # NOTE(review): keep y as [None, 1] so it matches y_pred's (batch, 1) shape;
    # feeding rank-1 targets of shape [None] would broadcast (4,1) - (4,) into a
    # (4,4) error matrix inside the MSE — confirm against the question's variant.
    y = tf.placeholder(tf.float32, [None, 1], name="y-inputs")

# hidden1 = neuron_layer(X, 2, "hidden1")  # disabled hidden layer kept for reference
y_pred = neuron_layer(X, 1, "output")

with tf.name_scope('MSE'):
    error = y_pred - y
    mse = tf.reduce_mean(tf.square(error), name="mse")
    tf.summary.scalar('mse-scalar', mse)

with tf.name_scope('train'):
    optimiser = tf.train.AdamOptimizer(0.01)
    training_op = optimiser.minimize(mse)

# TensorBoard plumbing: merged summaries plus train/test writers under logdir.
merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(logdir + '/train',
                                     tf.get_default_graph())
test_writer = tf.summary.FileWriter(logdir + '/test')

init = tf.global_variables_initializer()
n_epochs = 1000

with tf.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # Full-batch step: the whole 4-row truth table is fed every epoch.
        summary, _ = sess.run([merged, training_op],
                              feed_dict={X: X_train, y: y_train})
        train_writer.add_summary(summary, epoch)
        # Print current predictions every 100 epochs (at epoch 99, 199, ...).
        if epoch % 100 == 99:
            print(y_pred.eval(feed_dict={X: X_train, y: y_train}))

train_writer.close()
test_writer.close()
目前,我的偏差形状遵循以下规则:偏差节点的数量等于层中神经元的数量。因此b = tf.Variable(tf.zeros([n_neurons]), name="bias")
但是,在这种情况下,网络无法收敛。得到的结果等于:
[[0.2599482 ]
[0.7518728 ]
[0.75169754]
[1.2436221 ]]
但是,如果我将其形状更改为b = tf.Variable(tf.zeros([4, n_neurons]), name="bias")
(其中4是数据点数),那么它就会收敛。但是,据我所记得,公式是 X * w + b,其中 X 的形状为(样本数, 特征数),w 的形状为(特征数, 神经元数),b 的形状为(神经元数)。
我还可以从文档(https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/mnist/mnist.py)中的示例注意到这一点。为什么我需要在偏差的形状中指定一个批次的样本数量?我认为不应该需要这样做。
给出链接中的MNIST示例,我希望y
占位符具有以下形状:
y = tf.placeholder(tf.float32, [None], name = "y-inputs")
因为我们通常事先不知道有多少示例,所以目标只是一个类别列表。但是,如果在我的代码中做这一更改,我将得到两个不同的结果:
(正确的结果)
X_OR = np.array([[0,0],[0,1],[1,0],[1,1]])
y_OR = np.array([[0], [1], [1],[1]])
X = tf.placeholder(tf.float32, [None, n_INPUTS], name="X-inputs")
y = tf.placeholder(tf.float32, [None,1], name = "y-inputs")
到(错误结果):
X_OR = np.array([[0,0],[0,1],[1,0],[1,1]])
y_OR = np.array([0, 1, 1,1])
X = tf.placeholder(tf.float32, [None, n_INPUTS], name="X-inputs")
y = tf.placeholder(tf.float32, [None], name = "y-inputs")
tf如何解释这种形状变化?