Background
I am trying to create an MLP with TensorFlow; this is my first time using TensorFlow. It is a simple NN that will perform the XOR operation. I have 2 input neurons (for the 1s and 0s), one hidden layer that is 2 neurons wide, and one output that gives me a 1 or a 0. My activation is a simple sigmoid.
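To make that concrete, here is a tiny single-example forward pass in plain NumPy (just a sketch of the shapes I have in mind; W1, W2 and sigmoid here are illustrative names, not the classes in my real code further down):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

x = np.array([1.0, 0.0])          # one XOR input pair
W1 = np.random.rand(3, 2)         # 2 inputs + 1 bias -> 2 hidden neurons
W2 = np.random.rand(3, 1)         # 2 hidden + 1 bias -> 1 output

h = sigmoid(np.dot(np.append(x, 1.0), W1))       # hidden activations, shape (2,)
y_hat = sigmoid(np.dot(np.append(h, 1.0), W2))   # network output, shape (1,)
print y_hat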
Problem
I am having trouble launching the graph. One thing I noticed is that when we launch the graph we get the whole batch at once, not one example at a time. For example, I have the array [[1,0],[0,0],[0,1],[1,1]]. When I try to launch the graph I do the following:
x_vals = np.array([[1.0, 0.0],[0.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
y_vals = np.array([[1.0],[0.0],[1.0],[0.0]])
result = run_nn(x,y)
with tf.Session() as sess:
    sess.run(init)
    results = sess.run(result, feed_dict={x: x_vals, y:y_vals})
    print results
You can see that I feed x and y into the neural network. Once I have done that, I need to multiply the weights by the outputs (which are basically the input, e.g. [1,0]) and sum them. The problem is that I get a size mismatch between the x values and the weights array:
tf.transpose(tf.reduce_sum(tf.multiply(tf.transpose(l0.weights),l0.outputs),1))
InvalidArgumentError: Incompatible shapes: [2,3] vs. [4,3]
[[Node: Mul_6 = Mul[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"](transpose_10, concat_12)]]
What am I doing wrong here? I know this isn't a perfect implementation, but I want to build the NN step by step.
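For comparison, here is a minimal sketch of what I think a batched forward pass for one hidden layer might look like (just my guess, using the standard tf.matmul/tf.sigmoid ops; x_b, W1 and h are illustrative names and not from my code below):

import numpy as np
import tensorflow as tf

# Sketch only: batched forward pass for one hidden layer.
# x stays [batch, 2]; the weights never need to know the batch size.
x = tf.placeholder(tf.float32, [None, 2])
x_b = tf.concat([x, tf.ones_like(x[:, :1])], 1)      # append a bias column -> [batch, 3]
W1 = tf.Variable(tf.random_normal([3, 2]))           # 2 inputs + bias -> 2 hidden neurons
h = tf.sigmoid(tf.matmul(x_b, W1))                   # [batch, 2]

x_vals = np.array([[1.0, 0.0], [0.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print sess.run(h, feed_dict={x: x_vals}).shape   # (4, 2), one row per example

Is switching to a matrix multiply like this the right direction, or is there a way to keep my per-layer multiply/reduce_sum approach?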
Here is my full code:
import math
import numpy as np
import tensorflow as tf

momentum = 0.5
learning_rate = 2.0
class layer:
    def __init__(self, num_neurons, num_weights, layer_type):#number of weights corresponds to number of neurons in next layer
        self.num_neurons = num_neurons
        self.num_weights = num_weights
        self.layer_type = layer_type
        if layer_type == 'hidden':
            num_neurons = num_neurons+1#account for bias
            self.num_neurons = num_neurons+1
        self.weights = tf.random_normal([num_neurons, num_weights])
        self.outputs = tf.zeros(num_neurons, tf.float32)
        self.sums = tf.zeros(num_neurons, tf.float32)
        self.deltas = tf.zeros(num_neurons, tf.float32)
        self.gradiants = tf.zeros([num_neurons, num_weights], tf.float32)
        self.weight_deltas = tf.zeros_like(self.gradiants)

    def calculate_sums(self, p_layer):
        self.sums = tf.transpose(tf.reduce_sum(tf.multiply(tf.transpose(p_layer.weights), p_layer.outputs), 1))
        return self.sums

    def calculate_outputs(self, p_layer):
        if self.layer_type == 'hidden':
            self.outputs = tf.concat([sigmoid(self.sums, False), tf.constant([1.0])], 0)
        else:
            self.outputs = sigmoid(self.sums, False)
        return self.outputs

    def calculate_deltas(self, n_layer = None, y=None):
        if self.layer_type == 'hidden':
            self.deltas = sigmoid(self.sums, True) * n_layer.deltas * self.weights[:-1,0]
        else:#output delta
            E = self.outputs[:self.num_neurons]-y
            #print 'error: {}'.format(E)
            self.deltas = -E* sigmoid(self.sums, True)
        return self.deltas

    def calculate_gradiants(self, n_layer):
        shape = (tf.shape(self.outputs)[0], 1)
        self.gradiants += tf.reshape(self.outputs, shape=shape) * tf.transpose(n_layer.deltas)#we add the gradiants for every batch completion then update, dont want to update every time
        return self.gradiants

    def update_weights(self):
        self.weight_deltas = self.gradiants*learning_rate + momentum * self.weight_deltas
        self.weights += self.weight_deltas
        # for i in range(len(self.gradiants)):
        #     for j in range(len(self.gradiants[0])):
        #         self.weight_deltas[i,j] = weight_change(self.gradiants[i,j], self.weight_deltas[i,j])
        #         self.weights[i,j] += self.weight_deltas[i,j]

def sigmoid(x, derivative = False):
    if derivative == True:
        return (1.0/(1+tf.exp(-x))) * (1.0 - (1.0/(1+tf.exp(-x))))
    return 1.0/(1+tf.exp(-x))

#the output delta is just E*f'i, essentially the error * the derivative of the activation function
def weight_change(g, p_w_delta):#gradiant, previous weight delta
    return learning_rate*g + momentum * p_w_delta

def run_nn(x_val, y_val):
    l0.outputs = tf.concat([x_val, tf.ones(shape=(tf.shape(x_val)[0],1))], 1)
    print 'set output'
    #forward pass
    # l1.calculate_sums(l0)
    # print 'l1 calc sum'
    # l1.calculate_outputs(l0)
    # print 'l1 calc output'
    # ol.calculate_sums(l1)
    # print 'ol calc sum'
    # ol.calculate_outputs(l1)
    # print 'ol calc output'
    # #backwards pass
    # ol.calculate_deltas(y=y_val)
    # print 'ol calc deltas'
    # l1.calculate_deltas(ol)
    # print 'l1 calc deltas'
    # l1.calculate_gradiants(ol)
    # print 'l1 calc gradiants'
    # l0.calculate_gradiants(l1)
    # print 'l0 calc gradiants'
    # #we dont want to update the weights every time, just after we have gone through every batch/minibatch
    # l1.update_weights()
    # print 'l1 update weights'
    # l0.update_weights()
    # print 'l0 update weights'
    # l1.gradiants = tf.zeros_like(l1.gradiants)
    # print 'l1 zero gradiants'
    # l0.gradiants = tf.zeros_like(l0.gradiants)
    # print 'l0 zero gradiants'
    # #test
    # print 'run test'
    # l0.outputs = tf.concat([x, tf.constant([1.0])], 0 )
    # #forward pass
    # l1.calculate_sums(l0)
    # l1.calculate_outputs(l0)
    #
    # ol.calculate_sums(l1)
    # ol.calculate_outputs(l1)
    # print 'DONE'
    return tf.transpose(tf.reduce_sum(tf.multiply(tf.transpose(l0.weights), l0.outputs), 1))

l0 = layer(2,2,'hidden')#input
l1 = layer(2,1,'hidden')#hidden
ol = layer(1,0,'output')#output

x_vals = np.array([[1.0, 0.0],[0.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
y_vals = np.array([[1.0],[0.0],[1.0],[0.0]])

# initialize variables
init = tf.global_variables_initializer()

x = tf.placeholder('float', None)
y = tf.placeholder('float', None)

result = run_nn(x,y)

with tf.Session() as sess:
    sess.run(init)
    results = sess.run(result, feed_dict={x: x_vals, y:y_vals})
    print results
Here is some equivalent code in pure Python/NumPy:
import math
import numpy as np

momentum = 0.5
learning_rate = 2.0

class layer:
    def __init__(self, num_neurons, num_weights, layer_type):#number of weights corresponds to number of neurons in next layer
        self.layer_type = layer_type
        if layer_type == 'hidden':
            num_neurons = num_neurons+1#account for bias
        self.weights = np.random.rand(num_neurons,num_weights)
        self.outputs = np.zeros(shape=(1,num_neurons))
        self.sums = np.zeros(shape=(1,num_neurons))
        self.deltas = np.zeros(shape=(1,num_neurons)).T
        self.gradiants = np.zeros(shape=(num_neurons,num_weights))
        self.weight_deltas = np.zeros_like(self.gradiants)

    def calculate_sums(self, p_layer):
        self.sums = np.array([(sum(p_layer.weights * p_layer.outputs))]).T
        return self.sums

    def calculate_outputs(self, p_layer):
        if self.layer_type == 'hidden':
            self.outputs = np.concatenate((np.array([[sigmoid(X, False)] for X in self.sums]), np.array([[1.0]])))
        else:
            self.outputs = np.array([[sigmoid(X, False)] for X in self.sums])
        return self.outputs

    def calculate_deltas(self, n_layer = None):
        if self.layer_type == 'hidden':
            self.deltas = np.array([[sigmoid(X, True)] for X in self.sums]) * n_layer.deltas * self.weights[:-1]
        else:#output delta
            E = self.outputs-y
            #print 'error: {}'.format(E)
            self.deltas = -E* sigmoid(self.sums, True)
        return self.deltas

    def calculate_gradiants(self, n_layer):
        self.gradiants += self.outputs * n_layer.deltas.T#we add the gradiants for every batch completion then update, dont want to update every time
        return self.gradiants

    def update_weights(self):
        for i in range(len(self.gradiants)):
            for j in range(len(self.gradiants[0])):
                self.weight_deltas[i,j] = weight_change(self.gradiants[i,j], self.weight_deltas[i,j])
                self.weights[i,j] += self.weight_deltas[i,j]

def sigmoid(x, derivative = False):
    if derivative == True:
        return (1.0/(1+math.exp(-x))) * (1.0 - (1.0/(1+math.exp(-x))))
    return 1.0/(1+math.exp(-x))

#the output delta is just E*f'i, essentially the error * the derivative of the activation function
def weight_change(g, p_w_delta):#gradiant, previous weight delta
    return learning_rate*g + momentum * p_w_delta

input_layer = layer(3,2, 'hidden')
hidden_layer1 = layer(2,1, 'hidden')
output_layer = layer(1,0, 'output')

x_vals = []
y_vals = []
for i in range(2):
    for j in range(2):
        for k in range(2):
            x_vals.append(np.array([[float(i)],[float(j)],[float(k)]]))
            y_vals.append(np.array([float(i ^ j ^ k)]))
#x_vals = [np.array([[1.0], [0.0]]), np.array([[0.0], [0.0]]), np.array([[0.0], [1.0]]),np.array([[1.0], [1.0]])]
#y_vals = np.array([[1.0],[0.0],[1.0],[0.0]])
#input_layer.weights = np.array([[-0.06782947598673161,0.9487814395569221],[0.22341077197888182,0.461587116462548], [-0.4635107399577998, 0.09750161997450091]])
#hidden_layer1.weights = np.array([[-0.22791948943117624],[0.581714099641357], [0.7792991203673414]])

Error = []
for n in range(10000):
    for x, y in zip(x_vals, y_vals):
        input_layer.outputs = np.concatenate((x, np.array([[1.0]])))
        #forward pass
        hidden_layer1.calculate_sums(input_layer)
        hidden_layer1.calculate_outputs(input_layer)
        output_layer.calculate_sums(hidden_layer1)
        output_layer.calculate_outputs(hidden_layer1)
        Error.append(-(output_layer.outputs-y))
        #backwards pass
        output_layer.calculate_deltas()
        hidden_layer1.calculate_deltas(output_layer)
        hidden_layer1.calculate_gradiants(output_layer)
        input_layer.calculate_gradiants(hidden_layer1)
    if n % 1000 == 0:
        print 'Epoch #{}; error: {}'.format(n, sum(Error)/len(Error))
        Error = []
    #we dont want to update the weights every time, just after we have gone through every batch/minibatch
    hidden_layer1.update_weights()
    input_layer.update_weights()
    hidden_layer1.gradiants.fill(0.0)
    input_layer.gradiants.fill(0.0)

#test
for x, y in zip(x_vals, y_vals):
    input_layer.outputs = np.concatenate((x, np.array([[1.0]])))
    #forward pass
    hidden_layer1.calculate_sums(input_layer)
    hidden_layer1.calculate_outputs(input_layer)
    output_layer.calculate_sums(hidden_layer1)
    output_layer.calculate_outputs(hidden_layer1)
    print 'Y_hat: {}, Y: {}'.format(round(float(output_layer.outputs), 3), float(y))
Can anyone point me in the right direction? Thanks.