I'm trying to implement a neural network from scratch, but I can't get backpropagation right. I suspect I'm either indexing into the wrong weights/activations or I'm not setting up the matrix multiplication correctly; either way, I can't get the matrices to align.

Note that the cost is the sum of squared errors, and I'm taking the gradient of the cost with respect to z.

Rather than posting the entire code, I'll post just the relevant parts.
My network is:
# [3, 5, 5, 1]: 3 inputs, 5 hidden units, 5 hidden units, 1 output unit

Activation shapes:
## a[-1] = (1, 1)   # output layer
## a[-2] = (1, 5)
## a[-3] = (1, 5)
## a[-4] = (1, 3)   # input layer

Weight shapes as follows:
## weights[-1] = (5, 1)   # last layer
## weights[-2] = (5, 5)
## weights[-3] = (3, 5)   # first layer

Zs:
## zs[-1] shape: (1, 1)   # last layer z
## zs[-2] shape: (1, 5)
## zs[-3] shape: (1, 5)   # first layer z
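These shapes are consistent with a row-vector convention, i.e. each layer computes z = a @ W + b. For context, here is a minimal forward-pass sketch under that assumption (feed_forward is a hypothetical name; gather_backprop_data presumably produces activations and zs like this, and sigmoid is defined further down):

import numpy as np

def feed_forward(weights, biases, x):
    # Assumes row vectors: activations are (1, n), biases are (1, n).
    activations, zs = [x], []
    a = x                        # (1, 3) input row vector
    for W, b in zip(weights, biases):
        z = np.dot(a, W) + b     # (1, n_in) @ (n_in, n_out) -> (1, n_out)
        a = sigmoid(z)
        zs.append(z)
        activations.append(a)
    return activations, zs       # activations: (1,3), (1,5), (1,5), (1,1)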
# code for generating pseudo training data
import numpy as np

def psuedo_training_data():
    input_data = np.random.randint(1, 5, (500, 1, 3))
    labels = np.random.randint(0, 2, (500, 1))
    split = int(len(input_data) * 0.8)   # 80/20 train/test split
    training_data = [(x, y) for x, y in zip(input_data[:split], labels[:split])]
    testing_data = [(x, y) for x, y in zip(input_data[split:], labels[split:])]
    return training_data, testing_data
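As a sanity check on the per-sample shapes this generator hands to backprop (illustrative only):

training_data, testing_data = psuedo_training_data()
x, y = training_data[0]
print(x.shape)   # (1, 3) -- one input row vector
print(y.shape)   # (1,)   -- one 0/1 label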
def sigmoid_derivative(z):
    return sigmoid(z) * (1 - sigmoid(z))

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))
def calculate_gradients(weights, biases, x, y):
    weight_derivatives, bias_derivatives, activations, zs = gather_backprop_data(weights, biases, x, y)
    # weight_derivatives and bias_derivatives are empty shells

    # gradient of cost with respect to z of the last layer
    last_layer_z_error = (activations[-1] - y) * sigmoid_derivative(zs[-1])

    # updating the weight derivatives of the final layer
    weight_derivatives[-1] = last_layer_z_error * (activations[-2]).T
    bias_derivatives[-1] = last_layer_z_error

    # attempting to get the gradients of the hidden layers
    z_previous_layer = last_layer_z_error
    for i in reversed(range(1, len(weights))):
        print('layer:', i)
        print('****************************************')
        print('z_previous_layer.shape', z_previous_layer.shape)
        print('weights[i-1].shape', weights[i-1].shape)
        print('sigmoid_derivative(zs[i-1]).shape', sigmoid_derivative(zs[i-1]).shape)
        print('******************************************************')

        z_previous_layer = np.dot(weights[i].T, z_previous_layer) * sigmoid_derivative(zs[i-1])

        print('********************************************')
        print('activations[i-1].shape', activations[i-1].shape)
        print('z_previous_layer_post.shape', z_previous_layer.shape)

        weight_derivatives[i-1] = np.dot(z_previous_layer, activations[i-1].T)
        bias_derivatives[i-1] = z_previous_layer

    return weight_derivatives, bias_derivatives
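For comparison, with the row-vector shapes listed above, flipping the np.dot operand order makes every product line up; this is only a sketch of that variant under the z = a @ W convention, not a tested drop-in replacement:

# delta stays a (1, n) row vector throughout
delta = last_layer_z_error                                 # (1, 1)
weight_derivatives[-1] = np.dot(activations[-2].T, delta)  # (5, 1), matches weights[-1]
bias_derivatives[-1] = delta
for i in reversed(range(1, len(weights))):
    # (1, n_out) @ (n_out, n_in) -> (1, n_in), then elementwise sigmoid'
    delta = np.dot(delta, weights[i].T) * sigmoid_derivative(zs[i-1])
    # (n_in, 1) @ (1, n_out) -> (n_in, n_out), matches weights[i-1]
    weight_derivatives[i-1] = np.dot(activations[i-1].T, delta)
    bias_derivatives[i-1] = delta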
Full stack error:
ValueError                                Traceback (most recent call last)
<ipython-input-399-ee974769393b> in <module>()
----> 1 stochastic_gradient_decent(training_data, testing_data, 5, 25, NN.weights, NN.biases, NN)

<ipython-input-395-e78e37a1fda0> in stochastic_gradient_decent(training_data, testing_data, epochs, mini_batch_size, weights, biases, neural_network)
     13 for mini_batch in mini_batches:
     14     for x, y in mini_batch:
---> 15         weight_derivatives, bias_derivatives = calculate_gradients(weights, biases, x, y)
     16
     17 for i in range(len(weights)):

<ipython-input-393-bb458045ab42> in calculate_gradients(weights, biases, x, y)
     19 print('******************************************************')
     20
---> 21 z_previous_layer = np.dot(weights[i].T, z_previous_layer)*(sigmoid_derivative(zs[i-1]))
     22
     23
ValueError: shapes (1,5) and (1,1) not aligned: 5 (dim 1) != 1 (dim 0)
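The error matches the first pass through the loop (i = 2): weights[2] is (5, 1), so weights[2].T is (1, 5), while z_previous_layer is the (1, 1) last-layer error, and np.dot cannot contract dim 5 against dim 1. A minimal reproduction of just the shape mismatch:

import numpy as np

W_T = np.zeros((1, 5))    # weights[2].T, since weights[-1] is (5, 1)
delta = np.zeros((1, 1))  # last_layer_z_error
np.dot(W_T, delta)        # ValueError: shapes (1,5) and (1,1) not aligned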