My 3-layer neural network (input -> hidden -> output) isn't working. There are no syntax errors, but it doesn't learn properly. Why is that, and how do I fix it?
Is my backpropagation incorrect, or is the mistake somewhere else? I don't get it; everything looks right to me, but apparently it isn't.
I've tried tuning my hyperparameters, but that didn't really change anything, and I've also tried multiplying the derivatives in different orders, but that didn't work either.
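To make explicit what I think the math should be: these are the chain-rule factors I'm multiplying together (they correspond to the variables dE_dPo, dPo_dZo, dZo_dwho, dZo_dPh, dPh_dZh and dZh_dwih in the code below), with $E = (P_o - Y)^2 / 2$:

$$\frac{\partial E}{\partial w_{ho}} = \frac{\partial E}{\partial P_o} \cdot \frac{\partial P_o}{\partial Z_o} \cdot \frac{\partial Z_o}{\partial w_{ho}}, \qquad \frac{\partial E}{\partial w_{ih}} = \frac{\partial E}{\partial P_o} \cdot \frac{\partial P_o}{\partial Z_o} \cdot \frac{\partial Z_o}{\partial P_h} \cdot \frac{\partial P_h}{\partial Z_h} \cdot \frac{\partial Z_h}{\partial w_{ih}}$$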
"""
This is a three layer neural network that will predict the xor value of
an input,
using gradient descent and backpropagation
"""
import numpy as np
class NeuralNetwork:
    # relu activation function for hidden layer
    # derivative of relu at a specific point
    def relu(self, val, deriv=False):
        if deriv:
            # overwrites val in place with the 0/1 mask, then returns it
            val[val <= 0] = 0
            val[val > 0] = 1
        return np.maximum(0, val)

    # sigmoid activation function for output layer
    # derivative of sigmoid at a specific point
    # (expects val to already be the sigmoid output)
    def sigmoid(self, val, deriv=False):
        if deriv:
            return val * (1 - val)
        return 1 / (1 + np.exp(-val))
    # start training
    def Train(self, X, Y, w_ih, b_h, w_ho, b_o, lr, epochs):
        # training loop
        for i in range(epochs):
            # forward propagate
            # weighted sum of the hidden layer
            Z_h = np.dot(X, w_ih) + b_h
            # prediction of the hidden layer
            P_h = self.relu(Z_h)
            # weighted sum of the output layer
            Z_o = np.dot(P_h, w_ho) + b_o
            # prediction of the output layer
            P_o = self.sigmoid(Z_o)
            # backpropagation
            # error of the prediction
            # MSE - mean squared error
            E_o = ((P_o - Y)**2) / 2
            # partial derivatives of the error with respect to the parameters
            dE_dPo = (P_o - Y)                  # 4, 1
            dPo_dZo = self.sigmoid(P_o, True)   # 4, 1
            dZo_dwho = P_h                      # 4, 2
            dZo_dPh = w_ho                      # 2, 1
            dPh_dZh = self.relu(P_h, True)      # 4, 2
            dZh_dwih = X                        # 4, 2
            # update weights and biases from hidden to output
            delta_w_ho = lr * np.dot(dZo_dwho.T, dE_dPo * dPo_dZo)
            delta_b_o = lr * sum(dE_dPo * dPo_dZo)
            w_ho -= delta_w_ho
            b_o -= delta_b_o
            # update weights and biases from input to hidden
            delta_w_ih = lr * np.dot(np.dot(dZo_dPh, dE_dPo.T * dPo_dZo.T), dPh_dZh * dZh_dwih)
            delta_b_h = lr * sum(np.dot(np.dot(dZo_dPh, dE_dPo.T * dPo_dZo.T), dPh_dZh))
            w_ih -= delta_w_ih
            b_h -= delta_b_h
        print("prediction")
        print(P_o)
        print("MSE for output layer")
        print(E_o)
if __name__ == "__main__":
    nn = NeuralNetwork()
    input_data = np.array([[0, 0],
                           [0, 1],
                           [1, 0],
                           [1, 1]])
    output_data = np.array([[0],
                            [1],
                            [1],
                            [0]])
    # number of neurons in each layer
    n_i = 2
    n_h = 2
    n_o = 1
    # initialize weights and biases
    # seed the numbers
    np.random.seed(0)
    # weights and biases for hidden layer
    w_ih = np.random.random((n_i, n_h))
    b_h = np.random.random((n_h))
    # weights and biases for output layer
    w_ho = np.random.random((n_h, n_o))
    b_o = np.random.random(n_o)
    # hyper parameters
    learning_rate = 0.2
    epochs = 1000
    nn.Train(input_data, output_data, w_ih, b_h, w_ho, b_o, learning_rate, epochs)
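In case it helps narrow things down, below is a minimal finite-difference gradient check I could append to the main block to compare my analytic gradients (e.g. delta_w_ih / lr) against numerical ones. The helper names mse_loss and numerical_grad are my own, and the loss just repeats the forward pass from Train (the check can be slightly off exactly at the relu kink, but it should still show which gradient is wrong):

def mse_loss(X, Y, w_ih, b_h, w_ho, b_o):
    # same forward pass as Train: relu hidden layer, sigmoid output,
    # summed half squared error
    P_h = np.maximum(0, np.dot(X, w_ih) + b_h)
    P_o = 1 / (1 + np.exp(-(np.dot(P_h, w_ho) + b_o)))
    return np.sum(((P_o - Y)**2) / 2)

def numerical_grad(f, param, eps=1e-5):
    # central differences, perturbing one entry of param at a time
    grad = np.zeros_like(param)
    it = np.nditer(param, flags=["multi_index"])
    while not it.finished:
        idx = it.multi_index
        old = param[idx]
        param[idx] = old + eps
        f_plus = f()
        param[idx] = old - eps
        f_minus = f()
        param[idx] = old
        grad[idx] = (f_plus - f_minus) / (2 * eps)
        it.iternext()
    return grad

# e.g. numerical gradient of the loss with respect to the first-layer weights
print(numerical_grad(lambda: mse_loss(input_data, output_data, w_ih, b_h, w_ho, b_o), w_ih))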