我训练了一个神经网络来解决XOR问题,但遇到的问题是网络无法收敛。我使用的是Andrew Ng的DeepLearning.ai课程中教授的方法和符号。
以下是代码:
from __future__ import print_function

import numpy as np
# XOR truth table: one input pair per row of X, with the matching target in
# the same position of the 1 x 4 row vector Y.
X = np.array([[a, b] for a in (0, 1) for b in (0, 1)])
Y = (X[:, 0] ^ X[:, 1]).reshape(1, -1)

# Both bias vectors start at one (3 x 1 for layer 1, 1 x 1 for layer 2).
b1 = np.ones(shape=(3, 1))
b2 = np.ones(shape=(1, 1))

# Fixed seed so the weight draws are reproducible. W1 is drawn before W2;
# swapping the order would change both matrices.
np.random.seed(1)
W1 = 1e-4 * np.random.standard_normal(size=(3, 2))
W2 = 1e-4 * np.random.standard_normal(size=(1, 3))
# NOTE(review): a 1e-4 weight scale combined with identical biases starts the
# three hidden units almost indistinguishable from each other -- if training
# plateaus, a larger initial scale is worth trying; confirm experimentally.
接下来是包含前向传播和反向传播(Backpropagation)的训练循环部分:
# Batch gradient descent for the two-layer network (layer 1: W1/b1,
# layer 2: W2/b2). Relies on X, Y, W1, b1, W2, b2, sigmoid and
# sigmoid_gradient being defined before this point.
#
# Sign convention: every d* term below is derived from (Y - A2), so it points
# in the direction that reduces the error, and the parameter update therefore
# ADDS learning_rate * d* instead of subtracting it.
# NOTE(review): this matches the cross-entropy gradient only if `sigmoid` is
# the logistic function and `sigmoid_gradient(Z)` is its derivative at Z --
# confirm against their definitions.
learning_rate = 0.01
m = X.shape[0]  # number of training examples (one per row of X)
for iteration in range(100000):
    # forward propagation
    # layer1
    Z1 = np.dot(W1, X.T) + b1
    A1 = sigmoid(Z1)
    # layer2
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)

    # backpropagation
    # 1.0 / m keeps the averaging factor a float even where `/` on two ints
    # truncates (Python 2 without `from __future__ import division`).
    dZ2 = Y - A2
    dW2 = (1.0 / m) * np.dot(dZ2, A1.T)
    db2 = (1.0 / m) * np.sum(dZ2, axis=1, keepdims=True)
    # The error is propagated back through the layer-2 WEIGHTS (W2), not
    # through their gradient (dW2). Both have the same shape, so the shape
    # asserts below cannot tell the two apart.
    dZ1 = np.dot(W2.T, dZ2) * sigmoid_gradient(Z1)
    dW1 = (1.0 / m) * np.dot(dZ1, X)
    db1 = (1.0 / m) * np.sum(dZ1, axis=1, keepdims=True)

    # checking if shapes are correctly preserved
    assert dZ2.shape == Z2.shape
    assert dW2.shape == W2.shape
    assert db2.shape == b2.shape
    assert dZ1.shape == Z1.shape
    assert dW1.shape == W1.shape
    assert db1.shape == b1.shape

    # update parameters (addition, see the sign convention note above the loop)
    W1 = W1 + learning_rate * dW1
    W2 = W2 + learning_rate * dW2
    b1 = b1 + learning_rate * db1
    b2 = b2 + learning_rate * db2

    # print every 10k
    if iteration % 10000 == 0:
        print(A2)