我尝试为XOR函数构造一个神经网络。该网络具有1个隐藏层,其中除了偏置节点外还具有2个节点。 Sigmoid函数用作激活函数。我已经测试了网络的多种学习率。结果始终是相同的:网络为所有输入给出值0.5。人们报告了相同的问题还有其他一些线索,但据我所知,在这些情况下还犯了其他错误。
以下代码显示了我的网络和结果。
import numpy as np
import matplotlib.pyplot as plt
class NN:
""" XOR function test. 1 hidden layer with 2 hidden nodes in addition to bias node."""
def __init__(self, nodeNumbers, learningRate, targetMatrix, inputMatrix, errorTolerance, \
maxIterations):
self.nodeNumbers, self.learningRate, self.targetMatrix, \
self.inputMatrix, self.errorTolerance, self.maxIterations = \
nodeNumbers, learningRate, targetMatrix, inputMatrix, errorTolerance, \
maxIterations
self.numberOfInputs = np.shape(self.inputMatrix)[1]
self.weightMatrices = []
for nodeNumber in range(len(nodeNumbers[1:])):
self.weightMatrices.append(np.random.random_sample((nodeNumbers[nodeNumber+1], \
nodeNumbers[nodeNumber]+1)).T - .5)
def activationFunction(self, x):
return 1./(1+np.exp(-x))
def derivative(self, weightedInputs):
return self.activationFunction(weightedInputs)*(1 - self.activationFunction(weightedInputs))
def run(self):
self.iterationNumber = 0
numberOfAdjustmentsDuringIteration = 1
while (self.iterationNumber < self.maxIterations and numberOfAdjustmentsDuringIteration != 0):
self.iterationNumber += 1
numberOfAdjustmentsDuringIteration = 0
for inputNumber in range(self.numberOfInputs):
self.inputs = self.inputMatrix[:,inputNumber]
self.targets = self.targetMatrix[inputNumber]
self.forward()
self.calculateError()
if abs(self.error2) > self.errorTolerance:
numberOfAdjustmentsDuringIteration +=1
self.backward()
print('Iterations: ', self.iterationNumber, '|Error|: ', self.error2)
def forward(self):
self.u1 = self.weightMatrices[0].T @ self.inputMatrix.T[0,:]
z1 = self.activationFunction(self.u1)
self.z1 = np.concatenate([[-1], z1])
self.u2 = self.weightMatrices[1].T @ self.z1
self.z2 = self.activationFunction(self.u2)
def calculateError(self):
self.error2 = (self.targets - self.z2)**2
def backward(self, inputs=False, targets=False):
self.delta2 = (self.z2 - self.targets)*self.derivative(self.u2)
delta11 = self.derivative(self.u1[0])*self.delta2* self.weightMatrices[1][0]
delta12 = self.derivative(self.u1[1])*self.delta2* self.weightMatrices[1][1]
self.delta1 = np.concatenate([delta11, delta12])
self.weightMatrices[1][0,0] -= self.learningRate*self.delta2*self.z1[0]
self.weightMatrices[1][1,0] -= self.learningRate*self.delta2*self.z1[1]
self.weightMatrices[1][2,0] -= self.learningRate*self.delta2*self.z1[2]
self.weightMatrices[0][0,0] -= self.learningRate*self.delta1[0]*self.inputs[0]
self.weightMatrices[0][1,0] -= self.learningRate*self.delta1[0]*self.inputs[1]
self.weightMatrices[0][0,1] -= self.learningRate*self.delta1[1]*self.inputs[0]
self.weightMatrices[0][1,1] -= self.learningRate*self.delta1[1]*self.inputs[1]
def predict(self, newInput):
self.inputs = newInput
self.forward()
print('Input: ', newInput, 'Predicted output: ', self.z2)
nodeNumbers = [2,2,1]
activationFunction = activationFunction
derivative = differentiateActivationFunction
learningRate = 0.3
targetMatrix = np.array(((0), (1), (1), (0))).T
inputMatrix = np.array(((-1,0, 0), (-1, 0, 1), (-1,1, 0), (-1,1,1))).T
errorTolerance = 1e-3
maxIterations= 500
nn=NN(nodeNumbers, learningRate, targetMatrix, inputMatrix, errorTolerance, maxIterations)
nn.run()
以上结果
Iterations: 500 |Error|: [0.26341771]
做出预测
inputs = np.array(((-1,0, 0), (-1, 0, 1), (-1,1, 0), (-1,1,1))).T
for inp in inputs:
nn.predict(inp)
结果
Input: [-1 0 0] Predicted output: [0.49987204]
Input: [-1 0 1] Predicted output: [0.49987204]
Input: [-1 1 0] Predicted output: [0.49987204]
Input: [-1 1 1] Predicted output: [0.49987204]
有人发现任何错误吗?