神经网络XOR始终为0.5

时间:2018-09-30 17:46:01

标签: python neural-network xor

我尝试为XOR函数构造一个神经网络。该网络具有1个隐藏层,其中除了偏置节点外还具有2个节点。 Sigmoid函数用作激活函数。我已经测试了网络的多种学习率。结果始终是相同的:网络为所有输入给出值0.5。人们报告了相同的问题还有其他一些线索,但据我所知,在这些情况下还犯了其他错误。

以下代码显示了我的网络和结果。

import numpy as np
import matplotlib.pyplot as plt

class NN:
    """ XOR function test. 1 hidden layer with 2 hidden nodes in addition to bias node."""
    def __init__(self, nodeNumbers, learningRate, targetMatrix, inputMatrix, errorTolerance, \
                 maxIterations):
        self.nodeNumbers,  self.learningRate, self.targetMatrix, \
        self.inputMatrix, self.errorTolerance, self.maxIterations = \
        nodeNumbers, learningRate, targetMatrix, inputMatrix, errorTolerance, \
                 maxIterations

        self.numberOfInputs = np.shape(self.inputMatrix)[1]

        self.weightMatrices = []
        for nodeNumber in range(len(nodeNumbers[1:])):
            self.weightMatrices.append(np.random.random_sample((nodeNumbers[nodeNumber+1], \
                                                        nodeNumbers[nodeNumber]+1)).T - .5)  

    def activationFunction(self, x):
        return 1./(1+np.exp(-x))

    def derivative(self, weightedInputs):
        return self.activationFunction(weightedInputs)*(1 - self.activationFunction(weightedInputs))        

    def run(self):
        self.iterationNumber = 0
        numberOfAdjustmentsDuringIteration = 1

        while (self.iterationNumber < self.maxIterations and numberOfAdjustmentsDuringIteration != 0):
            self.iterationNumber += 1
            numberOfAdjustmentsDuringIteration = 0

            for inputNumber in range(self.numberOfInputs):
                self.inputs = self.inputMatrix[:,inputNumber]
                self.targets = self.targetMatrix[inputNumber]
                self.forward()
                self.calculateError()

                if abs(self.error2) > self.errorTolerance:
                    numberOfAdjustmentsDuringIteration +=1
                    self.backward()
        print('Iterations: ', self.iterationNumber, '|Error|: ', self.error2)

    def forward(self):
        self.u1 = self.weightMatrices[0].T @ self.inputMatrix.T[0,:]
        z1 = self.activationFunction(self.u1)
        self.z1 = np.concatenate([[-1], z1])
        self.u2 = self.weightMatrices[1].T @ self.z1
        self.z2 = self.activationFunction(self.u2)

    def calculateError(self):
        self.error2 = (self.targets - self.z2)**2

    def backward(self, inputs=False, targets=False):    
        self.delta2 = (self.z2 - self.targets)*self.derivative(self.u2) 

        delta11 = self.derivative(self.u1[0])*self.delta2* self.weightMatrices[1][0]
        delta12 = self.derivative(self.u1[1])*self.delta2* self.weightMatrices[1][1]
        self.delta1 = np.concatenate([delta11, delta12])

        self.weightMatrices[1][0,0] -= self.learningRate*self.delta2*self.z1[0]
        self.weightMatrices[1][1,0] -= self.learningRate*self.delta2*self.z1[1]
        self.weightMatrices[1][2,0] -= self.learningRate*self.delta2*self.z1[2]

        self.weightMatrices[0][0,0] -= self.learningRate*self.delta1[0]*self.inputs[0]
        self.weightMatrices[0][1,0] -= self.learningRate*self.delta1[0]*self.inputs[1]
        self.weightMatrices[0][0,1] -= self.learningRate*self.delta1[1]*self.inputs[0]
        self.weightMatrices[0][1,1] -= self.learningRate*self.delta1[1]*self.inputs[1]

    def predict(self, newInput):
        self.inputs = newInput
        self.forward()
        print('Input: ', newInput, 'Predicted output: ', self.z2)


nodeNumbers = [2,2,1]
activationFunction = activationFunction
derivative = differentiateActivationFunction
learningRate = 0.3
targetMatrix = np.array(((0), (1), (1), (0))).T
inputMatrix = np.array(((-1,0, 0), (-1, 0, 1), (-1,1, 0), (-1,1,1))).T

errorTolerance = 1e-3
maxIterations= 500

nn=NN(nodeNumbers, learningRate, targetMatrix, inputMatrix, errorTolerance, maxIterations)
nn.run()

以上结果

Iterations:  500 |Error|:  [0.26341771]

做出预测

inputs = np.array(((-1,0, 0), (-1, 0, 1), (-1,1, 0), (-1,1,1))).T
for inp in inputs:
    nn.predict(inp)

结果

Input:  [-1  0  0] Predicted output:  [0.49987204]
Input:  [-1  0  1] Predicted output:  [0.49987204]
Input:  [-1  1  0] Predicted output:  [0.49987204]
Input:  [-1  1  1] Predicted output:  [0.49987204]

有人发现任何错误吗?

0 个答案:

没有答案