Question

我一直在做一个前馈神经网络来充当XOR门，但是在训练完所有输出之后（应该接近0或1），趋向于0.5。

我一直在训练神经网络，如下所示：

遍历网络中的所有权重。
根据该权重计算成本函数的导数（通过3B1B的视频Backpropagation Calculus）
将此导数追加到成本函数向量。
完成所有这些操作后，将成本向量乘以学习率，然后从权重中减去相应的分量。

我要去哪里错了？我在下面包含了我的代码。

注意：

我不使用numpy，因为我想完全从头开始编写神经网络。
我知道还有其他与此类似的问题，但是我检查了这些问题中提到的要点，但它们无济于事。另外，我使用的是面向对象的方法，其他问题都是程序性的。

代码：

import random
import math

def calcCost(trainingNetwork, expected):
    cost = 0
    for x in range(len(trainingNetwork.network[2])):
        actual = trainingNetwork.network[-1][x].activation
        wanted = expected[1][x]
        cost += (actual - wanted) ** 2

    return cost

def calcCostVector(trainingNetwork, expected):
    cost = []
    for x in range(len(trainingNetwork)):
        actual = trainingNetwork[x].activation
        wanted = expected[1][x]
        cost.append(wanted - actual)

    return cost

class Neuron:
    def __init__(self):
        self.activation = 0.0

    def __repr__(self):
        return str(self.activation)

class BiasNeuron(Neuron):
    def __init__(self):
        super().__init__()
        self.activation = 1.0

class Network:
    def __init__(self):
        self.network = [[Neuron(), Neuron(), BiasNeuron()],  # INPUT
                        [Neuron(), Neuron()],  # HIDDEN
                        [Neuron()]]  # OUTPUT

        self.weights = [[[random.uniform(-1, 1), random.uniform(-1, 1)],
                         [random.uniform(-1, 1), random.uniform(-1, 1)],
                         [random.uniform(-1, 1), random.uniform(-1, 1)]],  # Layer 0 - Layer 1

                        [[random.uniform(-1, 1)], [random.uniform(-1, 1)]]]  # Layer 1 - output

        self.learningRate = 0.01

    @staticmethod
    def sigmoid(number, prime=False):
        if not prime:
            return 1/(1 + math.exp(-number))

        else:
            return number * (1 - number)

    def sumNeuron(self, neuron, layer):]
        total = 0

        for x in range(len(self.weights[layer - 1])):

            total += self.weights[layer - 1][x][neuron] * self.network[layer - 1][x].activation

        self.network[layer][neuron].activation = self.sigmoid(total)

    def sumNetwork(self):

        for x in range(1, len(self.network)):
            for y in range(len(self.network[x])):
                self.sumNeuron(y, x)

    def setInputValues(self, data):
        for x in range(len(data)):
            self.network[0][x].activation = data[x]

    def applyChanges(self, gradientVec):
        acc = 0
        for x in range(len(self.weights)):
            for y in range(len(self.weights[x])):
                for z in range(len(self.weights[x][y])):
                    self.weights[x][y][z] -= gradientVec[acc]
                    acc += 1

    def calcWeightGradient(self, x, y, z, data):
        gradient = 2 * (self.network[x + 1][z].activation - data[1][0]) * \
                   self.sigmoid(self.network[x + 1][z].activation, prime=True) * \
               self.network[x][y].activation

        return gradient

    def train(self, data):
        costVec = []
        for x in range(len(self.weights)):
            for y in range(len(self.weights[x])):
                for z in range(len(self.weights[x][y])):
                    gradient = self.calcWeightGradient(x, y, z, data)
                    costVec.append(gradient)

        scaledCostVec = []
        for x in costVec:
            scaledCostVec.append(x * self.learningRate)

        self.applyChanges(scaledCostVec)

if __name__ == '__main__':
    inputData = [((0, 0), (0,)),
                 ((0, 1), (1,)),
                 ((1, 0), (1,)),
                 ((1, 1), (0,))]

    network = Network()

    for iteration in range(1000):

        cost = 0
        for sample in inputData:
            network.setInputValues(sample[0])
            network.sumNetwork()
            cost += calcCost(network, sample)

        cost /= len(inputData)
        print(cost)

        for sample in inputData:
            network.setInputValues(sample[0])
            network.sumNetwork()

            network.train(sample)

    print(network.weights)

    for sample in inputData:
        network.setInputValues(sample[0])
        network.sumNetwork()
        print(sample[0][0], 'XOR', sample[0][1], '=', network.network[-1][0].activation)

神经网络输出应接近0或1，收敛到0.5

0 个答案: