Convergence of the backpropagation algorithm: how many iterations does it need?

Date: 2016-12-26 20:48:15

Tags: python neural-network

I am new to the world of neural networks, which I find very interesting. I wrote the basic backpropagation algorithm for a multilayer NN to solve small problems. I use the sigmoid activation function (as most of you probably guessed): x -> 1/(1+exp(-x)).
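
For reference, here is a minimal sketch of that activation function and its derivative; the standalone function names are mine (in the class below it is the activation_function method), and the derivative is where the output*(1-output) factors in the training code come from:

import math

def sigmoid(x):
    # logistic function: squashes any real x into the interval (0, 1)
    return 1 / (1 + math.exp(-x))

def sigmoid_prime_from_output(o):
    # derivative expressed in terms of the output o = sigmoid(x);
    # this is the o*(1-o) factor used during backpropagation
    return o * (1 - o)
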
I tried my program on a few problems:

  • The first was the XOR problem. I used a 3-layer network of size [2, 2, 1] with one bias neuron in each of the first two layers (so the effective sizes are more like [3, 3, 1]). I tried it on 1000 training samples (i.e., a pair (0/1, 0/1) and its XOR as output), and the algorithm seemed to converge to an error of 0.5 :( I found this strange, so I raised the number to 10000, which changed nothing, then to 100000 (out of desperation :p) and it worked! The error dropped below 0.02 on average. Does anyone know why it needs so much data to work? (A sketch of this setup appears right after this list.)
  • The second was the problem of summing two numbers (e.g., 4 + 8 = ?). I chose a [2, 5, 5, 1] network rather arbitrarily, with one bias neuron in each of the first three layers (so the effective sizes are more like [3, 6, 6, 1]). I used a training set of 100000 pairs of numbers below 100 together with their sums. This time the error did not converge at all, and the network's output always returned the number 1. Have you ever seen this happen? Is it a bug in my code? (I have checked the code many times, but maybe.)
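
As a concrete illustration of the first experiment, here is roughly how the XOR training set and the error check can be set up with the Network class from the code below. The generation loop and the learning rate 0.1 are my assumptions; the post only gives the network sizes and sample counts:

import random

network = Network([2, 2, 1])  # bias neurons are added internally by Network
data = []
for _ in range(100000):       # 1000 samples plateaued near 0.5; 100000 worked
    a = random.randrange(2)
    b = random.randrange(2)
    data.append(([float(a), float(b)], [float(a ^ b)]))
network.train(data, 0.1)      # assumed learning rate, same as the sum example

# average quadratic error over the four possible inputs
total = 0.0
for a in (0, 1):
    for b in (0, 1):
        total += network.quadraticerror([float(a ^ b)],
                                        network.finaloutput([float(a), float(b)]))
print(total / 4)              # reported to drop below 0.02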


import random
import math
import copy

class Network:
    def initdata(self):
        # weights initialization: layer 0 gets empty placeholders so that
        # self.weights[l] lines up with self.layers[l]
        self.weights.append([])
        self.threshold.append([])
        for l in range(1, len(self.layers)):
            n = self.layers[l]
            thresholdl = []
            weightsl = []
            for i in range(n):
                thresholdl.append(-random.random())
                weightsli = []
                for j in range(self.layers[l-1]):
                    weightsli.append(random.random()*2 - 1)
                # bias neuron: its weight is stored as the last entry
                weightsli.append(thresholdl[-1])
                weightsl.append(weightsli)
            self.weights.append(weightsl)
            self.threshold.append(thresholdl)

    def __init__(self, layers):
        self.layers = layers
        self.weights = []
        self.threshold = []
        self.initdata()

    def activation_function(self, x):
        return 1/(1 + math.exp(-x))

    def outputlayer(self, input, l):
        # recursively computes the activations of layers 0..l;
        # returns a list of per-layer output vectors
        if l == 0:
            return [input]
        output = []
        prevoutput = self.outputlayer(input, l-1)
        for i in range(self.layers[l]):
            f = 0
            for k in range(len(prevoutput[-1])):
                f += self.weights[l][i][k]*prevoutput[-1][k]
            f += self.weights[l][i][-1]  # bias weight!
            output.append(self.activation_function(f))
        return prevoutput + [output]

    def layersoutput(self, input):
        return self.outputlayer(input, len(self.layers)-1)

    def finaloutput(self, input):
        return self.layersoutput(input)[-1]

    def train(self, data, nu):
        # online training: one weight update per sample
        for (input, finaloutput) in data:
            output = self.layersoutput(input)
            err = self.errorvector(finaloutput, output[-1])
            self.changeweights(err, output, nu)

    def changeweights(self, err, output, nu):
        deltas = []
        for i in range(len(self.layers)):
            deltas.append([])
        # deep copy, so the deltas below are computed from the old weights;
        # a plain .copy() is shallow and lets the updates leak into
        # self.weights while we are still backpropagating
        tempweights = copy.deepcopy(self.weights)
        def changeweightslayer(layer):
            # recurse first, so the last layer is processed before the
            # hidden ones (its deltas are needed further down)
            if layer != len(self.layers)-1:
                changeweightslayer(layer+1)
            for i in range(self.layers[layer]):
                if layer != len(self.layers)-1:
                    # hidden layer: backpropagate the deltas of layer+1
                    delta = output[layer][i]*(1-output[layer][i])*sum(
                        [deltas[layer+1][l]*self.weights[layer+1][l][i]
                         for l in range(self.layers[layer+1])])
                else:
                    # output layer: delta from the error vector
                    delta = output[layer][i]*(1-output[layer][i])*err[i]
                deltas[layer].append(delta)
                for k in range(len(self.weights[layer][i])-1):
                    tempweights[layer][i][k] += nu*output[layer-1][k]*delta
                tempweights[layer][i][-1] += nu*delta  # bias weight
        changeweightslayer(1)
        self.weights = tempweights

    def quadraticerror(self, a, b):
        return sum([(a[i]-b[i])**2 for i in range(len(a))])

    def errorvector(self, a, b):
        return [a[i]-b[i] for i in range(len(a))]

network = Network([2, 5, 5, 1])
print(network.weights)
data = []
for i in range(1000000):
    bit1 = random.randrange(100)
    bit2 = random.randrange(100)
    data.append(([float(bit1), float(bit2)], [float(bit1 + bit2)]))
network.train(data, 0.1)
print(network.weights)
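
Written out, the update rule that changeweights implements is plain online backpropagation on the squared error with learning rate \nu (the notation here is mine, not from my code):

\delta_i^{(L)} = o_i^{(L)} (1 - o_i^{(L)}) (t_i - o_i^{(L)})   for the output layer L,
\delta_i^{(l)} = o_i^{(l)} (1 - o_i^{(l)}) \sum_j \delta_j^{(l+1)} w_{ji}^{(l+1)}   for hidden layers,
w_{ik}^{(l)} \leftarrow w_{ik}^{(l)} + \nu \, o_k^{(l-1)} \delta_i^{(l)},   and for the bias: b_i^{(l)} \leftarrow b_i^{(l)} + \nu \, \delta_i^{(l)}.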

0 Answers:

No answers yet