I've been building a feedforward neural network to act as an XOR gate, but after training, all of the outputs (which should be close to 0 or 1) tend toward 0.5.
I have been training the neural network as follows:
1. Loop over every weight in the network.
2. Compute the derivative of the cost function with respect to that weight (following 3Blue1Brown's "Backpropagation Calculus" video).
3. Append this derivative to a cost vector.
4. Once every weight has been processed, multiply the cost vector by the learning rate and subtract each component from its corresponding weight, as sketched below.
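In symbols, what steps 2 and 4 are meant to do (a minimal sketch; here $\eta$ is the learning rate, $a$ the output activation, $y$ the target from the sample, $a_{\text{prev}}$ the upstream activation, and $z$ the weighted sum fed into the sigmoid) is:

$$\frac{\partial C}{\partial w} = 2\,(a - y)\,\sigma'(z)\,a_{\text{prev}}, \qquad w \leftarrow w - \eta\,\frac{\partial C}{\partial w}$$

This is the per-weight update that calcWeightGradient and applyChanges in my code are together supposed to implement.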
Where am I going wrong? I've included my code below.
Notes:
I'm not using numpy because I want to write the neural network entirely from scratch.
I know there are other questions similar to this one, but I've checked the points raised in them and they didn't help. Also, I'm using an object-oriented approach, whereas those questions are procedural.
Code:
import random
import math
def calcCost(trainingNetwork, expected):
    # Sum of squared errors over the output layer for one sample.
    cost = 0
    for x in range(len(trainingNetwork.network[-1])):
        actual = trainingNetwork.network[-1][x].activation
        wanted = expected[1][x]
        cost += (actual - wanted) ** 2
    return cost
def calcCostVector(trainingNetwork, expected):
    # Per-output error vector (wanted - actual); not called anywhere below.
    cost = []
    for x in range(len(trainingNetwork)):
        actual = trainingNetwork[x].activation
        wanted = expected[1][x]
        cost.append(wanted - actual)
    return cost
class Neuron:
    def __init__(self):
        self.activation = 0.0

    def __repr__(self):
        return str(self.activation)


class BiasNeuron(Neuron):
    def __init__(self):
        super().__init__()
        self.activation = 1.0
class Network:
    def __init__(self):
        # 2 inputs + bias -> 2 hidden -> 1 output
        self.network = [[Neuron(), Neuron(), BiasNeuron()],  # INPUT
                        [Neuron(), Neuron()],                 # HIDDEN
                        [Neuron()]]                           # OUTPUT
        # weights[l][i][j] connects neuron i in layer l to neuron j in layer l + 1
        self.weights = [[[random.uniform(-1, 1), random.uniform(-1, 1)],
                         [random.uniform(-1, 1), random.uniform(-1, 1)],
                         [random.uniform(-1, 1), random.uniform(-1, 1)]],  # Layer 0 - Layer 1
                        [[random.uniform(-1, 1)], [random.uniform(-1, 1)]]]  # Layer 1 - output
        self.learningRate = 0.01
    @staticmethod
    def sigmoid(number, prime=False):
        if not prime:
            return 1 / (1 + math.exp(-number))
        else:
            # expects the sigmoid *output* (an activation), not the raw weighted sum
            return number * (1 - number)
    def sumNeuron(self, neuron, layer):
        # Weighted sum of the previous layer's activations, passed through the sigmoid.
        total = 0
        for x in range(len(self.weights[layer - 1])):
            total += self.weights[layer - 1][x][neuron] * self.network[layer - 1][x].activation
        self.network[layer][neuron].activation = self.sigmoid(total)
    def sumNetwork(self):
        # Forward pass through every non-input layer.
        for x in range(1, len(self.network)):
            for y in range(len(self.network[x])):
                self.sumNeuron(y, x)

    def setInputValues(self, data):
        for x in range(len(data)):
            self.network[0][x].activation = data[x]
    def applyChanges(self, gradientVec):
        # Subtract the (already scaled) gradient components from the weights.
        acc = 0
        for x in range(len(self.weights)):
            for y in range(len(self.weights[x])):
                for z in range(len(self.weights[x][y])):
                    self.weights[x][y][z] -= gradientVec[acc]
                    acc += 1

    def calcWeightGradient(self, x, y, z, data):
        # 2 * (activation - target) * sigmoid'(activation) * upstream activation
        gradient = 2 * (self.network[x + 1][z].activation - data[1][0]) * \
                   self.sigmoid(self.network[x + 1][z].activation, prime=True) * \
                   self.network[x][y].activation
        return gradient
    def train(self, data):
        costVec = []
        for x in range(len(self.weights)):
            for y in range(len(self.weights[x])):
                for z in range(len(self.weights[x][y])):
                    gradient = self.calcWeightGradient(x, y, z, data)
                    costVec.append(gradient)
        # Scale by the learning rate, then apply as a gradient-descent step.
        scaledCostVec = [g * self.learningRate for g in costVec]
        self.applyChanges(scaledCostVec)
if __name__ == '__main__':
    # XOR truth table: ((inputs), (expected output,))
    inputData = [((0, 0), (0,)),
                 ((0, 1), (1,)),
                 ((1, 0), (1,)),
                 ((1, 1), (0,))]
    network = Network()
    for iteration in range(1000):
        # Report the mean cost over the four samples...
        cost = 0
        for sample in inputData:
            network.setInputValues(sample[0])
            network.sumNetwork()
            cost += calcCost(network, sample)
        cost /= len(inputData)
        print(cost)
        # ...then run one training step per sample.
        for sample in inputData:
            network.setInputValues(sample[0])
            network.sumNetwork()
            network.train(sample)
    print(network.weights)
    for sample in inputData:
        network.setInputValues(sample[0])
        network.sumNetwork()
        print(sample[0][0], 'XOR', sample[0][1], '=', network.network[-1][0].activation)