The error in my backpropagation network isn't going down

Asked: 2018-10-30 19:19:49

Tags: python neural-network

I am working on a NeuralNetwork class that uses the back-propagation algorithm from "Artificial Intelligence: A Modern Approach". I stepped through the train function with a debugger and everything seems to run correctly, but when I actually run it the error does not improve. Can anyone spot what I am doing wrong?

import math, random
import numpy as np

CLOSE = 0.2

class Perceptron:
    '''A single perceptron using sigmoid activation'''
    def __init__(self, inputs):
        '''Set up the perceptron with the given number of inputs'''
        self.weights = np.empty(inputs)
        for i in range(inputs):
            self.weights[i] = random.random()
        self.bias = random.random()

    def getOutput(self, inputs):
        '''Calculates, stores, and returns the output'''
        assert len(inputs) == len(self.weights)
        inj = np.sum(inputs * self.weights) + self.bias # Sum inputs
        g = 1.0 / (1.0 + math.exp(-inj)) # Sigmoid activation
        self.aj = g
        return g

    def adjust(self, delta):
        '''Adjusts the weights and bias'''
        self.bias += self.aj * delta
        for i in range(len(self.weights)):
            self.weights[i] += self.aj * delta

class Layer:
    '''Creates a single layer in a single feed-forward neural network'''
    def __init__(self, width, inputSize, prevLayer=False):
        '''Create a new layer'''
        self.prevLayer = prevLayer
        self.nextLayer = False
        self.nodes = []
        for _ in range(width):
            self.nodes.append(Perceptron(inputSize))

    def setNext(self, nextLayer):
        '''Set the next layer in the network'''
        self.nextLayer = nextLayer

    def getOutput(self, inputs):
        '''Get an array of the output of the network'''
        output = np.empty(len(self.nodes))
        for i in range(len(self.nodes)):
            output[i] = self.nodes[i].getOutput(inputs)
        if isinstance(self.nextLayer, Layer):
            # If this isn't the output layer, recurse to the next layer down
            return self.nextLayer.getOutput(output)
        return output

    def backProp(self, deltas):
        '''Back-propagate error through all the layers'''
        if isinstance(self.prevLayer, Layer):
            # If this isn't the input layer, calculate deltas for the next layer up
            crossprod = np.empty((len(deltas), len(self.nodes[0].weights)))
            for j in range(len(deltas)):
                crossprod[j][:] = self.nodes[j].weights * deltas[j]
            nextDeltas = crossprod.sum(axis=0)
            for i in range(len(nextDeltas)):
                # multiply by g'
                nextDeltas[i] *= self.prevLayer.nodes[i].aj * (1.0 - self.prevLayer.nodes[i].aj)
            # Recurse upwards
            self.prevLayer.backProp(nextDeltas)
        # Adjust the weights of neurons in this layer
        for i in range(len(self.nodes)):
            self.nodes[i].adjust(deltas[i])

class NeuralNetwork:
    def __init__(self, layerSizes=np.array(0), filename=""):
        '''Creates a neural network with the given layer sizes.'''
        prev = False
        inputLayer = False
        for i in range(len(layerSizes)-1):
            inputSize = layerSizes[i]
            outputSize = layerSizes[i+1]
            layer = Layer(outputSize, inputSize, prev)
            if isinstance(prev, Layer):
                prev.setNext(layer)
            if not isinstance(inputLayer, Layer):
                inputLayer = layer
            prev = layer
        self.inputLayer = inputLayer
        self.outputLayer = prev

    def train(self, inputs, outputs):
        '''Train the network on the given sample'''
        pred = self.inputLayer.getOutput(inputs)
        # calculate error of output layer
        error = outputs - pred
        deltas = error * pred * (1.0 - pred)
        # back-propagate the error
        self.outputLayer.backProp(deltas)
        # return error
        return np.max(abs(error))

    def test(self, inputs, outputs):
        '''Test the network on the given sample'''
        pred = self.inputLayer.getOutput(inputs)
        correct = True
        for i in range(len(pred)):
            if abs(pred[i] - outputs[i]) > CLOSE:
                correct = False
        return correct

1 answer:

Answer 0: (score: 0)

You can try any of the following measures:

  1. Shuffle your data well.
  2. Use a smaller learning rate, such as 0.001 (see the sketch after this list).
  3. Use ReLU instead of sigmoid.
  4. Initialize the bias to 1 instead of randomly.
  5. If you use ReLU, use softmax at the output layer.
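
A minimal, self-contained sketch of what suggestions 2-4 might look like for a single neuron; this is not the asker's code, and the names relu, relu_deriv, and LEARNING_RATE (and its value) are made up for illustration:

import numpy as np

LEARNING_RATE = 0.001  # suggestion 2: a small, explicit learning rate

def relu(x):
    '''Suggestion 3: ReLU activation instead of sigmoid'''
    return np.maximum(0.0, x)

def relu_deriv(x):
    '''Derivative of ReLU, used in place of g*(1-g) for sigmoid'''
    return (x > 0).astype(float)

# One illustrative gradient step on a single neuron
inputs = np.array([0.5, -1.0, 2.0])
weights = np.random.random(3)
bias = 1.0  # suggestion 4: start the bias at 1 rather than a random value

inj = np.dot(inputs, weights) + bias     # weighted sum of inputs
aj = relu(inj)                           # neuron output

target = 1.0
delta = (target - aj) * relu_deriv(inj)  # error signal for this neuron

# Each weight's update is scaled by that weight's input and by the learning rate
weights += LEARNING_RATE * delta * inputs
bias += LEARNING_RATE * delta

Note that in this sketch each weight update is scaled by the input feeding that weight, not by the neuron's own output.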