神经网络反向传播不起作用

时间:2016-07-31 18:32:40

标签: javascript typescript machine-learning neural-network

我用JavaScript编写了一个神经网络,并实现了here描述的反向传播算法。 这是代码(typescript):

/**
 * Net
 */


export class Net {
    private layers: Layer[] = [];
    private inputLayer: Layer;
    private outputLayer: Layer;
    public error: number = Infinity;

    private eta: number = 0.15;
    private alpha: number = 0.5;

    constructor(...topology: number[]) {
        topology.forEach((topologyLayer, iTL) => {
            var nextLayerNeuronNumber = topology[iTL + 1] || 0;
            this.layers.push(new Layer(topologyLayer, nextLayerNeuronNumber));
        });

        this.inputLayer = this.layers[0];
        this.outputLayer = this.layers[this.layers.length - 1];

    }

    public loadWeights(weights) {
        /*
        [
            [Layer
                [Node weights, ..., ...]
            ]
        ]
        */

        for (var iL = 0; iL < weights.length; iL++) {
            var neuronWeights = weights[iL];
            var layer = this.layers[iL];
            for (var iN = 0; iN < neuronWeights.length; iN++) {

                // Neuron

                var connections = neuronWeights[iN];
                for (var iC = 0; iC < connections.length; iC++) {
                    var connection = connections[iC];
                    this.layer(iL).neuron(iN).setWeights(iC, connection);

                }

            }
        }

    }


    public train(data: number[][], iterartions = 2000) {

        var inputs = this.inputLayer.neurons.length - 1;

        for (var ite = 0; ite < iterartions; ite++) {

            data.forEach(node => {

                var inputData = [];
                var outputData = [];

                for (var i = 0; i < node.length; i++) {
                    if (i < inputs) {
                        inputData.push(node[i])
                    } else {
                        outputData.push(node[i])
                    }
                }

                this.feedForward(...inputData);
                this.backProb(...outputData);


            });


        }


        return this.calcDataError(data);

    }

    private calcDataError(data){
        var overallDataErrorSum = 0;
        var inputs = this.inputLayer.neurons.length - 1;

        data.forEach(node => {
            var outputData = node.splice(inputs);
            var inputData = node;

            this.feedForward(...inputData);
            overallDataErrorSum += this.getNetError(outputData);
        });

        overallDataErrorSum /= data.length;

        return overallDataErrorSum;
    }

    public saveWeights() {
        // Ignore output layer
        var ret = []
        for (var iL = 0; iL < this.layers.length - 1; iL++) {
            var layer = this.layers[iL];
            var layer_ret = [];

            layer.neurons.forEach(neuron => {
                layer_ret.push(neuron.connections.map(c => c.weight));
            });

            ret.push(layer_ret);
        }
        return ret;
    }

    feedForward(...inputs: number[]) {
        if (inputs.length != this.inputLayer.neurons.length - 1) return false;

        this.inputLayer.neurons.forEach((neuron, i) => {
            if (!neuron.isBias) {
                neuron.output(inputs[i]);
            }
        });

        this.layers.forEach((layer, i) => {
            // Skip Input Layer
            if (i > 0) {
                var prevLayer = this.layers[i - 1]
                layer.neurons.forEach(neuron => {
                    neuron.calcOutput(prevLayer);
                });
            }
        });

    }

    public getNetError(targetVals) {
        // Calc delta error of outputs
        var deltas = [];

        this.outputLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                neuron.calcOutputDelta(targetVals[iN]);
                deltas.push(neuron.delta);
            }
        });

        deltas = deltas.map(d => Math.pow(d, 2));


        var sum = 0;

        deltas.forEach(d => sum += d);

        return sum / deltas.length;


    }

    backProb(...targetVals: number[]) {



        // Calc delta error of outputs
        this.outputLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                neuron.calcOutputDelta(targetVals[iN]);
            }
        });

        // Backprop delta error through hidden layers

        for (var iL = this.layers.length - 2; iL > 0; iL--) {
            var layer = this.layers[iL];
            var nextLayer = this.layers[iL + 1]
            layer.neurons.forEach(neuron => {
                neuron.calcHiddenDelta(nextLayer);
            });

        }

        // Update weights 

        for (var iL = 1; iL < this.layers.length; iL++) {
            var layer = this.layers[iL];
            var prevLayer = this.layers[iL - 1];

            layer.neurons.forEach(neuron => {
                if (!neuron.isBias) {
                    neuron.updateWeights(prevLayer, this.eta);
                }
            });
        }

        this.error = this.getNetError(targetVals);

        return this.error;

    }

    getOutputs(...inputs: number[]) {

        var ret = [];
        this.outputLayer.neurons.forEach(neuron => {
            if (!neuron.isBias) {
                ret.push(neuron.output())
            }
        });
        return ret;

    }

    getResults(...inputs: number[]) {
        this.feedForward(...inputs)
        return this.getOutputs();
    }

    layer(i) {
        return this.layers[i];
    }
}

/**
 * Layer
 */
class Layer {
    public neurons: Neuron[] = [];
    constructor(neuronNumber: number, nextLayerNeuronNumber: number) {
        for (var iN = 0; iN < neuronNumber + 1; iN++) {
            // +1 for bias neuron, which is last
            if (iN < neuronNumber) {
                // Create normal neuron
                this.neurons.push(new Neuron(nextLayerNeuronNumber, iN, false));
            } else {
                this.neurons.push(new Neuron(nextLayerNeuronNumber, iN, true));
            }
        }
    }

    neuron(i) {
        return this.neurons[i];
    }

    bias() {
        return this.neurons[this.neurons.length - 1];
    }
}

/**
 * Neuron
 */
class Neuron {
    public connections: Connection[] = [];
    private outputVal: number;
    public delta: number;

    constructor(outputsTo: number, private index, public isBias = false) {

        // Creates connections
        for (var c = 0; c < outputsTo; c++) {
            this.connections.push(new Connection());
        }

        this.outputVal = isBias ? 1 : 0;

    }

    calcOutput(prevLayer: Layer) {

        // Only calcOutput when neuron is not a bias neuron

        if (!this.isBias) {
            var sum = 0;

            prevLayer.neurons.forEach(prevLayerNeuron => {
                sum += prevLayerNeuron.output() * prevLayerNeuron.getWeights(this.index).weight;
            });

            this.output(this.activationFunction(sum));
        }

    }

    private activationFunction(x) {

        //return Math.tanh(x);
        return 1 / (1 + Math.exp(-x))
        //return x;
    };

    private activationFunctionDerivative(x) {
        // Small approximation of tanh derivative
        //return 1 - x * x

        // Sigmoid
        var s = this.activationFunction(x);
        return s * (1 - s);

        // With general derivative formula where h = 1e-10
        /*var h = 0.0001;
        var dx = ((this.activationFunction(x + h) - this.activationFunction(x))/h)
        return dx;*/

        //return 1
    };

    // Backprop // Todo // Understand


    public calcOutputDelta(targetVal) {

        // Bias output neurons do not have delta error
        if (!this.isBias) {
            this.delta = targetVal - this.output();
        }
    }

    public calcHiddenDelta(nextLayer: Layer) {
        var sum = 0;

        // Go through all neurons of next layer excluding bias
        nextLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                sum += neuron.delta * this.getWeights(iN).weight;
            }
        });

        this.delta = sum;
    }

    public updateWeights(prevLayer: Layer, eta: number) {

        prevLayer.neurons.forEach((neuron, iN) => {
            var weight = neuron.getWeights(this.index).weight;
            var newWeight =
                weight + // old weight
                eta *   // learning weight
                this.delta * // delta error
                this.activationFunctionDerivative(neuron.output())
            neuron.getWeights(this.index).weight = newWeight;
        });


    }


    // Backprop end

    output(s?) {
        if (s && !this.isBias) {
            this.outputVal = s;
            return this.outputVal;
        } else {
            return this.outputVal;
        }
    }

    getWeights(i) {
        return this.connections[i];
    }

    setWeights(i, s) {
        return this.connections[i].weight = s;
    }
}

/**
 * Connection
 */
class Connection {
    public weight: number;
    public deltaWeight: number;

    constructor() {
        this.weight = Math.random();
        this.deltaWeight = 0;
    }
}

当仅针对一组数据进行训练时,它可以正常工作。 (例如here

import {Net} from './ml';

var myNet = new Net(2, 2, 2);


var weights = [
    [
        [0.15, 0.25],
        [0.20, 0.30],
        [0.35, 0.35]
    ],
    [
        [0.40, 0.50],
        [0.45, 0.55],
        [0.60, 0.60]
    ]
];

// Just loads the weights given in the example

myNet.loadWeights(weights)

var error = myNet.train([[0.05, 0.10, 0.01, 0.99]]);
console.log('Error: ', error);

console.log(myNet.getResults(0.05, 0.10));

控制台打印:

Error:  0.0000020735174706210714
[ 0.011556397089327321, 0.9886867357304885 ]

基本上,这很不错,对吧?

然后,我想教网络XOR问题:

import {Net} from './ml';

var myNet = new Net(2, 3, 1);


var trainigData = [
    [0, 0, 0],
    [1, 0, 1],
    [0, 1, 1],
    [1, 1, 0]
]

var error = myNet.train(trainigData)
console.log('Error: ', error);

console.log('Input: 0, 0: ', myNet.getResults(0, 0));
console.log('Input: 1, 0: ', myNet.getResults(1, 0));

此处网络失败:

Error:  0.2500007370167383
Input: 0, 0:  [ 0.5008584967899313 ]
Input: 1, 0:  [ 0.5008584967899313 ]

我做错了什么?

1 个答案:

答案 0 :(得分:0)

首先对整个批次执行梯度检查(对批次上的梯度函数进行计算),如果还没有这样做的话。这将确保您知道问题所在。

如果未正确计算渐变,考虑到您的实现适用于单个数据集,则很可能会在向后传递中混合使用某些值。

如果正确计算了渐变,则更新功能会出错。

可以在here

中找到javaScript中神经网络反向传播的工作实现

以下是使用backpropagation

的trainStep函数的代码段
    function trainStepBatch(details){
//we compute forward pass 
//for each training sample in the batch
//and stored in the batch array 
    var batch=[];
    var ks=[];
    for(var a=0;a<details.data.in.length;a++){
    var results=[];
    var k=1;
    results[0]={output:details.data.in[a]};
    for(var i=1;i<this.layers.length;i++){
        results[i]=layers[this.layers[i].type].evalForGrad(this.layers[i],results[i-1].output);
        k++;
    }
    batch[a]=results;
    ks[a]=k;
    }
//We compute the backward pass
//first derivative of the cost function given the output
    var grad=[];
    for(i in batch)grad[i]={grad:costs[details.cost].df(batch[i][ks[i]-1].output,details.data.out[i])};
//for each layer we compute the backwards pass
//on the results of all forward passes at a given layer
    for(var i=this.layers.length-1;i>0;i--){
    var grads=[];
    var test=true;
    for(a in batch){
        grads[a]=layers[this.layers[i].type].grad(this.layers[i],batch[a][i],batch[a][i-1],grad[a]);
        if(grads[a]==null)test=false;
        else grads[a].layer=i;
    }
//we perform the update
    if(test)stepBatch(this.layers[i].par,grads,details.stepSize);
    }
}

对于stepBatch函数

function stepBatch(params,grads, stepSize){
for(i in params.w){
    for(j in params.w[i]){
        for(a in grads){
            params.w[i][j]-=stepSize*grads[a].dw[i][j];
        }
    }
}
for(i in params.b){
    for(a in grads){
        params[a]-=stepSize*grads[a].db[i];
    }
}
function stepBatch(params,grads, stepSize){
    for(i in params.w){
        for(j in params.w[i]){
            for(a in grads){
                params.w[i][j]-=stepSize*grads[a].dw[i][j];
            }
        }
    }
    for(i in params.b){
        for(a in grads){
            params[a]-=stepSize*grads[a].db[i];
        }
    }
}