ANN regression: approximating a linear function

Date: 2016-11-08 16:21:42

Tags: machine-learning neural-network artificial-intelligence regression conv-neural-network

I have built a regular ANN-BP setup with one unit in the input and output layers and four nodes with sigmoid activation in the hidden layer. I gave it the simple task of approximating the linear function f(n) = n, with n in the range 0-100.

Problem: regardless of the number of layers, the number of units in the hidden layer, or whether I use biases in the node values, it learns to approximate f(n) = mean(dataset), as shown below:

[Image: plot of the network's output, a flat line at the mean of the dataset]

The code is written in JavaScript as a proof of concept. I defined three classes: Net, Layer and Connection, where Layer is an array of input, bias and output values, and Connection is a 2D array of weights and delta weights. Here is the Layer code, where all the important calculations happen:

Ann.Layer = function(nId, oNet, oConfig, bUseBias, aInitBiases) {
var _oThis = this;

var _initialize = function() {
        _oThis.id        = nId;
        _oThis.length    = oConfig.nodes;
        _oThis.outputs   = new Array(oConfig.nodes);
        _oThis.inputs    = new Array(oConfig.nodes);
        _oThis.gradients = new Array(oConfig.nodes);
        _oThis.biases    = new Array(oConfig.nodes);

        _oThis.outputs.fill(0);
        _oThis.inputs.fill(0);
        _oThis.biases.fill(0);

        if (bUseBias) {
            for (var n=0; n<oConfig.nodes; n++) {
                _oThis.biases[n] = Ann.random(aInitBiases[0], aInitBiases[1]);
            }
        }
    };

/****************** PUBLIC ******************/

this.id;
this.length;
this.inputs;
this.outputs;
this.gradients;
this.biases;
this.next;
this.previous;

this.inConnection;
this.outConnection;

this.isInput  = function() { return !this.previous;     }
this.isOutput = function() { return !this.next;         }

this.calculateGradients = function(aTarget) {
    var n, n1, nOutputError,
        fDerivative = Ann.Activation.Derivative[oConfig.activation];

    if (this.isOutput()) {
        for (n=0; n<oConfig.nodes; n++) {
            nOutputError = this.outputs[n] - aTarget[n];
            this.gradients[n] = nOutputError * fDerivative(this.outputs[n]);
        }
    } else {
        for (n=0; n<oConfig.nodes; n++) {
            nOutputError = 0.0;
            for (n1=0; n1<this.outConnection.weights[n].length; n1++) {
                nOutputError += this.outConnection.weights[n][n1] * this.next.gradients[n1];
            }
            // console.log(this.id, nOutputError, this.outputs[n], fDerivative(this.outputs[n]));
            this.gradients[n] = nOutputError * fDerivative(this.outputs[n]);
        }
    }
}

this.updateInputWeights = function() {
    if (!this.isInput()) {
        var nY,
            nX,
            nOldDeltaWeight,
            nNewDeltaWeight;

        for (nX=0; nX<this.previous.length; nX++) {
            for (nY=0; nY<this.length; nY++) {
                nOldDeltaWeight = this.inConnection.deltaWeights[nX][nY];
                nNewDeltaWeight =
                    - oNet.learningRate
                    * this.previous.outputs[nX]
                    * this.gradients[nY]
                    // Add momentum, a fraction of old delta weight
                    + oNet.learningMomentum
                    * nOldDeltaWeight;

                if (nNewDeltaWeight == 0 && nOldDeltaWeight != 0) {
                    console.log('Double overflow');
                }

                this.inConnection.deltaWeights[nX][nY] = nNewDeltaWeight;
                this.inConnection.weights[nX][nY]     += nNewDeltaWeight;
            }
        }
    }
}

this.updateInputBiases = function() {
    if (bUseBias && !this.isInput()) {
        var n,
            nNewDeltaBias;

        for (n=0; n<this.length; n++) {
            nNewDeltaBias = 
                - oNet.learningRate
                * this.gradients[n];

            this.biases[n] += nNewDeltaBias;
        }
    }
}

this.feedForward = function(a) {
    var fActivation = Ann.Activation[oConfig.activation];

    this.inputs = a;

    if (this.isInput()) {
        this.outputs = this.inputs;
    } else {
        for (var n=0; n<a.length; n++) {
            this.outputs[n] = fActivation(a[n] + this.biases[n]);
        }
    }
    if (!this.isOutput()) {
        this.outConnection.feedForward(this.outputs);
    }
}

_initialize();
}

The main feedForward and backPropagate functions are defined as follows:

this.feedForward = function(a) {
    this.layers[0].feedForward(a);
    this.netError = 0;
}

this.backPropagate = function(aExample, aTarget) {
    this.target = aTarget;

    if (aExample.length != this.getInputCount())  { throw "Wrong input count in training data"; }
    if (aTarget.length  != this.getOutputCount()) { throw "Wrong output count in training data"; }

    this.feedForward(aExample);
    _calculateNetError(aTarget);

    var oLayer = null,
        nLast  = this.layers.length-1,
        n;

    for (n=nLast; n>0; n--) {
        if (n === nLast) {
            this.layers[n].calculateGradients(aTarget);
        } else {
            this.layers[n].calculateGradients();
        }
    }

    for (n=nLast; n>0; n--) {
        this.layers[n].updateInputWeights();
        this.layers[n].updateInputBiases();
    }
}
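
Note that _calculateNetError is referenced above but its definition is not included in the question. A minimal sketch of what such a function typically computes (mean squared error over the output layer) might look like the following; this is an assumption for illustration, not the asker's actual code:

// Hypothetical sketch, not the asker's actual implementation:
// computes the mean squared error over the output layer.
var _calculateNetError = function(aTarget) {
    var oOutputLayer = _oThis.getOutputLayer(),
        nError = 0.0,
        nDelta,
        n;

    for (n=0; n<oOutputLayer.length; n++) {
        nDelta = aTarget[n] - oOutputLayer.outputs[n];
        nError += nDelta * nDelta;
    }

    _oThis.netError = nError / oOutputLayer.length;
};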

The Connection code is fairly simple:

Ann.Connection = function(oNet, oConfig, aInitWeights) {
var _oThis = this;

var _initialize = function() {
        var nX, nY, nIn, nOut;

        _oThis.from = oNet.layers[oConfig.from];
        _oThis.to   = oNet.layers[oConfig.to];

        nIn  = _oThis.from.length;
        nOut = _oThis.to.length;

        _oThis.weights      = new Array(nIn);
        _oThis.deltaWeights = new Array(nIn);

        for (nX=0; nX<nIn; nX++) {
            _oThis.weights[nX]      = new Array(nOut);
            _oThis.deltaWeights[nX] = new Array(nOut);
            _oThis.deltaWeights[nX].fill(0);
            for (nY=0; nY<nOut; nY++) {
                _oThis.weights[nX][nY] = Ann.random(aInitWeights[0], aInitWeights[1]);
            }
        }
    };

/****************** PUBLIC ******************/

this.weights;
this.deltaWeights;
this.from;
this.to;

this.feedForward = function(a) {
    var n, nX, nY, aOut = new Array(this.to.length);

    for (nY=0; nY<this.to.length; nY++) {
        n = 0;
        for (nX=0; nX<this.from.length; nX++) {
            n += a[nX] * this.weights[nX][nY];
        }
        aOut[nY] = n;
    }

    this.to.feedForward(aOut);
}

_initialize();
}

My activation functions and their derivatives are defined as follows:

Ann.Activation = {
    linear : function(n) { return n; },
    sigma  : function(n) { return 1.0 / (1.0 + Math.exp(-n)); },
    tanh   : function(n) { return Math.tanh(n); }
}

// Note: each derivative takes the activation's *output* value as its
// argument (calculateGradients passes this.outputs[n]), hence the
// closed forms below.
Ann.Activation.Derivative = {
    linear : function(n) { return 1.0; },
    sigma  : function(n) { return n * (1.0 - n); }, // s'(x) = s(x) * (1 - s(x))
    tanh   : function(n) { return 1.0 - n * n; }    // tanh'(x) = 1 - tanh(x)^2
}

The network's configuration JSON is as follows:

var Config = {
    id : "Config1",

    learning_rate     : 0.01,
    learning_momentum : 0,
    init_weight       : [-1, 1],
    init_bias         : [-1, 1],
    use_bias          : false,

    layers: [
        {nodes : 1},
        {nodes : 4, activation : "sigma"},
        {nodes : 1, activation : "linear"}
    ],

    connections: [
        {from : 0, to : 1},
        {from : 1, to : 2}
    ]
}
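
The Net class and the training loop are not shown above; roughly, a driver for this setup might look like the following (the Ann.Net constructor name is an assumption, since that class is omitted here):

// Hypothetical training driver; new Ann.Net(Config) is an assumption.
var oNet = new Ann.Net(Config);

// Draw random points from the target range and train on f(n) = n.
for (var nStep = 0; nStep < 100000; nStep++) {
    var n = Ann.random(0, 100);   // Ann.random is used elsewhere in this code
    oNet.backPropagate([n], [n]); // one input node, one target node
}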

Perhaps your experienced eyes can spot a problem in my calculations?

See example in JSFiddle

2 Answers:

Answer 0 (score: 2)

I didn't look through the code carefully (it is quite a lot of code to look at, that would take more time, and I'm not 100% familiar with JavaScript). Either way, I believe Stephen introduced some changes to how the weights are calculated, and his code seems to give correct results, so I'd recommend looking into that.

The following points, while not necessarily about the correctness of the calculations, may still be helpful:

  • How many training examples are you showing the network? Are you showing the same inputs multiple times? You should present every example (input) you have many times; showing each example only once is not sufficient for gradient-descent-based algorithms, because they only move a small step in the right direction on each update. It may well be that all of your code is correct and you simply need to spend more time training (see the sketch after this list).
  • Introducing more hidden layers, as Stephen did, may help speed up training, or it may hurt; that is usually something you want to experiment with for your specific case. For this simple problem, however, it is definitely not necessary. I suspect a more important difference between your configuration and Stephen's may be the activation function used in the hidden layer. You used a sigmoid, which means all input values get squashed below 1.0 in the hidden layer, and you then need very large weights to transform those numbers back up to the desired output (which can reach values of 100). Stephen used linear activation functions for all layers, which in this specific case probably makes training easier, because you are in fact trying to learn a linear function. In many other cases, though, introducing non-linearity is desirable.
  • It may be beneficial to transform (normalize) both your inputs and your desired outputs to [0, 1] instead of [0, 100]. That makes it more likely that your sigmoid layer will produce good results (though I'm still not sure whether it is enough, since you are still introducing a non-linearity in a case where you intend to learn a linear function, and you may need more hidden nodes to correct for that). In 'real-world' cases with multiple different input variables the same is typically done, because it ensures that all input variables are initially treated as equally important. You can always do a preprocessing step that normalizes the input to [0, 1], give that to the network as input, train it to produce output in [0, 1], and then add a postprocessing step that transforms the output back to the original range, as shown in the sketch after this list.
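
A minimal sketch of the repeated-presentation and normalization ideas above, written in the style of the question's code; the new Ann.Net(Config) call and the normalize/denormalize helpers are illustrative assumptions, not part of the question's actual API:

// Illustrative sketch only. new Ann.Net(Config) and the two helpers
// below are assumptions; the Net class itself is not shown in the question.
var normalize   = function(n) { return n / 100.0; }; // [0, 100] -> [0, 1]
var denormalize = function(n) { return n * 100.0; }; // [0, 1]   -> [0, 100]

var oNet      = new Ann.Net(Config),
    aExamples = [0, 10, 25, 50, 75, 100];

// Present every example many times; a single pass is not enough
// for gradient descent to converge.
for (var nEpoch = 0; nEpoch < 10000; nEpoch++) {
    for (var i = 0; i < aExamples.length; i++) {
        var n = normalize(aExamples[i]);
        oNet.backPropagate([n], [n]); // train entirely in [0, 1]
    }
}

// At prediction time, normalize the input and scale the output back up.
oNet.feedForward([normalize(42)]);
var nPrediction = denormalize(oNet.getOutputLayer().outputs[0]);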

Answer 1 (score: 1)

First of all... I really like this code. I know very little about NNs (just getting started), so pardon anything I'm missing here.

Here is a summary of the changes I made:

//updateInputWeights has this in the middle now:

nNewDeltaWeight =
    oNet.learningRate
    * this.gradients[nY]
    / this.previous.outputs[nX]
    // Add momentum, a fraction of old delta weight
    + oNet.learningMomentum
    * nOldDeltaWeight;


//updateInputWeights has this at the bottom now:

this.inConnection.deltaWeights[nX][nY] += nNewDeltaWeight; // += added
this.inConnection.weights[nX][nY]      += nNewDeltaWeight;

// I modified the following:

_calculateNetError2 = function(aTarget) {
    var oOutputLayer = _oThis.getOutputLayer(),
        nOutputCount = oOutputLayer.length,
        nError = 0.0,
        nDelta = 0.0,
        n;

    for (n=0; n<nOutputCount; n++) {
        nDelta = aTarget[n] - oOutputLayer.outputs[n];
        nError += nDelta;
    }

    _oThis.netError = nError;
};

The Config section now looks like this:

var Config = {
    id : "Config1",

    learning_rate     : 0.001,
    learning_momentum : 0.001,
    init_weight       : [-1.0, 1.0],
    init_bias         : [-1.0, 1.0],
    use_bias          : false,

    /*
    layers: [
        {nodes : 1, activation : "linear"},
        {nodes : 5, activation : "linear"},
        {nodes : 1, activation : "linear"}
    ],

    connections: [
        {from : 0, to : 1}
        ,{from : 1, to : 2}
    ]
    */

    layers: [
        {nodes : 1, activation : "linear"},
        {nodes : 2, activation : "linear"},
        {nodes : 2, activation : "linear"},
        {nodes : 2, activation : "linear"},
        {nodes : 2, activation : "linear"},
        {nodes : 1, activation : "linear"}
    ],

    connections: [
         {from : 0, to : 1}
        ,{from : 1, to : 2}
        ,{from : 2, to : 3}
        ,{from : 3, to : 4}
        ,{from : 4, to : 5}
    ]
}

These were my resulting images:

[Images: resulting plots not included here]