我搭建了一个常规的ANN-BP(反向传播神经网络):输入层和输出层各有一个单元,隐藏层有4个使用sigmoid激活的节点。我给它一个简单的任务:近似线性函数f(n) = n,其中n在0-100范围内。
问题:无论层数和隐藏层中的单元数是多少,也无论是否在节点值中使用偏置,它学到的始终是近似f(n)=平均值(数据集),如下所示:
代码是用JavaScript编写的,作为概念证明。我定义了三个类:Net,Layer和Connection,其中Layer是输入,偏置和输出值的数组,Connection是权重和delta权重的2D数组。以下是所有重要计算发生的层代码:
/**
 * A single layer of the network: per-node input, output, gradient and bias
 * arrays, together with the forward and backward computations on them.
 *
 * Public fields created by the constructor: id, length, outputs, inputs,
 * gradients, biases. The fields next, previous, inConnection and
 * outConnection are read by the methods below but never assigned in this
 * block; presumably the owning net wires them up (verify against the caller).
 *
 * @param {*} nId Identifier stored as `this.id`.
 * @param {Object} oNet Owning net; `learningRate` and `learningMomentum` are read from it.
 * @param {Object} oConfig Layer configuration; `nodes` and `activation` are read from it.
 * @param {boolean} bUseBias When truthy, biases are randomised and trained; otherwise they stay 0.
 * @param {Array} aInitBiases Two values handed to `Ann.random` for each bias; only read when bUseBias is truthy.
 */
Ann.Layer = function(nId, oNet, oConfig, bUseBias, aInitBiases) {
    const self = this;

    /** Builds a zero-filled array of the requested size. */
    function zeros(count) {
        return new Array(count).fill(0);
    }

    /** Creates the per-node state; invoked once at the end of the constructor. */
    function initialize() {
        self.id = nId;
        self.length = oConfig.nodes;
        self.outputs = zeros(oConfig.nodes);
        self.inputs = zeros(oConfig.nodes);
        // Gradients are left unfilled until calculateGradients writes them.
        self.gradients = new Array(oConfig.nodes);
        self.biases = zeros(oConfig.nodes);
        if (!bUseBias) {
            return;
        }
        for (let i = 0; i < oConfig.nodes; i++) {
            self.biases[i] = Ann.random(aInitBiases[0], aInitBiases[1]);
        }
    }

    /****************** PUBLIC ******************/

    /** @returns {boolean} true when the layer has no `previous` layer. */
    this.isInput = function() {
        return !this.previous;
    };

    /** @returns {boolean} true when the layer has no `next` layer. */
    this.isOutput = function() {
        return !this.next;
    };

    /**
     * Fills `this.gradients`.
     * Output layer: (output - target) * derivative(output).
     * Any other layer: (sum of outgoing weight * next layer gradient) * derivative(output).
     *
     * @param {Array} aTarget Target values; only read when this is the output layer.
     */
    this.calculateGradients = function(aTarget) {
        const derivativeOf = Ann.Activation.Derivative[oConfig.activation];
        const isLast = this.isOutput();
        for (let i = 0; i < oConfig.nodes; i++) {
            let error;
            if (isLast) {
                error = this.outputs[i] - aTarget[i];
            } else {
                error = 0.0;
                const outgoing = this.outConnection.weights[i];
                for (let j = 0; j < outgoing.length; j++) {
                    error += outgoing[j] * this.next.gradients[j];
                }
            }
            // The derivative receives the node's output value, not its raw input.
            this.gradients[i] = error * derivativeOf(this.outputs[i]);
        }
    };

    /**
     * Applies one update step to every weight of the incoming connection:
     * delta = -learningRate * previousOutput * gradient + learningMomentum * oldDelta.
     * Does nothing on the input layer.
     */
    this.updateInputWeights = function() {
        if (this.isInput()) {
            return;
        }
        for (let from = 0; from < this.previous.length; from++) {
            for (let to = 0; to < this.length; to++) {
                const previousDelta = this.inConnection.deltaWeights[from][to];
                const gradientStep = -oNet.learningRate
                    * this.previous.outputs[from]
                    * this.gradients[to];
                // Momentum: a fraction of the previous delta weight.
                const momentumStep = oNet.learningMomentum * previousDelta;
                const delta = gradientStep + momentumStep;
                // Diagnostic: the step became zero although the previous one was not.
                if (delta == 0 && previousDelta != 0) {
                    console.log('Double overflow');
                }
                this.inConnection.deltaWeights[from][to] = delta;
                this.inConnection.weights[from][to] += delta;
            }
        }
    };

    /**
     * Moves every bias by -learningRate * gradient (no momentum term).
     * Does nothing when biases are disabled or on the input layer.
     */
    this.updateInputBiases = function() {
        if (!bUseBias || this.isInput()) {
            return;
        }
        for (let i = 0; i < this.length; i++) {
            this.biases[i] += -oNet.learningRate * this.gradients[i];
        }
    };

    /**
     * Stores `a` as the layer input, computes the outputs and passes them to
     * the outgoing connection unless this is the output layer.
     * The input layer passes `a` through unchanged (outputs and inputs then
     * refer to the same array); other layers apply activation(a[i] + bias[i]).
     *
     * @param {Array} a Incoming values, one per node.
     */
    this.feedForward = function(a) {
        const activate = Ann.Activation[oConfig.activation];
        this.inputs = a;
        if (this.isInput()) {
            this.outputs = this.inputs;
        } else {
            for (let i = 0; i < a.length; i++) {
                this.outputs[i] = activate(a[i] + this.biases[i]);
            }
        }
        if (this.isOutput()) {
            return;
        }
        this.outConnection.feedForward(this.outputs);
    };

    initialize();
};
主feedForward和backProp函数的定义如下:
/**
 * Starts a forward pass by handing `a` to the first layer, then resets
 * `netError` to 0.
 *
 * @param {Array} a Input values for the first layer.
 */
this.feedForward = function(a) {
    const firstLayer = this.layers[0];
    firstLayer.feedForward(a);
    this.netError = 0;
};
/**
 * Runs one training step on a single example: forward pass, net error,
 * gradients for every non-input layer (last to first), then weight and bias
 * updates for every non-input layer (last to first).
 *
 * @param {Array} aExample Input values; length must match getInputCount().
 * @param {Array} aTarget Expected outputs; length must match getOutputCount().
 * @throws {string} When either array has the wrong length.
 */
this.backPropagate = function(aExample, aTarget) {
    this.target = aTarget;
    if (aExample.length != this.getInputCount()) {
        throw "Wrong input count in training data";
    }
    if (aTarget.length != this.getOutputCount()) {
        throw "Wrong output count in training data";
    }

    this.feedForward(aExample);
    _calculateNetError(aTarget);

    const lastIndex = this.layers.length - 1;

    // All gradients are computed before any weight changes, so hidden layers
    // see the weights that were used in the forward pass.
    if (lastIndex > 0) {
        // Only the output layer is given the target.
        this.layers[lastIndex].calculateGradients(aTarget);
        for (let i = lastIndex - 1; i > 0; i--) {
            this.layers[i].calculateGradients();
        }
    }

    // Layer 0 is skipped in both passes.
    let index = lastIndex;
    while (index > 0) {
        const layer = this.layers[index];
        layer.updateInputWeights();
        layer.updateInputBiases();
        index--;
    }
};
连接代码非常简单:
/**
 * Fully connected link between two layers. Holds a weight matrix and a
 * matching matrix of the last applied weight deltas, both indexed
 * [source node][destination node].
 *
 * @param {Object} oNet Owning net; its `layers` array is indexed with oConfig.from / oConfig.to.
 * @param {Object} oConfig Connection configuration; `from` and `to` are layer indices.
 * @param {Array} aInitWeights Two values handed to `Ann.random` for each initial weight.
 */
Ann.Connection = function(oNet, oConfig, aInitWeights) {
    const self = this;

    /** Resolves both layers and allocates the matrices; invoked once at the end of the constructor. */
    function initialize() {
        self.from = oNet.layers[oConfig.from];
        self.to = oNet.layers[oConfig.to];
        const sourceCount = self.from.length;
        const targetCount = self.to.length;
        self.weights = new Array(sourceCount);
        self.deltaWeights = new Array(sourceCount);
        for (let source = 0; source < sourceCount; source++) {
            const row = new Array(targetCount);
            for (let target = 0; target < targetCount; target++) {
                row[target] = Ann.random(aInitWeights[0], aInitWeights[1]);
            }
            self.weights[source] = row;
            // No previous step exists yet, so every delta starts at 0.
            self.deltaWeights[source] = new Array(targetCount).fill(0);
        }
    }

    /****************** PUBLIC ******************/

    /**
     * Computes the weighted sum of `a` for every destination node and hands
     * the resulting array to the destination layer's feedForward.
     *
     * @param {Array} a Output values of the source layer.
     */
    this.feedForward = function(a) {
        const sums = new Array(this.to.length);
        for (let target = 0; target < this.to.length; target++) {
            let total = 0;
            for (let source = 0; source < this.from.length; source++) {
                total += a[source] * this.weights[source][target];
            }
            sums[target] = total;
        }
        this.to.feedForward(sums);
    };

    initialize();
};
我的激活函数及其导数的定义如下:
/**
 * Activation functions, looked up by the `activation` name of a layer config.
 * Each takes a number and returns a number.
 */
Ann.Activation = {
    /** Identity. */
    linear: (n) => n,
    /** Logistic sigmoid, 1 / (1 + e^-n). */
    sigma: (n) => 1.0 / (1.0 + Math.exp(-n)),
    /** Hyperbolic tangent. */
    tanh: (n) => Math.tanh(n),
};
/**
 * Derivatives of the activation functions, keyed by the same names.
 * Ann.Layer.calculateGradients passes a node's output value as `n`, and the
 * sigma / tanh formulas below are written in terms of that output value.
 */
Ann.Activation.Derivative = {
    /** Constant slope of the identity; the argument is ignored. */
    linear: (n) => 1.0,
    /** n * (1 - n). */
    sigma: (n) => n * (1.0 - n),
    /** 1 - n^2. */
    tanh: (n) => 1.0 - n * n,
};
网络的配置JSON如下:
/**
 * Network configuration: one input node, one hidden layer of four "sigma"
 * nodes and one "linear" output node, with biases disabled.
 */
var Config = {
    id: "Config1",

    // Training parameters.
    learning_rate: 0.01,
    learning_momentum: 0,

    // Two-element ranges for the initial weights and biases.
    init_weight: [-1, 1],
    init_bias: [-1, 1],
    use_bias: false,

    // Layers in order; the first entry names no activation.
    layers: [
        { nodes: 1 },
        { nodes: 4, activation: "sigma" },
        { nodes: 1, activation: "linear" },
    ],

    // Each connection links two entries of `layers` by index.
    connections: [
        { from: 0, to: 1 },
        { from: 1, to: 2 },
    ],
};
也许,您经验丰富的眼睛可以通过我的计算发现问题?
答案 0 :(得分:2)
我没有仔细查看代码(因为代码量很大,需要花更多时间,而且我对JavaScript也不是100%熟悉)。不管怎样,我相信斯蒂芬对权重的计算方式做了一些修改,而且他的代码似乎给出了正确的结果,所以我建议看一下他的答案。
以下几点虽然不一定是关于计算的正确性,但可能仍然有帮助:
答案 1 :(得分:1)
首先……我真的很喜欢这段代码。我对神经网络(NN)了解不多(刚入门),所以如果有疏漏之处还请见谅。
以下是我所做更改的摘要:
// Replacement for the delta-weight expression in the middle of updateInputWeights.
// NOTE(review): this version divides the gradient by the previous layer's
// output and has no leading minus sign, whereas the question's version
// multiplies by that output and negates the learning rate. A previous-layer
// output of 0 makes the quotient non-finite (Infinity or NaN) - confirm that
// this change is intended.
nNewDeltaWeight =
oNet.learningRate
* this.gradients[nY]
/ this.previous.outputs[nX]
// Add momentum, a fraction of old delta weight
+ oNet.learningMomentum
* nOldDeltaWeight;
// Replacement for the two assignments at the bottom of updateInputWeights.
// NOTE(review): deltaWeights now accumulates (+=) instead of being overwritten.
// Assuming nOldDeltaWeight is still read from deltaWeights, the momentum term
// is then based on the running sum of all past steps - confirm.
this.inConnection.deltaWeights[nX][nY] += nNewDeltaWeight; // += added
this.inConnection.weights[nX][nY] += nNewDeltaWeight;
// I modified the following:
/**
 * Stores in `netError` the sum of (target - output) over all nodes of the
 * output layer. The differences are signed, so positive and negative
 * differences offset each other in the total.
 *
 * @param {Array} aTarget Expected value for each output node.
 */
_calculateNetError2 = function(aTarget) {
    const outputLayer = _oThis.getOutputLayer();
    const outputCount = outputLayer.length;
    let total = 0.0;
    for (let i = 0; i < outputCount; i++) {
        total += aTarget[i] - outputLayer.outputs[i];
    }
    _oThis.netError = total;
};
配置部分现在看起来像这样:
/**
 * Network configuration used in the answer: one input node, four hidden
 * layers of two nodes each and one output node, all "linear", with biases
 * disabled.
 */
var Config = {
    id: "Config1",

    // Training parameters.
    learning_rate: 0.001,
    learning_momentum: 0.001,

    // Two-element ranges for the initial weights and biases.
    init_weight: [-1.0, 1.0],
    init_bias: [-1.0, 1.0],
    use_bias: false,

    // Alternative topology, kept disabled: a single hidden layer of five
    // linear nodes.
    //
    //   layers: [
    //       { nodes: 1, activation: "linear" },
    //       { nodes: 5, activation: "linear" },
    //       { nodes: 1, activation: "linear" }
    //   ],
    //   connections: [
    //       { from: 0, to: 1 },
    //       { from: 1, to: 2 }
    //   ]

    // Layers in order.
    layers: [
        { nodes: 1, activation: "linear" },
        { nodes: 2, activation: "linear" },
        { nodes: 2, activation: "linear" },
        { nodes: 2, activation: "linear" },
        { nodes: 2, activation: "linear" },
        { nodes: 1, activation: "linear" },
    ],

    // Each connection links two entries of `layers` by index.
    connections: [
        { from: 0, to: 1 },
        { from: 1, to: 2 },
        { from: 2, to: 3 },
        { from: 3, to: 4 },
        { from: 4, to: 5 },
    ],
};