I wrote a neural network in JavaScript and implemented the backpropagation algorithm described here. This is the code (TypeScript):
/**
 * Net
 */
export class Net {
    private layers: Layer[] = [];
    private inputLayer: Layer;
    private outputLayer: Layer;
    public error: number = Infinity;
    private eta: number = 0.15;   // learning rate
    private alpha: number = 0.5;  // momentum (currently unused)
    constructor(...topology: number[]) {
        topology.forEach((topologyLayer, iTL) => {
            var nextLayerNeuronNumber = topology[iTL + 1] || 0;
            this.layers.push(new Layer(topologyLayer, nextLayerNeuronNumber));
        });
        this.inputLayer = this.layers[0];
        this.outputLayer = this.layers[this.layers.length - 1];
    }
    public loadWeights(weights: number[][][]) {
        /*
        [
            [Layer
                [Node weights, ..., ...]
            ]
        ]
        */
        for (var iL = 0; iL < weights.length; iL++) {
            var neuronWeights = weights[iL];
            for (var iN = 0; iN < neuronWeights.length; iN++) {
                // Neuron
                var connections = neuronWeights[iN];
                for (var iC = 0; iC < connections.length; iC++) {
                    var connection = connections[iC];
                    this.layer(iL).neuron(iN).setWeights(iC, connection);
                }
            }
        }
    }
    public train(data: number[][], iterations = 2000) {
        var inputs = this.inputLayer.neurons.length - 1; // exclude the bias neuron
        for (var ite = 0; ite < iterations; ite++) {
            data.forEach(node => {
                // Split each row into input values and expected output values
                var inputData = [];
                var outputData = [];
                for (var i = 0; i < node.length; i++) {
                    if (i < inputs) {
                        inputData.push(node[i]);
                    } else {
                        outputData.push(node[i]);
                    }
                }
                this.feedForward(...inputData);
                this.backProp(...outputData);
            });
        }
        return this.calcDataError(data);
    }
    private calcDataError(data: number[][]) {
        var overallDataErrorSum = 0;
        var inputs = this.inputLayer.neurons.length - 1;
        data.forEach(node => {
            // Note: splice removes the output values from node in place
            var outputData = node.splice(inputs);
            var inputData = node;
            this.feedForward(...inputData);
            overallDataErrorSum += this.getNetError(outputData);
        });
        overallDataErrorSum /= data.length;
        return overallDataErrorSum;
    }
    public saveWeights() {
        // Ignore output layer
        var ret = [];
        for (var iL = 0; iL < this.layers.length - 1; iL++) {
            var layer = this.layers[iL];
            var layer_ret = [];
            layer.neurons.forEach(neuron => {
                layer_ret.push(neuron.connections.map(c => c.weight));
            });
            ret.push(layer_ret);
        }
        return ret;
    }
    feedForward(...inputs: number[]) {
        if (inputs.length != this.inputLayer.neurons.length - 1) return false;
        this.inputLayer.neurons.forEach((neuron, i) => {
            if (!neuron.isBias) {
                neuron.output(inputs[i]);
            }
        });
        this.layers.forEach((layer, i) => {
            // Skip input layer
            if (i > 0) {
                var prevLayer = this.layers[i - 1];
                layer.neurons.forEach(neuron => {
                    neuron.calcOutput(prevLayer);
                });
            }
        });
    }
    public getNetError(targetVals: number[]) {
        // Calc the mean squared delta error of the output neurons
        var deltas = [];
        this.outputLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                neuron.calcOutputDelta(targetVals[iN]);
                deltas.push(neuron.delta);
            }
        });
        deltas = deltas.map(d => Math.pow(d, 2));
        var sum = 0;
        deltas.forEach(d => sum += d);
        return sum / deltas.length;
    }
    backProp(...targetVals: number[]) {
        // Calc delta error of outputs
        this.outputLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                neuron.calcOutputDelta(targetVals[iN]);
            }
        });
        // Backprop delta error through hidden layers
        for (var iL = this.layers.length - 2; iL > 0; iL--) {
            var layer = this.layers[iL];
            var nextLayer = this.layers[iL + 1];
            layer.neurons.forEach(neuron => {
                neuron.calcHiddenDelta(nextLayer);
            });
        }
        // Update weights
        for (var iL = 1; iL < this.layers.length; iL++) {
            var layer = this.layers[iL];
            var prevLayer = this.layers[iL - 1];
            layer.neurons.forEach(neuron => {
                if (!neuron.isBias) {
                    neuron.updateWeights(prevLayer, this.eta);
                }
            });
        }
        this.error = this.getNetError(targetVals);
        return this.error;
    }
    getOutputs() {
        var ret = [];
        this.outputLayer.neurons.forEach(neuron => {
            if (!neuron.isBias) {
                ret.push(neuron.output());
            }
        });
        return ret;
    }
    getResults(...inputs: number[]) {
        this.feedForward(...inputs);
        return this.getOutputs();
    }
    layer(i: number) {
        return this.layers[i];
    }
}
/**
 * Layer
 */
class Layer {
    public neurons: Neuron[] = [];
    constructor(neuronNumber: number, nextLayerNeuronNumber: number) {
        // +1 for the bias neuron, which is always last
        for (var iN = 0; iN < neuronNumber + 1; iN++) {
            if (iN < neuronNumber) {
                // Create normal neuron
                this.neurons.push(new Neuron(nextLayerNeuronNumber, iN, false));
            } else {
                // Create bias neuron
                this.neurons.push(new Neuron(nextLayerNeuronNumber, iN, true));
            }
        }
    }
    neuron(i: number) {
        return this.neurons[i];
    }
    bias() {
        return this.neurons[this.neurons.length - 1];
    }
}
/**
 * Neuron
 */
class Neuron {
    public connections: Connection[] = [];
    private outputVal: number;
    public delta: number;
    constructor(outputsTo: number, private index: number, public isBias = false) {
        // Create connections to the next layer
        for (var c = 0; c < outputsTo; c++) {
            this.connections.push(new Connection());
        }
        this.outputVal = isBias ? 1 : 0;
    }
    calcOutput(prevLayer: Layer) {
        // Only calcOutput when the neuron is not a bias neuron
        if (!this.isBias) {
            var sum = 0;
            prevLayer.neurons.forEach(prevLayerNeuron => {
                sum += prevLayerNeuron.output() * prevLayerNeuron.getWeights(this.index).weight;
            });
            this.output(this.activationFunction(sum));
        }
    }
    private activationFunction(x: number) {
        //return Math.tanh(x);
        return 1 / (1 + Math.exp(-x));
        //return x;
    }
    private activationFunctionDerivative(x: number) {
        // Small approximation of tanh derivative
        //return 1 - x * x
        // Sigmoid
        var s = this.activationFunction(x);
        return s * (1 - s);
        // With the general derivative formula where h = 1e-4
        /*var h = 0.0001;
        var dx = ((this.activationFunction(x + h) - this.activationFunction(x)) / h);
        return dx;*/
        //return 1
    }
    // Backprop // Todo // Understand
    public calcOutputDelta(targetVal: number) {
        // Bias output neurons do not have a delta error
        if (!this.isBias) {
            this.delta = targetVal - this.output();
        }
    }
    public calcHiddenDelta(nextLayer: Layer) {
        var sum = 0;
        // Go through all neurons of the next layer, excluding the bias
        nextLayer.neurons.forEach((neuron, iN) => {
            if (!neuron.isBias) {
                sum += neuron.delta * this.getWeights(iN).weight;
            }
        });
        this.delta = sum;
    }
    public updateWeights(prevLayer: Layer, eta: number) {
        prevLayer.neurons.forEach((neuron, iN) => {
            var weight = neuron.getWeights(this.index).weight;
            var newWeight =
                weight + // old weight
                eta * // learning rate
                this.delta * // delta error
                this.activationFunctionDerivative(neuron.output());
            neuron.getWeights(this.index).weight = newWeight;
        });
    }
    // Backprop end
    output(s?: number) {
        if (s && !this.isBias) {
            this.outputVal = s;
            return this.outputVal;
        } else {
            return this.outputVal;
        }
    }
    getWeights(i: number) {
        return this.connections[i];
    }
    setWeights(i: number, s: number) {
        return this.connections[i].weight = s;
    }
}
/**
 * Connection
 */
class Connection {
    public weight: number;
    public deltaWeight: number;
    constructor() {
        // Weights are initialized randomly in [0, 1)
        this.weight = Math.random();
        this.deltaWeight = 0;
    }
}
When training on only a single set of data, it works fine. (Example here)
import {Net} from './ml';
var myNet = new Net(2, 2, 2);

var weights = [
    [
        [0.15, 0.25],
        [0.20, 0.30],
        [0.35, 0.35]
    ],
    [
        [0.40, 0.50],
        [0.45, 0.55],
        [0.60, 0.60]
    ]
];

// Just loads the weights given in the example
myNet.loadWeights(weights);
var error = myNet.train([[0.05, 0.10, 0.01, 0.99]]);
console.log('Error: ', error);
console.log(myNet.getResults(0.05, 0.10));
The console prints:
Error: 0.0000020735174706210714
[ 0.011556397089327321, 0.9886867357304885 ]
Basically, that's pretty good, right?
Then I wanted to teach the network the XOR problem:
import {Net} from './ml';
var myNet = new Net(2, 3, 1);

var trainingData = [
    [0, 0, 0],
    [1, 0, 1],
    [0, 1, 1],
    [1, 1, 0]
];

var error = myNet.train(trainingData);
console.log('Error: ', error);
console.log('Input: 0, 0: ', myNet.getResults(0, 0));
console.log('Input: 1, 0: ', myNet.getResults(1, 0));
Here the network fails:
Error: 0.2500007370167383
Input: 0, 0: [ 0.5008584967899313 ]
Input: 1, 0: [ 0.5008584967899313 ]
What am I doing wrong?
Answer (score: 0):
First, perform a gradient check over the whole batch (numerically evaluating the gradient of the cost function on the batch), if you haven't done so already. That will tell you where the problem lies.
If the gradients are not computed correctly, then, given that your implementation works on a single data set, you are most likely mixing up some values in the backward pass.
If the gradients are computed correctly, then the error is in your update function.
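To illustrate, here is a minimal sketch of such a numerical gradient check in TypeScript. The helpers `lossAt` and `analyticGrad` are hypothetical, not part of the code above: `lossAt` would evaluate the batch error for a flat vector of weights, and `analyticGrad` would return the gradient your backward pass computes for those same weights.

// Minimal sketch of a central-difference gradient check (hypothetical helpers).
function gradientCheck(
    weights: number[],
    lossAt: (w: number[]) => number,          // assumed: batch loss for a flat weight vector
    analyticGrad: (w: number[]) => number[],  // assumed: gradient from the backward pass
    h = 1e-5
): number {
    const analytic = analyticGrad(weights);
    let maxRelError = 0;
    for (let i = 0; i < weights.length; i++) {
        const wPlus = weights.slice();
        const wMinus = weights.slice();
        wPlus[i] += h;
        wMinus[i] -= h;
        // Central difference approximation of dLoss/dw_i
        const numeric = (lossAt(wPlus) - lossAt(wMinus)) / (2 * h);
        const denom = Math.max(Math.abs(numeric) + Math.abs(analytic[i]), 1e-12);
        maxRelError = Math.max(maxRelError, Math.abs(numeric - analytic[i]) / denom);
    }
    return maxRelError;
}

With a correct backward pass the returned relative error should typically stay well below 1e-4; values near 1 indicate the analytic gradient is wrong.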
A working implementation of neural network backpropagation in JavaScript can be found here. Below is a snippet of the trainStepBatch function, which performs backpropagation:
function trainStepBatch(details){
    // We compute the forward pass
    // for each training sample in the batch
    // and store it in the batch array
    var batch=[];
    var ks=[];
    for(var a=0;a<details.data.in.length;a++){
        var results=[];
        var k=1;
        results[0]={output:details.data.in[a]};
        for(var i=1;i<this.layers.length;i++){
            results[i]=layers[this.layers[i].type].evalForGrad(this.layers[i],results[i-1].output);
            k++;
        }
        batch[a]=results;
        ks[a]=k;
    }
    // We compute the backward pass:
    // first the derivative of the cost function given the output
    var grad=[];
    for(i in batch)grad[i]={grad:costs[details.cost].df(batch[i][ks[i]-1].output,details.data.out[i])};
    // for each layer we compute the backward pass
    // on the results of all forward passes at that layer
    for(var i=this.layers.length-1;i>0;i--){
        var grads=[];
        var test=true;
        for(a in batch){
            grads[a]=layers[this.layers[i].type].grad(this.layers[i],batch[a][i],batch[a][i-1],grad[a]);
            if(grads[a]==null)test=false;
            else grads[a].layer=i;
        }
        // we perform the update
        if(test)stepBatch(this.layers[i].par,grads,details.stepSize);
    }
}
And here is the stepBatch function:
function stepBatch(params,grads,stepSize){
    for(i in params.w){
        for(j in params.w[i]){
            for(a in grads){
                params.w[i][j]-=stepSize*grads[a].dw[i][j];
            }
        }
    }
    for(i in params.b){
        for(a in grads){
            params.b[i]-=stepSize*grads[a].db[i];
        }
    }
}
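In other words, assuming grads[a].dw and grads[a].db hold the per-sample weight and bias gradients, stepBatch performs a plain batch gradient-descent step, moving every parameter against the gradient summed over the batch:

w[i][j] ← w[i][j] − stepSize · Σ_a dw_a[i][j]

and analogously b[i] ← b[i] − stepSize · Σ_a db_a[i] for the biases.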