我是这个网站的新手,所以如果我在这篇文章中做任何错误的事,我会提前道歉。
我目前正在尝试机器学习,并且正在学习神经网络。我当前正在使用http://neuralnetworksanddeeplearning.com/。但是,我并没有完全理解所有内容,并且所有代码都是用Python编写的(我更喜欢JavaScript)。
我创建了一个适用于简单数据的程序。但是,对于更复杂的数据(使用MNIST数据进行手写数字识别),使用包含784个输入神经元、一个含10-400个神经元的隐藏层(仅一个隐藏层,我尝试了多种神经元数量)以及10个输出神经元的神经网络,经过数百次迭代后,准确率仍不如上面的网站所说的那样高。我认为我的反向传播步骤(即训练步骤,这里包含其他函数仅供参考)中有一个错误,导致它学习得不够快(顺便说一句,我使用交叉熵作为代价函数)。如果有人可以帮助我找到错误,我将非常感谢。预先感谢。
下面是代码。权重以数组的数组的形式排列(weight[i][j][k] 是第 i 层中的第 j 个神经元与第 (i + 1) 层中的第 k 个神经元之间的权重)。类似地,bias[i][j] 是第 (i + 1) 层中第 j 个神经元的偏置。训练数据被格式化为带有 input(输入)和 output(输出)键的对象数组(请参见下面的示例)。
/**
 * Fully connected feed-forward neural network with sigmoid activations,
 * trained by mini-batch gradient descent on a cross-entropy cost.
 *
 * Parameter layout:
 *   - `weights[i][j][k]` is the weight between neuron `j` of layer `i` and
 *     neuron `k` of layer `i + 1`.
 *   - `biases[i][j]` is the bias of neuron `j` of layer `i + 1`.
 */
class NeuralNetwork {
  /**
   * Builds a network and seeds every weight and bias uniformly in [-1, 1).
   *
   * @param {number[]} layers - Neuron count of each layer, input layer first.
   * @throws {Error} If `layers` is not an array of at least two positive integers.
   */
  constructor(layers) {
    if (!Array.isArray(layers) || layers.length < 2) {
      throw new Error("Layers must be specified as an array of length at least 2");
    }
    // Index loop (rather than `every`) so that holes in a sparse array are rejected too.
    for (let i = 0; i < layers.length; ++i) {
      const size = layers[i];
      if (!(typeof size === "number" && Number.isInteger(size) && size > 0)) {
        throw new Error("Array used to specify NeuralNetwork layers must consist solely of positive integers");
      }
    }

    const randomParameter = () => Math.random() * 2 - 1;
    this.weights = [];
    this.biases = [];
    for (let i = 0; i < layers.length - 1; ++i) {
      const fanIn = layers[i];
      const fanOut = layers[i + 1];
      this.weights.push(
        Array.from({ length: fanIn }, () => Array.from({ length: fanOut }, randomParameter))
      );
      this.biases.push(Array.from({ length: fanOut }, randomParameter));
    }

    // Logistic sigmoid and its derivative expressed through the sigmoid itself.
    this.activation = (x) => 1 / (1 + Math.exp(-x));
    this.activationDerivative = (x) => {
      const a = this.activation(x);
      return a * (1 - a);
    };

    // Shallow freeze: the properties cannot be reassigned, but the nested
    // arrays stay mutable so `train` can update the parameters in place.
    Object.freeze(this);
    console.log("Successfully initialized NeuralNetwork");
  }

  /**
   * Forward propagation.
   *
   * @param {number[]} input - Activations of the input layer.
   * @param {boolean} [training] - When truthy, return the full trace needed
   *   by back-propagation instead of just the output activations.
   * @returns {number[] | Array<Array<{before: number, after: number}>>}
   *   Without `training`: the output layer activations. With `training`: one
   *   array per layer (input layer included) of `{before, after}` pairs, where
   *   `before` is the weighted input and `after` the activation (for the input
   *   layer both equal the raw input value).
   */
  run(input, training) {
    let activations = [...input];
    const trace = training ? [input.map((value) => ({ before: value, after: value }))] : null;

    for (let layer = 0; layer < this.weights.length; ++layer) {
      const layerWeights = this.weights[layer];
      const layerBiases = this.biases[layer];
      const outputCount = layerWeights[0].length;
      const weightedInputs = [];
      for (let j = 0; j < outputCount; ++j) {
        let sum = layerBiases[j];
        for (let k = 0; k < activations.length; ++k) {
          sum += activations[k] * layerWeights[k][j];
        }
        weightedInputs.push(sum);
      }
      activations = weightedInputs.map((z) => this.activation(z));
      if (trace) {
        trace.push(weightedInputs.map((z, j) => ({ before: z, after: activations[j] })));
      }
    }
    return trace ?? activations;
  }

  /**
   * Back-propagation training with mini-batch gradient descent.
   *
   * Gradients are accumulated sample by sample and applied, averaged over the
   * samples accumulated since the previous update, every `batch` samples and
   * once more after the very last sample. Batches are counted across
   * iterations, so a batch may span the end of one pass over `data`.
   *
   * @param {Array<{input: number[], output: number[]}>} data - Training samples.
   * @param {number | function(number, number): number} [learningRate=0.1] -
   *   A constant rate, or a function called at the start of each iteration
   *   with `(iterationIndex, averageCostOfPreviousIteration)` (cost is 0 for
   *   the first iteration) that returns the rate to use.
   * @param {number} [batch=50] - Number of samples per weight update.
   * @param {number} [iterations=10000] - Number of passes over `data`.
   * @returns {NeuralNetwork} This network, for chaining.
   */
  train(data, learningRate = 0.1, batch = 50, iterations = 10000) {
    console.log("Initialized training");
    const startTime = Date.now();
    const sampleCount = data.length;
    const rateIsFunction = typeof learningRate === "function";
    const layerCount = this.weights.length;

    const zeroWeightChanges = () =>
      this.weights.map((rows) => rows.map((row) => row.map(() => 0)));
    const zeroBiasChanges = () => this.biases.map((layerBiases) => layerBiases.map(() => 0));
    // `0 * Math.log(0)` is NaN; a term with a zero coefficient contributes
    // nothing to the cross-entropy, so skip the logarithm entirely.
    const weightedLog = (coefficient, value) =>
      coefficient === 0 ? 0 : coefficient * Math.log(value);

    let totalCost = 0;
    let batchCount = 0;
    let pendingSamples = 0; // samples accumulated since the last update
    let weightChanges = zeroWeightChanges();
    let biasChanges = zeroBiasChanges();

    for (let epoch = 0; epoch < iterations; ++epoch) {
      const rate = rateIsFunction ? learningRate(epoch, totalCost) : learningRate;
      totalCost = 0;

      for (let sample = 0; sample < sampleCount; ++sample) {
        const { input, output } = data[sample];
        const result = this.run(input, true);
        const outputLayer = result[result.length - 1];

        // errors[k] holds the error terms of layer k + 1 (same indexing as biases).
        const errors = new Array(layerCount);

        // With a sigmoid output and cross-entropy cost the output error is
        // simply (activation - target).
        const outputErrors = [];
        for (let k = 0; k < outputLayer.length; ++k) {
          const activation = outputLayer[k].after;
          const target = output[k];
          outputErrors.push(activation - target);
          totalCost -= weightedLog(target, activation) + weightedLog(1 - target, 1 - activation);
        }
        errors[layerCount - 1] = outputErrors;

        // Propagate the error backwards through the hidden layers.
        for (let layer = layerCount - 1; layer > 0; --layer) {
          const nextErrors = errors[layer];
          const layerResult = result[layer];
          errors[layer - 1] = this.weights[layer].map((neuronWeights, i) => {
            let sum = 0;
            for (let j = 0; j < neuronWeights.length; ++j) {
              sum += neuronWeights[j] * nextErrors[j];
            }
            return sum * this.activationDerivative(layerResult[i].before);
          });
        }

        // Accumulate the (negative, rate-scaled) gradients for this sample.
        for (let layer = 0; layer < layerCount; ++layer) {
          const layerErrors = errors[layer];
          const layerInputs = result[layer];
          const layerWeightChanges = weightChanges[layer];
          const layerBiasChanges = biasChanges[layer];
          for (let i = 0; i < layerBiasChanges.length; ++i) {
            const change = rate * layerErrors[i];
            for (let j = 0; j < layerWeightChanges.length; ++j) {
              layerWeightChanges[j][i] -= change * layerInputs[j].after;
            }
            layerBiasChanges[i] -= change;
          }
        }

        ++batchCount;
        ++pendingSamples;
        const isLastSample = epoch === iterations - 1 && sample === sampleCount - 1;
        if (batchCount % batch === 0 || isLastSample) {
          for (let layer = 0; layer < layerCount; ++layer) {
            const layerWeights = this.weights[layer];
            const layerWeightChanges = weightChanges[layer];
            for (let j = 0; j < layerWeights.length; ++j) {
              const neuronWeights = layerWeights[j];
              const neuronWeightChanges = layerWeightChanges[j];
              for (let i = 0; i < neuronWeights.length; ++i) {
                neuronWeights[i] += neuronWeightChanges[i] / pendingSamples;
              }
            }
            // Biases belong to layer + 1, so they need their own loop bound;
            // iterating them with the weight-row index skips or overruns them
            // whenever adjacent layers differ in size.
            const layerBiases = this.biases[layer];
            const layerBiasChanges = biasChanges[layer];
            for (let i = 0; i < layerBiases.length; ++i) {
              layerBiases[i] += layerBiasChanges[i] / pendingSamples;
            }
          }
          weightChanges = zeroWeightChanges();
          biasChanges = zeroBiasChanges();
          pendingSamples = 0;
        }
      }
      totalCost /= sampleCount;
    }

    console.log(`Training ended due to iterations reached\nIterations: ${iterations} times\nTime spent: ${Date.now() - startTime} ms`);
    return this;
  }
}
示例
测试点是否在圆内。对于此示例,神经网络表现良好。但是,对于诸如手写识别之类的更复杂的示例,神经网络的性能确实很差(即使使用相似的参数,相对于网站上指出的96%的准确度,单个神经网络的最佳准确度也只有70%)。
// Example: classify whether a random point of the unit square lies inside the
// unit circle. Each sample is {input: [x, y], output: [1 or 0]}.
let trainingData = Array.from({ length: 1000 }, () => {
  const x = Math.random();
  const y = Math.random();
  const isInsideCircle = Math.hypot(x, y) < 1;
  return { input: [x, y], output: [Number(isInsideCircle)] };
});

// Two inputs, two hidden layers of five neurons each, one output.
let brain = new NeuralNetwork([2, 5, 5, 1]);

// Train on the first 700 samples (learning rate 0.1, batch size 10, 500 iterations).
// Author's reported result: accuracy rate 95.33% on the remaining 300 entries in trainingData.
const trainingSubset = trainingData.slice(0, 700);
brain.train(trainingSubset, 0.1, 10, 500);
答案 0 :(得分:0)
好的,我想我要回答我自己的问题。因此,我认为我的代码中没有错误,如果有人愿意的话,使用它也很好(尽管确实非常低效)。
我对MNIST数据的运行无法给出准确答案的原因是由于我一开始没有处理数据。原始数据给出了[0,255]范围内28 * 28像素的暗度,我直接将其用作每个训练数据的输入。正确的步骤是将其转换为[0,1]或[-1,1]的范围。
[0,255]范围不能正常工作的原因是,第二层神经元(即隐藏层)将收到绝对值非常大的正输入或负输入。
当反向传播算法计算梯度时,为每个权重计算的变化将非常小,因为它与神经元输入处激活函数的斜率成正比(逻辑函数(logistic,即sigmoid)的导数为 $e^{-x}/(1+e^{-x})^{2}$,当 x 为绝对值很大的正数或负数时,该值接近0)。因此,神经网络将花费很长时间进行训练,而就我而言,它无法很好地学习数据。
使用正确的方法,我可以在相当短的时间内让784 * 200 * 10的神经网络达到90%左右的准确率,尽管这仍然不如作者在问题所提到的链接中使用更简单的算法所达到的准确率。