我正在为神经网络创建一个库;我已成功实现了Back Propagation算法,但我遇到了Resilient Propagation问题。
我一直在使用XOR场景来测试我的实现;错误似乎在几个时期后减少,然后它增加并最终停止变化。
我正在测试的网络有3层--2个输入,2个隐藏和1个输出神经元/ s。突触/树突重量位于下一层的神经元上,例如,输入隐藏的权重将存储在隐藏的神经元上,隐藏的输出权重将存储在输出神经元上。
这是我的渐变计算(编辑4):
private void CalculateGradient()
{
for (int t = 0; t < TrainingInputs.Count; ++t) //loop through training data
{
Network.Run(TrainingInputs[t]);
for (int l = Network.Layers.Count - 1; l > 0; l--)
{
for (int i = 0; i < Network.Layers[l].Neurons.Count; i++)
{
Network.Layers[l].Neurons[i].Delta = l < Network.Layers.Count - 1
? CalculateNonLastGradient(l + 1, i, Network.Layers[l].Neurons[i].Value)
: CalculateLastGradient(TrainingOutputs[t][i], Network.Layers[l].Neurons[i].Value);
}
}
for (int l = Network.Layers.Count - 1; l > 0; l--)
{
for (int j = 0; j < Network.Layers[l].Neurons.Count; ++j)
{
double grad = Network.Layers[l].Neurons[j].Delta;
biasGrads[l][j] += grad;
for (int i = 0; i < Network.Layers[l - 1].Neurons.Count; ++i)
{
grad = Network.Layers[l].Neurons[j].Delta * Network.Layers[l - 1].Neurons[i].Value;
grads[l][j][i] += grad;
}
}
}
int o = 0;
Layer layer = Network.Layers[Network.Layers.Count - 1];
errors.Add(layer.Neurons.Sum(n => Math.Abs(TrainingOutputs[t][o++] - n.Value)));
}
Error = errors.Average();
}
private double CalculateLastGradient(double ideal, double nValue)
{
return Network.Activation.Derivitive(nValue) * (ideal - nValue);
}
private double CalculateNonLastGradient(int nextLayer, int j, double nValue)
{
double sum = 0.0;
for (int i = 0; i < Network.Layers[nextLayer].Neurons.Count; i++)
{
sum += Network.Layers[nextLayer].Neurons[i].Delta * Network.Layers[nextLayer].Neurons[i].Dendrites[j].Weight;
}
return Network.Activation.Derivitive(nValue) * sum;
}
我的RProp实施(编辑4):
public bool Algorithm()
{
//initialise matrices
if (!initializedMatrices)
{
InitializeMatrices();
}
ZeroOut();
//calculate gradients
CalculateGradient();
for (int l = 1; l < Network.Layers.Count; l++) //layers
{
for (int i = 0; i < Network.Layers[l - 1].Neurons.Count; ++i) //prev layer neurons
{
for (int j = 0; j < Network.Layers[l].Neurons.Count; ++j) //current layer neurons
{
double delta = prevDeltas[l][j][i];
int change = Math.Sign(prevGrads[l][j][i] * grads[l][j][i]);
if (change > 0)
{
delta = Math.Min(delta * etaPlus, deltaMax);
double deltaWeight = -Math.Sign(grads[l][j][i]) * delta;
Network.Layers[l].Neurons[j].Dendrites[i].Weight += deltaWeight;
}
else if (change < 0)
{
delta = Math.Max(delta * etaMinus, deltaMin);
Network.Layers[l].Neurons[j].Dendrites[i].Weight -= prevDeltas[l][j][i];
prevGrads[l][j][i] = 0;
}
else if (change == 0)
{
double deltaWeight = -Math.Sign(grads[l][j][i]) * delta;
Network.Layers[l].Neurons[j].Dendrites[i].Weight += deltaWeight;
}
prevGrads[l][j][i] = grads[l][j][i];
prevDeltas[l][j][i] = delta;
} //j
} //i
for (int i = 1; i < Network.Layers[l].Neurons.Count; ++i)
{
double delta = prevBiasDeltas[l][i];
int change = Math.Sign(prevBiasGrads[l][i] * biasGrads[l][i]);
if (change > 0)
{
delta = Math.Min(prevBiasDeltas[l][i] * etaPlus, deltaMax);
double biasDeltaWeight = -Math.Sign(biasGrads[l][i]) * delta;
Network.Layers[l].Neurons[i].Bias += biasDeltaWeight;
}
else if (change < 0)
{
delta = Math.Max(prevBiasDeltas[l][i] * etaMinus, deltaMin);
Network.Layers[l].Neurons[i].Bias -= prevBiasDeltas[l][i];
prevBiasGrads[l][i] = 0;
}
else if (change == 0)
{
double biasDeltaWeight = -Math.Sign(biasGrads[l][i]) * delta;
Network.Layers[l].Neurons[i].Bias += biasDeltaWeight;
}
prevBiasGrads[l][i] = biasGrads[l][i];
prevBiasDeltas[l][i] = delta;
}
}
return true;
}
有人能指出我可能出现的问题吗?
修改 问题似乎是三角洲没有变化。
编辑2: 我通过将每个调用前的第一个前一个增量初始化为0.01并对每个调用前的梯度进行归零来修正增量不变。误差增加得非常快,然后用TANH非常缓慢地减小;与Sigmoid相反。
编辑3: 偏差'的循环从0开始,当它应该是1.修复此问题修复了原始问题,但是在新问题上出现了 - 错误在某一点后停止下降。
编辑4:我意识到我没有在训练集上累积每个神经元的渐变。现在错误减少了,但速度很慢。