我试图让我的第一个神经网络工作,但无论我做什么,网络似乎都没有得到正确答案。
这是网络达到0.0001
的MSE后的输出0 XOR 0 = 0.0118003716248665
1 XOR 1 = 0.994320073237859
1 XOR 0 = 0.818618888320916
0 XOR 1 = 0.985995457430471
问题:这些答案不正确。
我创建了一个有2个输入,2个隐藏神经元和1个输出的网络,使用相同的数量解决了XOR问题,因此排除了这种可能性(我猜)。
作为旁注,我从另一个站点上找到的C#示例转换了此代码,C#代码执行并且工作正常,因此这很可能是某个逻辑错误或错误计算:/
现在,遗憾的是我完全无法找到导致错误的相关代码,因此我将不得不在此发布涉及网络的整个代码(抱歉)。
编辑:UpdateWeights()函数是Back传播,我想我会把它放在这里以防万一有人没有抓住它,其余的名字和东西都是可以理解的。
unit NeuralNetwork_u;
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, ComCtrls, Math;
type TDoubleArray = array of Double;
type TDouble2DArray = array of TDoubleArray;
type TNeuralNetwork = class(TObject)
private
numInput, numHidden, numOutput : Integer;
inputs, hBiases, hSums, hOutputs, oBiases, oSums, Outputs, oGrads, hGrads, hPrevBiasesDelta, oPrevBiasesDelta : TDoubleArray;
ihWeights, hoWeights, ihPrevWeightsDelta, hoPrevWeightsDelta : TDouble2DArray;
public
constructor Create(NumInputs, NumHiddens, NumOutputs : Integer);
procedure SetWeights(weights : TDoubleArray);
function GetWeights : TDoubleArray;
function GetOutputs : TDoubleArray;
function ComputeOutputs( xvalues : TDoubleArray) : TDoubleArray;
function SigmoidFunction( X : Double) : Double;
function HyperTanFunction( X: Double) : Double;
procedure UpdateWeights( tValues : TDoubleArray ; learn, mom : Double);
function Train( TrainData : TDouble2DArray ; MaxEpochs : Integer ; LearningRate, Momentum, DesiredError : Double) : Double;
function WeightCount : Integer;
procedure Shuffle(Seq : array of Integer);
function MeanSquaredError(TrainData : TDouble2DArray) : Double;
end;
type THelper = class(TObject)
public
function MakeMatrix( Rows, Cols : Integer) : TDouble2DArray;
function Error(tValues, yValues : array of Double) : Double;
end;
implementation
uses NetworkInterface_u;
constructor TNeuralNetwork.Create(NumInputs, NumHiddens, NumOutputs : Integer);
var
Helper : THelper;
begin
Helper := THelper.Create;
numInput := NumInputs;
numHidden := NumHiddens;
numOutput := NumOutputs;
SetLength(inputs,numInput);
ihWeights := Helper.MakeMatrix(numInput, numHidden);
SetLength(hBiases,numHidden);
SetLength(hSums, numHidden);
SetLength(hOutputs, numHidden);
howeights := Helper.makeMatrix(numHidden, numOutput);
SetLength(oBiases,numOutput);
SetLength(oSums, numOutput);
SetLength(Outputs, numOutput);
SetLength(oGrads,numOutput);
SetLength(hGrads,numHidden);
ihPrevWeightsDelta := Helper.makeMatrix(numInput,numHidden);
SetLength(hPrevBiasesDelta,numHidden);
hoPrevWeightsDelta := Helper.makeMatrix(numHidden,numOutput);
SetLength(oPrevBiasesDelta,numOutput);
end;
unit NeuralNetwork_u;
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, ComCtrls, Math;
type TDoubleArray = array of Double;
type TDouble2DArray = array of TDoubleArray;
type TNeuralNetwork = class(TObject)
private
numInput, numHidden, numOutput : Integer;
inputs, hBiases, hSums, hOutputs, oBiases, oSums, Outputs, oGrads, hGrads, hPrevBiasesDelta, oPrevBiasesDelta : TDoubleArray;
ihWeights, hoWeights, ihPrevWeightsDelta, hoPrevWeightsDelta : TDouble2DArray;
public
constructor Create(NumInputs, NumHiddens, NumOutputs : Integer);
procedure SetWeights(weights : TDoubleArray);
function GetWeights : TDoubleArray;
function GetOutputs : TDoubleArray;
function ComputeOutputs( xvalues : TDoubleArray) : TDoubleArray;
function SigmoidFunction( X : Double) : Double;
function HyperTanFunction( X: Double) : Double;
procedure UpdateWeights( tValues : TDoubleArray ; learn, mom : Double);
function Train( TrainData : TDouble2DArray ; MaxEpochs : Integer ; LearningRate, Momentum, DesiredError : Double) : Double;
function WeightCount : Integer;
procedure Shuffle( var Seq : array of Integer);
function MeanSquaredError(TrainData : TDouble2DArray) : Double;
end;
type THelper = class(TObject)
public
function MakeMatrix( Rows, Cols : Integer) : TDouble2DArray;
function Error(tValues, yValues : array of Double) : Double;
end;
implementation
uses NetworkInterface_u;
constructor TNeuralNetwork.Create(NumInputs, NumHiddens, NumOutputs : Integer);
var
Helper : THelper;
begin
Helper := THelper.Create;
numInput := NumInputs;
numHidden := NumHiddens;
numOutput := NumOutputs;
SetLength(inputs,numInput);
ihWeights := Helper.MakeMatrix(numInput, numHidden);
SetLength(hBiases,numHidden);
SetLength(hSums, numHidden);
SetLength(hOutputs, numHidden);
howeights := Helper.makeMatrix(numHidden, numOutput);
SetLength(oBiases,numOutput);
SetLength(oSums, numOutput);
SetLength(Outputs, numOutput);
SetLength(oGrads,numOutput);
SetLength(hGrads,numHidden);
ihPrevWeightsDelta := Helper.makeMatrix(numInput,numHidden);
SetLength(hPrevBiasesDelta,numHidden);
hoPrevWeightsDelta := Helper.makeMatrix(numHidden,numOutput);
SetLength(oPrevBiasesDelta,numOutput);
end;
procedure TNeuralNetwork.SetWeights(weights : TDoubleArray);
var
numWeights : Integer;
i, k, j : Integer;
begin
numWeights := (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
if High(weights) <> numWeights then
begin
Raise Exception.Create('The Weights Array Length Does Not match The Total Number Of Weights And Biases - ' + IntToStr(numWeights));
end;
k := 0;
for i := 0 to numInput-1 do
begin
for j := 0 to numHidden-1 do
begin
ihWeights[i][j] := weights[k];
Inc(k);
end;
end;
for i := 0 to numHidden-1 do
begin
hBiases[i] := weights[k];
Inc(k);
end;
for i := 0 to numHidden-1 do
begin
for j := 0 to numOutput-1 do
begin
hoWeights[i][j] := weights[k];
Inc(k);
end;
end;
for i := 0 to numOutput-1 do
begin
oBiases[i] := weights[k];
Inc(k);
end;
end;
function TNeuralNetwork.GetWeights : TDoubleArray;
var
numWeights : Integer;
k, i, j : Integer;
begin
numWeights := (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
SetLength(Result,numWeights);
k := 0;
for i := 0 to Length(ihWeights)-1 do
begin
for j := 0 to Length(ihWeights[0])-1 do
begin
Result[k] := ihWeights[i][j];
Inc(k);
end;
end;
for i := 0 to Length(hBiases)-1 do
begin
Result[k] := hBiases[i];
Inc(k);
end;
for i := 0 to Length(hoWeights)-1 do
begin
for j := 0 to Length(hoWeights[0])-1 do
begin
Result[k] := hoWeights[i][j] ;
Inc(k);
end;
end;
for i := 0 to Length(oBiases)-1 do
begin
Result[k] := oBiases[i];
Inc(k);
end;
end;
function TNeuralnetwork.GetOutputs : TDoubleArray;
begin
SetLength(Result, numOutput-1);
Result := Outputs;
end;
Function TNeuralNetwork.ComputeOutputs( xValues : TDoubleArray) : TDoubleArray;
var
i, j : Integer;
begin
if Length(xvalues) <> numInput then
begin
raise Exception.Create('Inputs Array Does Not Match Neural Network Inputs Count = Array ' + IntToStr(Length(xValues)) + ' Input Count ' + IntToStr(numInput));
end;
for i := 0 to numHidden-1 do
begin
hSums[i] := 0.0;
end;
for i := 0 to numOutput-1 do
begin
oSums[i] := 0.0;
end;
for i := 0 to Length(xValues)-1 do
begin
inputs[i] := xValues[i];
end;
for j := 0 to numHidden-1 do
begin
for i := 0 to numInput-1 do
begin
hSums[j] := hSums[j] + (inputs[i]*ihWeights[i][j]);
end;
end;
for i := 0 to numHidden-1 do
begin
hSums[i] := hSums[i] + hBiases[i];
end;
for i := 0 to numHidden-1 do
begin
hOutputs[i] := HyperTanFunction(hSums[i]);
end;
for j := 0 to numOutput-1 do
begin
for i := 0 to numHidden-1 do
begin
oSums[j] := oSums[j] + (hOutputs[i] * hoWeights[i][j]);
end;
end;
for i := 0 to numOutput-1 do
begin
oSums[i] := oSums[i] + oBiases[i];
end;
for i := 0 to numOutput-1 do
begin
Outputs[i] := HyperTanFunction(oSums[i]);
end;
Result := Outputs;
end;
function TNeuralNetwork.SigmoidFunction(X : Double) : Double;
begin
if x < -45.0 then
Result := 0
else if x > 45.0 then
Result := 1
else
Result := 1.0 / (1.0 + Exp(-x));
end;
function TNeuralNetwork.HyperTanFunction( X : Double) : Double;
begin
if x < -45.0 then
Result := -1
else if x > 45.0 then
Result := 1
else
Result := Tanh(X);
end;
procedure TNeuralNetwork.UpdateWeights(tValues : TDoubleArray ; learn, mom : Double);
var
i, j : Integer;
derivative, sum, delta,X : Double;
begin
if Length(tValues) <> numOutput then
begin
Raise Exception.Create('Target Values Not Same Length As Output = ' + IntToStr(Length(tValues)) + ' - Outputcount = ' + IntToStr(numOutput));
end;
for i := 0 to Length(oGrads)-1 do
begin
derivative := (1 - outputs[i]) * outputs[i];
oGrads[i] := derivative * (tValues[i] - outputs[i]);
end;
for i := 0 to Length(hGrads)-1 do
begin
derivative := (1 - hOutputs[i]) * (1 + hOutputs[i]);
sum := 0;
for j := 0 to numOutput-1 do
begin
X := oGrads[j] * hoWeights[i][j];
sum := sum + X;
end;
hGrads[i] := derivative * sum;
end;
for i := 0 to Length(ihWeights)-1 do
begin
for j := 0 to Length(ihWeights[0])-1 do
begin
delta := learn * hGrads[j] * inputs[i];
ihWeights[i][j] := ihWeights[i][j] + delta;
ihWeights[i][j] := ihWeights[i][j] + (mom * ihPrevWeightsDelta[i][j]);
ihPrevWeightsDelta[i][j] := delta;
end;
end;
for i := 0 to Length(hBiases)-1 do
begin
delta := learn * hGrads[i] * 1.0;
hBiases[i] := hBiases[i] + delta;
hBiases[i] := hBiases[i] + (mom * hPrevBiasesDelta[i]);
hPrevBiasesDelta[i] := delta;
end;
for i := 0 to Length(hoWeights)-1 do
begin
for j := 0 to Length(hoWeights[0])-1 do
begin
delta := learn * oGrads[j] * hOutputs[i];
hoWeights[i][j] := hoWeights[i][j] + delta;
hoWeights[i][j] := hoWeights[i][j] + (mom * hoPrevWeightsDelta[i][j]);
hoPrevWeightsDelta[i][j] := delta;
end;
end;
for i := 0 to Length(oBiases)-1 do
begin
delta := learn * oGrads[i] * 1.0;
oBiases[i] := oBiases[i] + delta;
oBiases[i] := oBiases[i] + (mom * oPrevBiasesDelta[i]);
oPrevBiasesDelta[i] := delta;
end;
end;
function TNeuralNetwork.Train( TrainData : TDouble2DArray ; MaxEpochs : Integer ; LearningRate, Momentum, DesiredError : Double) : Double;
var
Epoch, I, Idx, c : Integer;
xValues : TDoubleArray;
tValues : TDoubleArray;
Sequence : Array of Integer;
MeanSquaredErrorr : Double;
Helper : THelper;
begin
Epoch := 0;
SetLength(xValues,numInput);
SetLength(tValues,numOutput+1);
SetLength(Sequence,Length(TrainData));
for I := 0 to Length(Sequence)-1 do
Sequence[I] := I;
Shuffle(Sequence);
while Epoch < MaxEpochs do
begin
frmNetworkInterface.redTraining.Lines.Add('Current Epoch - ' + IntToStr(Epoch) + ' : error = ' + FloatToStr(MeanSquaredErrorr) + ' and Desired Error is = ' + FloatToStr(DesiredError));
Application.ProcessMessages;
MeanSquaredErrorr := MeanSquaredError(TrainData);
if MeanSquaredErrorr < DesiredError then
Break;
for I := 0 to Length(TrainData)-1 do
begin
Idx := Sequence[i];
xValues := Copy(TrainData[Idx],0,numInput);
tValues := Copy(TrainData[Idx],numInput,numOutput);
ComputeOutputs(xValues);
UpdateWeights(tValues,LearningRate,Momentum);
end;
Inc(Epoch);
Result := MeanSquaredErrorr;
end;
end;
procedure TNeuralNetwork.Shuffle( var Seq : array of Integer);
var
I, R, Tmp : Integer;
begin
for I := 0 to Length(Seq)-1 do
begin
R := RandomRange(i,Length(Seq));
Tmp := Seq[i];
Seq[R] := Seq[I];
Seq[I] := Tmp;
end;
end;
function TNeuralNetwork.MeanSquaredError(TrainData : TDouble2DArray) : Double;
var
sumSquaredError, err : Double;
xValues, tValues, yValues : TDoubleArray;
I, J : Integer;
begin
sumSquaredError := 0;
SetLength(xValues,numInput);
SetLength(tvalues,numOutput);
for I := 0 to Length(TrainData)-1 do
begin
xValues := Copy(TrainData[I],0,numInput);
tValues := Copy(TrainData[I],numInput,numOutput);
yValues := ComputeOutputs(xValues);
for J := 0 to numOutput-1 do
begin
err := tValues[j] - yValues[j];
sumSquaredError := sumSquaredError + (err * err);
end;
end;
Result := sumSquaredError / Length(TrainData);
end;
function TNeuralNetwork.WeightCount : Integer;
begin
Result := (numInput * numHidden) + (numHidden * numOutput) + numHidden + numOutput;
end;
function THelper.MakeMatrix(Rows, Cols : Integer) : TDouble2DArray;
begin
SetLength(Result,Rows,Cols);
end;
function THelper.Error(tValues : array of Double ; yValues : array of Double) : Double;
var
sum : Double;
i : Integer;
begin
sum := 0.0;
for i := 0 to High(tValues)-1 do
begin
sum := sum + ((tValues[i] - yValues[i]) * (tValues[i] - yValues[i]));
end;
Result := Sqrt(sum);
end;
end.
我现在已经通过这个代码近一百次了,没有找到答案,没有发现逻辑错误或错误计算,但是,正如我所知C#示例有效,这也应该。
编辑: 观察:在我看来,每当我传入的第二个值为1时,网络自动使输出方式太大(第二个输入所涉及的权重值对于我的口味来说太大了?),因此1 XOR 1是错误的,因为第二个值是1(参见上面的数据)。
编辑: 这是我刚刚运行的一个网络的初始权重(2个输入,2个隐藏,1个输出)
Initial Weight0 - 0.0372207039175555 Initial Weight1 - 0.01092082898831 Initial Weight2 - 0.0755334409791976 Initial Weight3 - 0.0866588755254634 Initial Weight4 - 0.0626101282471791 Initial Weight5 - 0.0365478269639425 Initial Weight6 - 0.0724486718699336 Initial Weight7 - 0.0320405319170095 Initial Weight8 - 0.0680674042692408
在132个时代之后(错误为0.001)
编辑:所以一个新的开发已经曝光,传递TrainingData时出现错误导致它识别1 XOR 1 = 1,但是,在修复此错误后,网络无法收集答案(运行100个网络,每个万个时期)我得到的最低MSE(均方误差)是Final Weight 0 = 0.432341693850932 Final Weight 1 = 0.338041456780997 Final Weight 2 = 1.0096817584107 Final Weight 3 = 0.839104863469981 Final Weight 4 = -0.275763414588823 Final Weight 5 = -0.171414938983027 Final Weight 6 = 1.26394969109634 Final Weight 7 = 0.998915778388676 Final Weight 8 = 0.549501870374428
当前纪元 - 9999:错误= 0.487600332892658,所需错误= 0.001
我记录了每个培训时期发送到网络的输入和输出,并确定它们现在都是正确的,所以现在看来网络似乎无法解决问题?
另外,我正在将代码更新为我最新版本。 (2015年8月26日)
此代码中有新内容:
修复了1而不是0的复制索引。
可以确认现在正确复制了输入和所需输出。
编辑:网络的MSE现在实际上正在增加,这是初始错误:
0.467486419821747,
并且在10000个时代之后,
0.487600332892658,
整体错误随着
而增加0.020113913070917
...这让我相信我的训练程序或UpdateWeights程序都有问题......
编辑:我做的另一个观察是,网络的均方误差在2.5上升(当运行一个疯狂的长训练会使它移动那么多)该死的MSE上升而不是下降?编辑:在培训期间对网络输出的另一种观察
当前时代 - 233:错误= 0.802251346201161并且所需错误= 0.0001
当前时代 - 234:错误= 1.24798705066641,所需错误= 0.0001
当前时代 - 235:错误= 2.47206076545025,所需错误= 0.0001
当前时代 - 236:错误= 2.49999999811955,所需错误= 0.0001
从1.24急剧跳升到2.49,网络显然在与训练或体重变化有关的功能上出现错误。
答案 0 :(得分:0)
我怀疑你的代码中的原始程度是以度为单位的(因为你使用像-45.0这样的常量)而Delphi是以弧度工作的,所以无论你需要什么,你需要45(PI / 4)等等。很可能在训练期间你在达到所需精度之前,时间已用完,因为函数可能超出范围。