我使用了第一个反向传播神经网络 this教程。我想教它简单的logick门:0& 0 = 1,0& 1 = 0,1& 0 = 0,1& 1 = 1。令人惊讶的是,它给了我奇怪的output,我根本不理解。我不知道为什么平均误差总是~0.5,为什么当output1好的情况变得糟糕以及什么使NaN输出。代码如下:
package NeuralNetPackage;
import java.util.*;
public class Main {
public static double N(double[] x, double[] w){ //neuron (input, weights{w1, w2, w3,...})
double l=0; //input
double y = 0; //modified output
double NMax = 1;
int i = 0;
while(i < x.length){ //calculating output
l += x[i] * w[i];
i++;
}
//if(Double.isNaN(l)==true){ l = 0.1;}
y = (2/(1 + Math.pow(Math.pow((1+l), 1/l), -NMax*l)))-1;
if(Math.abs(y)> 20) y = 200;
//System.out.println(l) ;
return y;
}
public static void main(String[] args){
int i = 0; //temporary variables
int z = 0;
int t = 0;
double j = 0;
int v = 0;
int[] toLearn = {0,0,1 ,1,0,0 ,0,1,0 ,1,1,1}; //truth table to learn
int ey1 =0; //expected output
double LRate = 0.1; //learn rate
int cx = 2; //input count
double y1 = 0; //input1
int[] NInLayer = {3, 2, 1}; //Neurons in every layer
int[] NInputs = {cx, NInLayer[0], NInLayer[1]}; //input count in every layer
int NCount = NInLayer[0] + NInLayer[1] + NInLayer[2]; //Neuron count
double[] w = new double[NInLayer[0] * NInputs[0] + NInLayer[1] * NInputs[1] + NInLayer[2] * NInputs[2]]; //Weights
double[] NOutputs = new double[cx + NCount]; //Neuron uotputs
double[] NErrors = new double[NCount]; //Neuron errors
double maxError = 0.1d; //max error of network
double error = 200d; //actual error
double avError = 200d; //average error
List<Double> errorTab = new ArrayList<Double>(); //array with errors to calulate average error
sc.close();
Random fW = new Random();
while(i < (NInLayer[0] * NInputs[0] + NInLayer[1] * NInputs[1] + NInLayer[2] * NInputs[2])){ //random weights on start
w[i] = fW.nextDouble()/2;
i++;
}
int c = 0;
while(avError > maxError){ //whole network in this loop
i = 0;
if(c >= 4) c = 0;
NOutputs[0] = toLearn[c*3]+1; //inputs from truth table
NOutputs[1] = toLearn[c*3+1]+1;
ey1 = toLearn[c*3+2];
while(i < NCount){
int u = (int)((i - 0.0001)/2); //layer in which specified neuron is(0-2)
z = 0;
double[] in = new double[NInputs[u]]; //inputs for specified neuron
double[] inW = new double[NInputs[u]]; //weights for specified neuron
while(z < NInputs[u]){ //specified neuron's inputs calculation
if(u == 0) in[z] = NOutputs[z];
if(u == 1) in[z] = NOutputs[z+cx];
if(u == 2) in[z] = NOutputs[z+cx + NInLayer[0]];
z++;
}
z = 0;
while(z < NInputs[u]){ //specified neuron's weights calculation
if(u==0){ inW[z] = w[(NInputs[u]*i)+z];} //System.out.println("Waga neuronu "+i + " = "+ w[(NInputs[u]*i)+z]);}
else if(u==1){ inW[z] = w[(NInputs[u]*i)-z];} //System.out.println("Waga neuronu "+i + " = "+ w[(NInputs[u]*i)-z]);}
else if(u==2){ inW[z] = w[((NInLayer[0] * NInputs[0] + NInLayer[1] * NInputs[1])) + (NInputs[u]*(i - (NInLayer[0] + NInLayer[1])))+z];} //System.out.println("Waga neuronu "+i + " = "+w[((NInLayer[0] * NInputs[0] + NInLayer[1] * NInputs[1])) + (NInputs[u]*(i - (NInLayer[0] + NInLayer[1])))+z]);}
z++;
}
NOutputs[i + cx] = N(in, inW); //saving output from specified neuron
i++;
}
y1 = NOutputs[NOutputs.length - 1]; //calculating output
error = ey1 - y1; //calculating error
if(error < 0) errorTab.add(error * -1);
else errorTab.add(error);
z = 0;
System.out.println("Actual error " + error);
j += errorTab.get(v);
if(v >= 5){avError = j/errorTab.size(); errorTab.clear(); v = -1; j = 0;} /calculating average error
i = 0;
while(i < NCount){ //calculating new weights
int u = (int)(((NCount - i - 1) - 0.0001)/2);
if(u == 2) NErrors[NCount - i - 1] = error; //for last layer
else{ //for rest of layers
int h = 0;
double v2 = 0;
while(h < NInLayer[u+1]){
v2 += NErrors[NCount - (NInLayer[u+1] - h)]*w[((NCount - i)+6)+(h*3)];
h++;
}
NErrors[NCount - i - 1] = v2;
}
//System.out.println("Błąd neuronu " + (NCount - i) +" = "+ NErrors[NCount - i -1]);
i++;
}
i = 0;
while(i < NCount){ //saving new weights(backpropagation)
int h = 0;
int u = (int)((i - 0.0001)/2);
while(h < NInputs[u]){
if(u==0) w[(NInputs[u]*i)+h] += LRate * NErrors[i] * ((2*Math.exp(NOutputs[h])/Math.sqrt(Math.exp(NOutputs[h])+1)));
else if(u==1) w[(NInputs[u]*i)-h] += LRate * NErrors[i] * ((2*Math.exp(NOutputs[NInLayer[0]-1 + h])/Math.sqrt(Math.exp(NOutputs[NInLayer[0]-1 + h])+1)));
else if(u==2) w[((NInLayer[0] * NInputs[0] + NInLayer[1] * NInputs[1])) + (NInputs[u]*(i - (NInLayer[0] + NInLayer[1])))+h] += LRate * NErrors[i] * ((2*Math.exp(NOutputs[NInLayer[0]+NInLayer[1]+ h])/Math.sqrt(Math.exp(NOutputs[NInLayer[0]+NInLayer[1]+ h])+1)));
h++;
}
i++;
}
System.out.println("Input1 = "+(NOutputs[0]-1));
System.out.println("Input2 = "+(NOutputs[1]-1));
System.out.println("Output1 = "+y1);
System.out.println("Expected_Output1 = "+ey1);
System.out.println("Average error is " + avError);
System.out.println("Run_count " + (t+1));
System.out.println("*********************************************");
t++;
v++;
c++;
if(t >= 20000) System.exit(0); //stop app if it is working too long
}
}
}
我无法发现我的代码有什么问题。我真的需要你的帮助。谢谢。