在训练神经网络识别数字时，sigmoid 函数返回了 NaN 结果，而我使用的数据类型是 `double`。我的问题是：如果改用 `long` 数据类型，能解决这个问题吗？还是应该更换激活函数？
网络结构：输入层 800 个节点，隐藏层 700 个节点，输出层 10 个节点。
BackPropagation类:
public class BackPropagation extends Thread implements Serializable
{
private static final String TAG = "NetworkMessage";
private static final long serialVersionUID = -8862858027413741101L;
private double OverallError;
// The minimum Error Function defined by the user
private double MinimumError;
// The user-defined expected output pattern for a set of samples
private double ExpectedOutput[][];
// The user-defined input pattern for a set of samples
private double Input[][];
// User defined learning rate - used for updating the network weights
private double LearningRate;
// Users defined momentum - used for updating the network weights
private double Momentum;
// Number of layers in the network
private int NumberOfLayers;
// Number of training sets
private int NumberOfSamples;
// Current training set/sample that is used to train network
private int SampleNumber;
// Maximum number of Epochs before the traing stops training
private long MaximumNumberOfIterations;
// Public Variables
public LAYER Layer[];
public double ActualOutput[][];
long delay = 0;
boolean die = false;
// Calculate the node activations
public void FeedForward()
{
int i,j;
// Since no weights contribute to the output
// vector from the input layer,
// assign the input vector from the input layer
// to all the node in the first hidden layer
for (i = 0; i < Layer[0].Node.length; i++)
Layer[0].Node[i].Output = Layer[0].Input[i];
Layer[1].Input = Layer[0].Input;
for (i = 1; i < NumberOfLayers; i++)
{
Layer[i].FeedForward();
// Unless we have reached the last layer, assign the layer i's //output vector
// to the (i+1) layer's input vector
if (i != NumberOfLayers-1)
Layer[i+1].Input = Layer[i].OutputVector();
}
}
// FeedForward()
// Back propagated the network outputy error through
// the network to update the weight values
public void UpdateWeights()
{
CalculateSignalErrors();
BackPropagateError();
}
private void CalculateSignalErrors()
{
int i,j,k,OutputLayer;
double Sum;
OutputLayer = NumberOfLayers-1;
// Calculate all output signal error
for (i = 0; i < Layer[OutputLayer].Node.length; i++)
{
Layer[OutputLayer].Node[i].SignalError =
(ExpectedOutput[SampleNumber][i] -Layer[OutputLayer].Node[i].Output) *
Layer[OutputLayer].Node[i].Output *
(1-Layer[OutputLayer].Node[i].Output);
}
// Calculate signal error for all nodes in the hidden layer
// (back propagate the errors
for (i = NumberOfLayers-2; i > 0; i--)
{
for (j = 0; j < Layer[i].Node.length; j++)
{
Sum = 0;
for (k = 0; k < Layer[i+1].Node.length; k++)
Sum = Sum + Layer[i+1].Node[k].Weight[j] *
Layer[i+1].Node[k].SignalError;
Layer[i].Node[j].SignalError = Layer[i].Node[j].Output*(1 -
Layer[i].Node[j].Output)*Sum;
}
}
}
private void BackPropagateError()
{
int i,j,k;
// Update Weights
for (i = NumberOfLayers-1; i > 0; i--)
{
for (j = 0; j < Layer[i].Node.length; j++)
{
// Calculate Bias weight difference to node j
Layer[i].Node[j].ThresholdDiff = LearningRate *
Layer[i].Node[j].SignalError +
Momentum*Layer[i].Node[j].ThresholdDiff;
// Update Bias weight to node j
Layer[i].Node[j].Threshold =
Layer[i].Node[j].Threshold +
Layer[i].Node[j].ThresholdDiff;
// Update Weights
for (k = 0; k < Layer[i].Input.length; k++)
{
// Calculate weight difference between node j and k
Layer[i].Node[j].WeightDiff[k] =
LearningRate *
Layer[i].Node[j].SignalError*Layer[i-
1].Node[k].Output +
Momentum*Layer[i].Node[j].WeightDiff[k];
// Update weight between node j and k
Layer[i].Node[j].Weight[k] =
Layer[i].Node[j].Weight[k] +
Layer[i].Node[j].WeightDiff[k];
}
}
}
}
private void CalculateOverallError()
{
int i,j;
OverallError = 0;
for (i = 0; i < NumberOfSamples; i++)
for (j = 0; j < Layer[NumberOfLayers-1].Node.length; j++)
{
OverallError = OverallError +
0.5*( Math.pow(ExpectedOutput[i][j] - ActualOutput[i]
[j],2) );
}
}
public BackPropagation(int NumberOfNodes[],
double InputSamples[][],
double OutputSamples[][],
double LearnRate,
double Moment,
double MinError,
long MaxIter
)
{
int i,j;
// Initiate variables
NumberOfSamples = InputSamples.length;
MinimumError = MinError;
LearningRate = LearnRate;
Momentum = Moment;
NumberOfLayers = NumberOfNodes.length;
MaximumNumberOfIterations = MaxIter;
// Create network layers
Layer = new LAYER[NumberOfLayers];
// Assign the number of node to the input layer
Layer[0] = new LAYER(NumberOfNodes[0],NumberOfNodes[0]);
// Assign number of nodes to each layer
for (i = 1; i < NumberOfLayers; i++)
Layer[i] = new LAYER(NumberOfNodes[i],NumberOfNodes[i-1]);
Input = new double[NumberOfSamples][Layer[0].Node.length];
ExpectedOutput = new double[NumberOfSamples][Layer[NumberOfLayers-
1].Node.length];
ActualOutput = new double[NumberOfSamples][Layer[NumberOfLayers-
1].Node.length];
// Assign input set
for (i = 0; i < NumberOfSamples; i++)
for (j = 0; j < Layer[0].Node.length; j++)
Input[i][j] = InputSamples[i][j];
// Assign output set
for (i = 0; i < NumberOfSamples; i++)
for (j = 0; j < Layer[NumberOfLayers-1].Node.length; j++)
ExpectedOutput[i][j] = OutputSamples[i][j];
}
public void TrainNetwork()
{
int i,j;
long k=0;
do
{
// For each pattern
for (SampleNumber = 0; SampleNumber < NumberOfSamples; SampleNumber++)
{
for (i = 0; i < Layer[0].Node.length; i++)
Layer[0].Input[i] = Input[SampleNumber][i];
FeedForward();
// Assign calculated output vector from network to ActualOutput
for (i = 0; i < Layer[NumberOfLayers-1].Node.length; i++)
ActualOutput[SampleNumber][i] = Layer[NumberOfLayers-
1].Node[i].Output;
UpdateWeights();
// if we've been told to stop training, then
// stop thread execution
if (die){
return;
}
// if
}
k++;
// Calculate Error Function
CalculateOverallError();
System.out.println("OverallError =
"+Double.toString(OverallError)+"\n");
System.out.print("Epoch = "+Long.toString(k)+"\n");
} while ((OverallError > MinimumError) &&(k < MaximumNumberOfIterations));
}
public LAYER[] get_layers() { return Layer; }
// called when testing the network.
public double[] test(double[] input)
{
int winner = 0;
NODE[] output_nodes;
for (int j = 0; j < Layer[0].Node.length; j++)
{ Layer[0].Input[j] = input[j];}
FeedForward();
// get the last layer of nodes (the outputs)
output_nodes = (Layer[Layer.length - 1]).get_nodes();
double[] actual_output = new double[output_nodes.length];
for (int k=0; k < output_nodes.length; k++)
{
actual_output[k]=output_nodes[k].Output;
} // for
return actual_output;
}//test()
public double get_error()
{
CalculateOverallError();
return OverallError;
} // get_error()
// to change the delay in the network
public void set_delay(long time)
{
if (time >= 0) {
delay = time;
} // if
}
//save the trained network
public void save(String FileName)
{
try{
FileOutputStream fos = new FileOutputStream (new File(FileName), true);
// Serialize data object to a file
ObjectOutputStream os = new ObjectOutputStream(fos);
os.writeObject(this);
os.close();
fos.close();
System.out.println("Network Saved!!!!");
}
catch (IOException E){System.out.println(E.toString());}
catch (Exception e){System.out.println(e.toString());}
}
public BackPropagation load(String FileName)
{
BackPropagation myclass= null;
try
{
//File patternDirectory = new File(Environment.getExternalStorageDirectory().getAbsolutePath().toString()+"INDIAN_NUMBER_RECOGNITION.data");
//patternDirectory.mkdirs();
FileInputStream fis = new FileInputStream(new File(FileName));
//FileInputStream fis =context.openFileInput(FileName);
ObjectInputStream is = new ObjectInputStream(fis);
myclass = (BackPropagation) is.readObject();
System.out.println("Error After Reading = "+Double.toString(myclass.get_error())+"\n");
is.close();
fis.close();
return myclass;
}
catch (Exception e){System.out.println(e.toString());}
return myclass;
}
// needed to implement threading.
public void run() {
TrainNetwork();
File Net_File = new File(Environment.getExternalStorageDirectory(),"Number_Recognition_1.ser");
save(Net_File.getAbsolutePath());
System.out.println( "DONE TRAINING :) ^_^ ^_^ :) !\n");
System.out.println("With Network ERROR = "+Double.toString(get_error())+"\n");
} // run()
// to notify the network to stop training.
public void kill() { die = true; }
}
LAYER 类：
/**
 * One layer of the network: a vector of nodes plus the input vector that
 * feeds them.
 */
public class LAYER implements Serializable
{
    // Running weighted sum for the node currently being evaluated
    private double Net;
    // Input signals arriving at this layer from the previous one
    public double Input[];
    // The nodes that make up this layer
    public NODE Node[];

    /**
     * Creates a layer and its nodes.
     *
     * @param NumberOfNodes  how many nodes the layer holds
     * @param NumberOfInputs how many inputs (and thus weights) each node has
     */
    public LAYER (int NumberOfNodes, int NumberOfInputs) {
        Node = new NODE[NumberOfNodes];
        int created = 0;
        while (created < NumberOfNodes) {
            Node[created] = new NODE(NumberOfInputs);
            created++;
        }
        Input = new double[NumberOfInputs];
    }

    /**
     * Evaluates every node of the layer: starts from the node's threshold,
     * adds each input multiplied by its weight, and stores the sigmoid of
     * that sum as the node's output. Intermediate sums and each node output
     * are printed to standard output.
     */
    public void FeedForward() {
        int position = 0;
        for (NODE current : Node) {
            Net = current.Threshold;
            final double[] weights = current.Weight;
            for (int w = 0; w < weights.length; w++) {
                Net += Input[w] * weights[w];
                System.out.println("Net = " + Net + "\n");
            }
            current.Output = Sigmoid(Net);
            System.out.println("Node[" + position + ".Output = " + current.Output + "\n");
            position++;
        }
    }

    // Logistic activation: 1 / (1 + e^(-x))
    private double Sigmoid (double Net) {
        final double denominator = 1 + Math.exp(-Net);
        return 1 / denominator;
    }

    /**
     * Collects the current output of every node.
     *
     * @return a newly allocated array with one entry per node
     */
    public double[] OutputVector() {
        final double[] outputs = new double[Node.length];
        int slot = 0;
        for (NODE current : Node) {
            outputs[slot++] = current.Output;
        }
        return outputs;
    }

    // added by DSK
    /** Returns the layer's node array (not a copy). */
    public NODE[] get_nodes() {
        return Node;
    }
}
NODE 类：
/**
 * A single network node: its latest output, its incoming weights and bias,
 * and the bookkeeping values used while the weights are being updated.
 */
public class NODE implements Serializable
{
    // Output signal most recently stored for this node
    public double Output;
    // One weight per input arriving from the previous layer
    public double Weight[];
    // Node threshold / bias
    public double Threshold;
    // Change applied to each weight in the previous update (used with momentum)
    public double WeightDiff[];
    // Change applied to the threshold in the previous update (used with momentum)
    public double ThresholdDiff;
    // Error signal of this node
    public double SignalError;

    /**
     * Creates a node with randomly initialised threshold and weights.
     *
     * @param NumberOfNodes number of inputs feeding this node; sizes both
     *                      the weight and the weight-difference arrays
     */
    public NODE (int NumberOfNodes) {
        Weight = new double[NumberOfNodes];
        WeightDiff = new double[NumberOfNodes];
        InitialiseWeights();
    }

    // Draws the threshold first and then each weight in index order, all
    // uniformly from [-1, 1); every stored difference starts at zero so the
    // momentum term contributes nothing on the first update.
    private void InitialiseWeights() {
        Threshold = 2 * Math.random() - 1;
        ThresholdDiff = 0;
        int index = 0;
        while (index < Weight.length) {
            Weight[index] = 2 * Math.random() - 1;
            WeightDiff[index] = 0;
            index++;
        }
    }

    /** Returns the node's weight array (not a copy). */
    public double[] get_weights() {
        return Weight;
    }

    /** Returns the node's current output value. */
    public double get_output() {
        return Output;
    }
}
我不知道该如何解决这个问题，如能得到任何帮助，我将不胜感激。
答案 0 :(得分:1)
根据 NaN 的定义，如果计算中只使用 `double` 数据类型却得到了 NaN 结果，那么用于计算结果的数据（权重、信号强度等）中必定至少出现了以下情况之一：
（1）某个操作数本身已经是 NaN；
（2）出现了不定式运算，例如 0/0、∞/∞、0×∞ 或 ∞−∞；
（3）向实数运算传入了定义域之外的参数，例如对负数求平方根或取对数。
请尝试检查用于计算结果的所有数据，并按上述条件逐一排查。然后，再设法确定相应的值为什么会进入这种状态。遗憾的是，在没有更多代码线索的情况下，我无法给出更具体的判断。