在训练神经网络识别数字时,sigmoid 函数返回了 NaN 结果,而我使用的数据类型是 `double`。我的问题是:如果改用 `long` 数据类型,能解决这个问题吗?还是说我应该更换激活函数?
网络结构:输入层 800 个节点,隐藏层 700 个节点,输出层 10 个节点。
// Feed-forward network trained by back-propagation. Declared as a Thread
// (see run()) and as Serializable (see save()/load()).
// NOTE(review): the braces delimiting the class body are not visible in this copy.
public class BackPropagation extends Thread implements Serializable
// Tag string; not referenced anywhere else in the visible code
private static final String TAG = "NetworkMessage";
private static final long serialVersionUID = -8862858027413741101L;
// Error summed over all samples; written by CalculateOverallError(), read by get_error()
private double OverallError;
// The minimum Error Function defined by the user
private double MinimumError;
// The user-defined expected output pattern for a set of samples
private double ExpectedOutput[][];
// The user-defined input pattern for a set of samples
private double Input[][];
// User defined learning rate - used for updating the network weights
private double LearningRate;
// Users defined momentum - used for updating the network weights
private double Momentum;
// Number of layers in the network
private int NumberOfLayers;
// Number of training sets
private int NumberOfSamples;
// Current training set/sample that is used to train network
private int SampleNumber;
// Maximum number of epochs before the training loop stops
private long MaximumNumberOfIterations;
// Public Variables
// Layers of the network; index 0 is the input layer
public LAYER Layer[];
// Network output recorded per sample, indexed [sample][output node]
public double ActualOutput[][];
// Delay value stored by set_delay(); not read anywhere in the visible code
long delay = 0;
// Stop request flag: set by kill() and polled by the training loop.
// volatile because kill() is meant to be called from a thread other than the
// one running the training; without it the write need not become visible.
volatile boolean die = false;
// Calculate the node activations
// NOTE(review): the braces delimiting this method and its loops are not visible in this copy.
public void FeedForward()
// NOTE(review): j is declared but never used in the visible body
int i,j;
// Since no weights contribute to the output
// vector from the input layer,
// assign the input vector from the input layer
// to all the node in the first hidden layer
for (i = 0; i < Layer[0].Node.length; i++)
Layer[0].Node[i].Output = Layer[0].Input[i];
// Layer 1 receives the same array object as layer 0's input (no copy is made)
Layer[1].Input = Layer[0].Input;
for (i = 1; i < NumberOfLayers; i++)
// NOTE(review): no call that computes layer i's node outputs (e.g. Layer[i].FeedForward())
// is visible in this loop -- confirm whether a line is missing from this copy
// Unless we have reached the last layer, assign layer i's output vector
// to the (i+1) layer's input vector
if (i != NumberOfLayers-1)
Layer[i+1].Input = Layer[i].OutputVector();
// FeedForward()
// Back-propagate the network output error through
// the network to update the weight values
// NOTE(review): no body is visible for this method in this copy of the file.
public void UpdateWeights()
// Fills in Node.SignalError for the output layer first, then for the hidden
// layers from index NumberOfLayers-2 down to 1 (layer 0 is not visited).
// NOTE(review): three statements below end on a dangling operator; their
// right-hand operands are not visible in this copy.
private void CalculateSignalErrors()
int i,j,k,OutputLayer;
double Sum;
OutputLayer = NumberOfLayers-1;
// Calculate all output signal error
for (i = 0; i < Layer[OutputLayer].Node.length; i++)
// Starts as (expected - actual) * actual for the current sample
// NOTE(review): the product stops after the trailing `*`; presumably a
// (1 - Output) factor followed, mirroring the hidden-layer formula -- confirm
Layer[OutputLayer].Node[i].SignalError =
(ExpectedOutput[SampleNumber][i] -Layer[OutputLayer].Node[i].Output) *
Layer[OutputLayer].Node[i].Output *
// Calculate signal error for all nodes in the hidden layer
// (back propagate the errors)
for (i = NumberOfLayers-2; i > 0; i--)
for (j = 0; j < Layer[i].Node.length; j++)
Sum = 0;
// Accumulate over the nodes k of the next layer, using the weight that
// node k applies to input j
for (k = 0; k < Layer[i+1].Node.length; k++)
// NOTE(review): the second factor of this product is missing
Sum = Sum + Layer[i+1].Node[k].Weight[j] *
// NOTE(review): expression is cut off after `(1 -`
Layer[i].Node[j].SignalError = Layer[i].Node[j].Output*(1 -
// Updates the bias (Threshold) and the input weights of every node, walking
// from the last layer down to layer 1 (layer 0 is not visited).
// NOTE(review): several statements below end on a dangling `+` or `*`, or begin
// mid-expression; their missing operands cannot be determined from this copy.
private void BackPropagateError()
int i,j,k;
// Update Weights
for (i = NumberOfLayers-1; i > 0; i--)
for (j = 0; j < Layer[i].Node.length; j++)
// Calculate Bias weight difference to node j
// NOTE(review): the term after the trailing `+` is missing
Layer[i].Node[j].ThresholdDiff = LearningRate *
Layer[i].Node[j].SignalError +
// Update Bias weight to node j
// NOTE(review): the term added to Threshold is missing
Layer[i].Node[j].Threshold =
Layer[i].Node[j].Threshold +
// Update Weights
for (k = 0; k < Layer[i].Input.length; k++)
// Calculate weight difference between node j and k
// NOTE(review): the text between `LearningRate *` and `1].Node[k].Output` is
// missing, as is the term after the trailing `+`
Layer[i].Node[j].WeightDiff[k] =
LearningRate *
1].Node[k].Output +
// Update weight between node j and k
// NOTE(review): the term added to Weight[k] is missing
Layer[i].Node[j].Weight[k] =
Layer[i].Node[j].Weight[k] +
private void CalculateOverallError()
int i,j;
OverallError = 0;
for (i = 0; i < NumberOfSamples; i++)
for (j = 0; j < Layer[NumberOfLayers-1].Node.length; j++)
OverallError = OverallError +
0.5*( Math.pow(ExpectedOutput[i][j] - ActualOutput[i]
[j],2) );
// Builds the network and copies the training data into it.
// NumberOfNodes - node count per layer; its length becomes NumberOfLayers
// InputSamples - one row per training sample; its length becomes NumberOfSamples
// OutputSamples - expected output row for each sample
// LearnRate, Moment - stored as LearningRate and Momentum
// MinError, MaxIter - stored as MinimumError and MaximumNumberOfIterations
// NOTE(review): the closing `)` of the parameter list and the body braces are
// not visible in this copy.
public BackPropagation(int NumberOfNodes[],
double InputSamples[][],
double OutputSamples[][],
double LearnRate,
double Moment,
double MinError,
long MaxIter
int i,j;
// Initiate variables
NumberOfSamples = InputSamples.length;
MinimumError = MinError;
LearningRate = LearnRate;
Momentum = Moment;
NumberOfLayers = NumberOfNodes.length;
MaximumNumberOfIterations = MaxIter;
// Create network layers
Layer = new LAYER[NumberOfLayers];
// Assign the number of node to the input layer
// (the input layer is given as many inputs as it has nodes)
Layer[0] = new LAYER(NumberOfNodes[0],NumberOfNodes[0]);
// Assign number of nodes to each layer
// (each later layer takes one input per node of the previous layer)
for (i = 1; i < NumberOfLayers; i++)
Layer[i] = new LAYER(NumberOfNodes[i],NumberOfNodes[i-1]);
Input = new double[NumberOfSamples][Layer[0].Node.length];
// NOTE(review): the next two allocations are cut off after `Layer[NumberOfLayers-`
ExpectedOutput = new double[NumberOfSamples][Layer[NumberOfLayers-
ActualOutput = new double[NumberOfSamples][Layer[NumberOfLayers-
// Assign input set
for (i = 0; i < NumberOfSamples; i++)
for (j = 0; j < Layer[0].Node.length; j++)
Input[i][j] = InputSamples[i][j];
// Assign output set
for (i = 0; i < NumberOfSamples; i++)
for (j = 0; j < Layer[NumberOfLayers-1].Node.length; j++)
ExpectedOutput[i][j] = OutputSamples[i][j];
// Training loop. Per the closing while condition, it repeats while OverallError
// is above MinimumError and the epoch counter k is below MaximumNumberOfIterations.
// NOTE(review): the opening `do`, the calls that run and update the network,
// the increment of k and most braces are not visible in this copy.
public void TrainNetwork()
// NOTE(review): j is never used in the visible body
int i,j;
long k=0;
// For each pattern
for (SampleNumber = 0; SampleNumber < NumberOfSamples; SampleNumber++)
// Load the current sample into the input layer
for (i = 0; i < Layer[0].Node.length; i++)
Layer[0].Input[i] = Input[SampleNumber][i];
// Assign calculated output vector from network to ActualOutput
for (i = 0; i < Layer[NumberOfLayers-1].Node.length; i++)
// NOTE(review): the right-hand side is cut off after `Layer[NumberOfLayers-`
ActualOutput[SampleNumber][i] = Layer[NumberOfLayers-
// if we've been told to stop training, then
// stop thread execution
// NOTE(review): the statement guarded by this test is not visible
if (die){
// if
// Calculate Error Function
// NOTE(review): the string literal on the next line is unterminated in this copy
System.out.println("OverallError =
System.out.print("Epoch = "+Long.toString(k)+"\n");
} while ((OverallError > MinimumError) &&(k < MaximumNumberOfIterations));
// Accessor for the layer array; hands back the live array, not a copy.
public LAYER[] get_layers()
{
    return this.Layer;
}
// called when testing the network.
// Copies `input` into the input layer's Input array and returns an array with
// one slot per node of the last layer.
// NOTE(review): no call that runs the network and no assignment into
// actual_output are visible in this copy; the method braces are missing too.
public double[] test(double[] input)
// NOTE(review): winner is never used in the visible body
int winner = 0;
NODE[] output_nodes;
// Reads Layer[0].Node.length entries from input, so input must be at least that long
for (int j = 0; j < Layer[0].Node.length; j++)
{ Layer[0].Input[j] = input[j];}
// get the last layer of nodes (the outputs)
output_nodes = (Layer[Layer.length - 1]).get_nodes();
double[] actual_output = new double[output_nodes.length];
// NOTE(review): the body of this loop is not visible
for (int k=0; k < output_nodes.length; k++)
} // for
return actual_output;
public double get_error()
return OverallError;
} // get_error()
// to change the delay in the network
public void set_delay(long time)
if (time >= 0) {
delay = time;
} // if
//save the trained network
// Opens FileName and wraps the stream in an ObjectOutputStream. Exceptions are
// printed to standard output and not rethrown.
// NOTE(review): the `try` keyword, the call that writes the object, any close()
// calls and the method braces are not visible in this copy.
public void save(String FileName)
// NOTE(review): the second constructor argument `true` opens the file in append
// mode, so an existing file is extended rather than replaced; load() performs a
// single readObject from the start of the file -- confirm appending is intended
FileOutputStream fos = new FileOutputStream (new File(FileName), true);
// Serialize data object to a file
ObjectOutputStream os = new ObjectOutputStream(fos);
System.out.println("Network Saved!!!!");
catch (IOException E){System.out.println(E.toString());}
catch (Exception e){System.out.println(e.toString());}
// Reads one serialized BackPropagation object from FileName and returns it.
// On any exception the message is printed and the method returns null, because
// myclass keeps its initial value.
// NOTE(review): the `try` keyword, any close() calls and the method braces are
// not visible in this copy.
// NOTE(review): readObject deserializes whatever the file contains; only use
// this on files from a trusted source.
public BackPropagation load(String FileName)
BackPropagation myclass= null;
//File patternDirectory = new File(Environment.getExternalStorageDirectory().getAbsolutePath().toString()+"INDIAN_NUMBER_RECOGNITION.data");
FileInputStream fis = new FileInputStream(new File(FileName));
//FileInputStream fis =context.openFileInput(FileName);
ObjectInputStream is = new ObjectInputStream(fis);
myclass = (BackPropagation) is.readObject();
// Report the error value stored in the loaded network
System.out.println("Error After Reading = "+Double.toString(myclass.get_error())+"\n");
return myclass;
catch (Exception e){System.out.println(e.toString());}
return myclass;
// needed to implement threading.
// Thread entry point: builds a File handle for "Number_Recognition_1.ser" and
// prints a completion message followed by the current get_error() value.
// NOTE(review): no call that starts training or saves the network is visible in
// this copy, and Net_File is not used in the visible body.
public void run() {
// NOTE(review): Environment is not declared in the visible code (presumably
// android.os.Environment -- confirm)
File Net_File = new File(Environment.getExternalStorageDirectory(),"Number_Recognition_1.ser");
System.out.println( "DONE TRAINING :) ^_^ ^_^ :) !\n");
System.out.println("With Network ERROR = "+Double.toString(get_error())+"\n");
} // run()
// Asks the network to stop training by raising the die flag.
public void kill()
{
    this.die = true;
}
public class LAYER implements Serializable
private double Net;
public double Input[];
// Vector of inputs signals from previous
// layer to the current layer
public NODE Node[];
// Vector of nodes in current layer
// The FeedForward function is called so that
// the outputs for all the nodes in the current
// layer are calculated
public void FeedForward() {
for (int i = 0; i < Node.length; i++) {
Net = Node[i].Threshold;
for (int j = 0; j < Node[i].Weight.length; j++)
{Net = Net + Input[j] * Node[i].Weight[j];
System.out.println("Net = "+Double.toString(Net)+"\n");
Node[i].Output = Sigmoid(Net);
System.out.println("Node["+Integer.toString(i)+".Output = "+Double.toString(Node[i].Output)+"\n");
// The Sigmoid function calculates the
// activation/output from the current node
private double Sigmoid (double Net) {
return 1/(1+Math.exp(-Net));
// Return the output from all node in the layer
// in a vector form
public double[] OutputVector() {
double Vector[];
Vector = new double[Node.length];
for (int i=0; i < Node.length; i++)
Vector[i] = Node[i].Output;
return (Vector);
public LAYER (int NumberOfNodes, int NumberOfInputs) {
Node = new NODE[NumberOfNodes];
for (int i = 0; i < NumberOfNodes; i++)
Node[i] = new NODE(NumberOfInputs);
Input = new double[NumberOfInputs];
// added by DSK
public NODE[] get_nodes() { return Node; }
// A single node of the network: its input weights, bias, last output, and the
// bookkeeping values used while training.
public class NODE implements Serializable {
    // Output signal from current node
    public double Output;

    // Vector of weights from previous nodes to current node
    public double Weight[];

    // Node Threshold /Bias
    public double Threshold;

    // Weight difference between the nth and the (n-1) iteration
    public double WeightDiff[];

    // Threshold difference between the nth and the (n-1) iteration
    public double ThresholdDiff;

    // Output signal error
    public double SignalError;

    // InitialiseWeights function assigns a randomly
    // generated number, between -1 and 1, to the
    // Threshold and Weights to the current node
    private void InitialiseWeights() {
        // Initialise threshold nodes with a random
        // number between -1 and 1
        Threshold = -1+2*Math.random();
        // Initially, ThresholdDiff is assigned to 0 so
        // that the Momentum term can work during the 1st
        // iteration
        ThresholdDiff = 0;
        for (int i = 0; i < Weight.length; i++) {
            // Initialise all weight inputs with a
            // random number between -1 and 1
            Weight[i] = -1+2*Math.random();
            // Initially, WeightDiff is assigned to 0
            // so that the Momentum term can work during
            // the 1st iteration
            WeightDiff[i] = 0;
        }
    }

    // Creates a node that accepts NumberOfNodes inputs.
    public NODE (int NumberOfNodes) {
        // Create an array of Weight with the same
        // size as the vector of inputs to the node
        Weight = new double[NumberOfNodes];
        // Create an array of weightDiff with the same
        // size as the vector of inputs to the node
        WeightDiff = new double[NumberOfNodes];
        // Initialise the Weights and Thresholds to the node.
        // Without this call every weight and the threshold would stay at 0.
        InitialiseWeights();
    }

    // Accessor for the weight array (the live array, not a copy)
    public double[] get_weights() { return Weight; }

    // Accessor for the node's current output value
    public double get_output() { return Output; }
}
答案 0 :(得分:1)