Question

在训练神经网络识别数字时，sigmoid 函数返回了 NaN 结果，而我使用的数据类型是 double。我的问题是：如果改用 long 数据类型，能解决这个问题吗？还是应该更换激活函数？

网络结构：输入层 800 个节点，隐藏层 700 个节点，输出层 10 个节点。

BackPropagation类：

public class BackPropagation extends  Thread  implements  Serializable
 {
   private static final String TAG = "NetworkMessage";
   private static final long serialVersionUID = -8862858027413741101L;
   private double   OverallError;
   // The minimum Error Function defined by the user
   private double   MinimumError;
   // The user-defined expected output pattern for a set of samples
   private double   ExpectedOutput[][];
   // The user-defined input pattern for a set of samples
   private double   Input[][];
   // User defined learning rate - used for updating the network weights
   private double   LearningRate;
   // Users defined momentum - used for updating the network weights
   private double   Momentum;
   // Number of layers in the network 
   private  int NumberOfLayers;
   // Number of training sets
   private  int NumberOfSamples;
   // Current training set/sample that is used to train network
   private  int SampleNumber;
   // Maximum number of Epochs before the traing stops training 
   private long MaximumNumberOfIterations;
   // Public Variables
   public LAYER Layer[];
   public  double   ActualOutput[][];
   long delay = 0;
   boolean die = false;
   // Calculate the node activations
   public void FeedForward()
    {
      int i,j;
      // Since no weights contribute to the output
      // vector from the input layer,
      // assign the input vector from the input layer
      // to all the node in the first hidden layer
      for (i = 0; i < Layer[0].Node.length; i++)
         Layer[0].Node[i].Output = Layer[0].Input[i];
      Layer[1].Input = Layer[0].Input;
      for (i = 1; i < NumberOfLayers; i++)
       {
         Layer[i].FeedForward();
         // Unless we have reached the last layer, assign the layer i's    //output vector
        // to the (i+1) layer's input vector
        if (i != NumberOfLayers-1)
            Layer[i+1].Input = Layer[i].OutputVector();
       }
  }
  // FeedForward()
  // Back propagated the network outputy error through
  // the network to update the weight values
public void UpdateWeights()
{
    CalculateSignalErrors();
    BackPropagateError();
}
 private void CalculateSignalErrors()
  {
    int i,j,k,OutputLayer;
    double Sum;
    OutputLayer = NumberOfLayers-1;
    // Calculate all output signal error
   for (i = 0; i < Layer[OutputLayer].Node.length; i++)
    { 
      Layer[OutputLayer].Node[i].SignalError =   
      (ExpectedOutput[SampleNumber][i] -Layer[OutputLayer].Node[i].Output) * 
      Layer[OutputLayer].Node[i].Output *  
      (1-Layer[OutputLayer].Node[i].Output);
    }
  // Calculate signal error for all nodes in the hidden layer
  // (back propagate the errors
 for (i = NumberOfLayers-2; i > 0; i--)
  {
    for (j = 0; j < Layer[i].Node.length; j++) 
     {
       Sum = 0;
       for (k = 0; k < Layer[i+1].Node.length; k++)
          Sum = Sum + Layer[i+1].Node[k].Weight[j] * 
          Layer[i+1].Node[k].SignalError;
       Layer[i].Node[j].SignalError = Layer[i].Node[j].Output*(1 -  
       Layer[i].Node[j].Output)*Sum;
     }
  }
}

private void BackPropagateError() 
 {
    int i,j,k;
    // Update Weights
    for (i = NumberOfLayers-1; i > 0; i--) 
      {
        for (j = 0; j < Layer[i].Node.length; j++) 
          {
            // Calculate Bias weight difference to node j
            Layer[i].Node[j].ThresholdDiff = LearningRate *
                    Layer[i].Node[j].SignalError +
                    Momentum*Layer[i].Node[j].ThresholdDiff;
            // Update Bias weight to node j
            Layer[i].Node[j].Threshold =
                    Layer[i].Node[j].Threshold +
                            Layer[i].Node[j].ThresholdDiff;
             // Update Weights
            for (k = 0; k < Layer[i].Input.length; k++) 
               {
                 // Calculate weight difference between node j and k
                 Layer[i].Node[j].WeightDiff[k] =
                         LearningRate *
                         Layer[i].Node[j].SignalError*Layer[i-
                         1].Node[k].Output +
                         Momentum*Layer[i].Node[j].WeightDiff[k];
                 // Update weight between node j and k
                 Layer[i].Node[j].Weight[k] =
                        Layer[i].Node[j].Weight[k] + 
                        Layer[i].Node[j].WeightDiff[k];
               }
          }
       }
  }
private void CalculateOverallError()
 {
    int i,j;
    OverallError = 0;
    for (i = 0; i < NumberOfSamples; i++)
        for (j = 0; j < Layer[NumberOfLayers-1].Node.length; j++)
         {
            OverallError = OverallError +
                    0.5*( Math.pow(ExpectedOutput[i][j] - ActualOutput[i] 
                                                                   [j],2) );
        }
}
 public  BackPropagation(int NumberOfNodes[],
                        double InputSamples[][],
                        double OutputSamples[][],
                        double LearnRate,
                        double Moment,
                        double MinError,
                        long MaxIter
                        )
  {
    int i,j;
 // Initiate variables
    NumberOfSamples = InputSamples.length;
    MinimumError = MinError;
    LearningRate = LearnRate;
    Momentum = Moment;
    NumberOfLayers = NumberOfNodes.length;
    MaximumNumberOfIterations = MaxIter;
 // Create network layers
    Layer = new LAYER[NumberOfLayers];
 // Assign the number of node to the input layer
    Layer[0] = new LAYER(NumberOfNodes[0],NumberOfNodes[0]);
 // Assign number of nodes to each layer
    for (i = 1; i < NumberOfLayers; i++)
        Layer[i] = new LAYER(NumberOfNodes[i],NumberOfNodes[i-1]);
    Input = new double[NumberOfSamples][Layer[0].Node.length];
    ExpectedOutput = new double[NumberOfSamples][Layer[NumberOfLayers-
    1].Node.length];
    ActualOutput = new double[NumberOfSamples][Layer[NumberOfLayers-
   1].Node.length];
    // Assign input set
    for (i = 0; i < NumberOfSamples; i++)
        for (j = 0; j < Layer[0].Node.length; j++)
            Input[i][j] = InputSamples[i][j];
    // Assign output set
    for (i = 0; i < NumberOfSamples; i++)
        for (j = 0; j < Layer[NumberOfLayers-1].Node.length; j++)
            ExpectedOutput[i][j] = OutputSamples[i][j];
} 
 public void TrainNetwork() 
  {
   int i,j;
   long k=0;
   do
    {
      // For each pattern
      for (SampleNumber = 0; SampleNumber < NumberOfSamples; SampleNumber++) 
      {
          for (i = 0; i < Layer[0].Node.length; i++)
              Layer[0].Input[i] = Input[SampleNumber][i];
          FeedForward();
        // Assign calculated output vector from network to ActualOutput
          for (i = 0; i < Layer[NumberOfLayers-1].Node.length; i++)
              ActualOutput[SampleNumber][i] = Layer[NumberOfLayers-
             1].Node[i].Output;
          UpdateWeights();
          // if we've been told to stop training, then
          // stop thread execution
          if (die){
              return;
          }
          // if
      }
      k++;
     // Calculate Error Function
      CalculateOverallError();
      System.out.println("OverallError = 
      "+Double.toString(OverallError)+"\n");
      System.out.print("Epoch = "+Long.toString(k)+"\n");
  } while ((OverallError > MinimumError) &&(k < MaximumNumberOfIterations));
}
  public LAYER[] get_layers() { return Layer; }
  // called when testing the network.
  public double[] test(double[] input) 
   {
     int winner = 0;
     NODE[] output_nodes;
     for (int j = 0; j < Layer[0].Node.length; j++)
     { Layer[0].Input[j] = input[j];}
     FeedForward();
     // get the last layer of nodes (the outputs)
     output_nodes = (Layer[Layer.length - 1]).get_nodes();
     double[] actual_output  = new double[output_nodes.length];
     for (int k=0; k < output_nodes.length; k++)
      {
        actual_output[k]=output_nodes[k].Output;
      } // for
    return actual_output;
 }//test()
  public double get_error() 
  {
     CalculateOverallError(); 
     return OverallError;
  } // get_error()
// to change the delay in the network
 public void set_delay(long time) 
  {
    if (time >= 0) {
        delay = time;
    } // if
}
//save the trained network
public void save(String FileName)
{
    try{


       FileOutputStream fos = new FileOutputStream (new File(FileName), true);
       // Serialize data object to a file
        ObjectOutputStream os = new ObjectOutputStream(fos);
        os.writeObject(this);
        os.close();
        fos.close();
        System.out.println("Network Saved!!!!");
    }
    catch (IOException E){System.out.println(E.toString());}
    catch (Exception e){System.out.println(e.toString());}
}


public  BackPropagation load(String FileName)
{

    BackPropagation myclass= null;
    try
    {

        //File patternDirectory = new File(Environment.getExternalStorageDirectory().getAbsolutePath().toString()+"INDIAN_NUMBER_RECOGNITION.data");
        //patternDirectory.mkdirs();
        FileInputStream fis = new FileInputStream(new File(FileName));
        //FileInputStream fis =context.openFileInput(FileName);
        ObjectInputStream is = new ObjectInputStream(fis);
        myclass = (BackPropagation) is.readObject();
        System.out.println("Error After Reading = "+Double.toString(myclass.get_error())+"\n");
        is.close();
        fis.close();
        return myclass;


    }
    catch (Exception e){System.out.println(e.toString());}
    return myclass;
}

// needed to implement threading.
public void run() {
    TrainNetwork();
    File Net_File = new File(Environment.getExternalStorageDirectory(),"Number_Recognition_1.ser");
    save(Net_File.getAbsolutePath());
    System.out.println( "DONE TRAINING :) ^_^ ^_^ :) !\n");
    System.out.println("With Network ERROR = "+Double.toString(get_error())+"\n");
} // run()


// to notify the network to stop training.
public void kill() { die = true; }

}

LAYER 类：

/**
 * One layer of the network: the input vector it receives and the nodes that
 * turn that vector into outputs.
 */
public class LAYER implements Serializable
{
  // Net input of the node most recently processed by FeedForward().
  // Kept as a field so the class's default serialized form stays unchanged.
  private   double  Net;
  // Vector of input signals from the previous layer to the current layer
  public    double  Input[];
  // Vector of nodes in current layer
  public    NODE    Node[];

  /**
   * Creates a layer whose nodes each take {@code NumberOfInputs} inputs.
   *
   * @param NumberOfNodes  number of nodes in this layer
   * @param NumberOfInputs length of the input vector (and of each node's weight vector)
   */
  public LAYER (int NumberOfNodes, int NumberOfInputs) {
    Node = new NODE[NumberOfNodes];

    for (int i = 0; i < NumberOfNodes; i++)
        Node[i] = new NODE(NumberOfInputs);

    Input = new double[NumberOfInputs];
  }

  /**
   * Calculates the output of every node in this layer:
   * {@code Output = Sigmoid(Threshold + sum_j Input[j] * Weight[j])}.
   *
   * <p>No per-weight or per-node logging is done here: this loop runs
   * (inputs x nodes) times per forward pass, so printing inside it dominates
   * the running time of training.
   */
  public void FeedForward() {
    for (int i = 0; i < Node.length; i++) {
        final double[] weights = Node[i].Weight;
        double net = Node[i].Threshold;

        for (int j = 0; j < weights.length; j++) {
            net = net + Input[j] * weights[j];
        }

        Net = net;
        Node[i].Output = Sigmoid(net);
    }
  }

  /**
   * The sigmoid function calculates the activation/output of a node.
   *
   * <p>For very negative {@code Net}, {@code Math.exp(-Net)} overflows to
   * infinity and the result is 0.0; the function itself returns NaN only when
   * {@code Net} is already NaN.
   */
  private double Sigmoid (double Net) {
    return 1/(1+Math.exp(-Net));
  }

  /**
   * Returns the outputs of all nodes in the layer as a newly allocated vector.
   */
  public double[] OutputVector() {
    double[] vector = new double[Node.length];

    for (int i = 0; i < Node.length; i++)
        vector[i] = Node[i].Output;

    return vector;
  }

  /** Returns this layer's node array (not a copy). */
  public NODE[] get_nodes() { return Node; }
}

NODE 类：

 /**
  * A single node of the network: its incoming weights, its threshold (bias),
  * the most recent changes applied to both, and its latest output and error.
  */
 public class NODE implements Serializable
 {
   /** Output signal from the current node. */
   public double Output;
   /** Vector of weights from the previous layer's nodes to the current node. */
   public double[] Weight;
   /** Node threshold / bias. */
   public double Threshold;
   /** Weight difference between the nth and the (n-1)th iteration. */
   public double[] WeightDiff;
   /** Threshold difference between the nth and the (n-1)th iteration. */
   public double ThresholdDiff;
   /** Output signal error. */
   public double SignalError;

   /**
    * Creates a node with {@code NumberOfNodes} inputs and randomly
    * initialised weights and threshold.
    *
    * @param NumberOfNodes size of the vector of inputs to the node
    */
   public NODE (int NumberOfNodes) {
     // Both arrays have the same size as the vector of inputs to the node.
     Weight = new double[NumberOfNodes];
     WeightDiff = new double[NumberOfNodes];

     InitialiseWeights();
   }

   /** Draws a random number between -1 and 1. */
   private static double randomWeight() {
     return 2 * Math.random() - 1;
   }

   /**
    * Assigns a random number between -1 and 1 to the threshold and to every
    * weight, and zeroes the stored differences so that the momentum term can
    * work during the first iteration.
    */
   private void InitialiseWeights() {
     Threshold = randomWeight();
     ThresholdDiff = 0;

     int index = 0;
     while (index < Weight.length) {
       Weight[index] = randomWeight();
       WeightDiff[index] = 0;
       index++;
     }
   }

   /** Returns the node's weight array (not a copy). */
   public double[] get_weights() {
     return Weight;
   }

   /** Returns the node's latest output. */
   public double get_output() {
     return Output;
   }
 }

我不知道该如何解决这个问题，如能得到帮助，将不胜感激。

Answer 1

根据 NaN 的定义，如果计算中只使用 double 数据类型却得到了 NaN 结果，那么参与计算的数据（权重、信号强度等）中必然至少出现了以下情况之一：

无穷大，例如某个数除以一个非常小的数
出现了 0/0 形式的除法
复数，例如对负数求平方根

请检查用于计算结果的所有数据，并按上述条件逐一排查；然后再确定相关元素为什么会达到这种状态。遗憾的是，在没有更多代码线索的情况下，我无法给出更具体的判断。

具有NaN结果的神经网络

1 个答案: