Neural network not training correctly

Time: 2019-07-05 21:57:59

Tags: c++ matrix neural-network

I am currently trying to learn machine learning, and neural networks in particular. In C++ I have built a very simple two-layer (hidden and output) neural network that uses a matrix library I also wrote in C++. However, when I try to train it on a very trivial problem, namely XOR, the output is not what I expect.
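Concretely, with 2 input nodes, 2 hidden nodes and 1 output node, the forward pass the network is meant to compute is (written with the variable names from the code below; this is just a restatement of what predict() does, not extra code):

hidden = sigmoid(weightsIH * inputs + biasH)   // 2x1 column vector
output = sigmoid(weightsHO * hidden + biasO)   // 1x1, the prediction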

Here is the code I am trying to get working. First, my main function:

#include <cstdlib>   // rand(), srand()
#include <ctime>     // time()
#include <vector>

// NeuralNetwork (and the Matrix library it uses) comes from my own headers;
// the NeuralNetwork class itself is shown further below.
int main(){

  int trainingData[4][3] =  {
                            {0,0,0},
                            {1,1,0},
                            {1,0,1},
                            {0,1,1}
                            };



  NeuralNetwork nn = NeuralNetwork(2,2,1);

  for (int i = 0; i < 10000; i++){
    std::vector<double> inputs(2);
    std::vector<double> targets(1);
    srand(time(NULL));
    int index = rand()%4;
    for(int j = 0; j < 3; j++){
      if (j < 2){
        inputs[j] = trainingData[index][j];
      }else{
        targets[0] = trainingData[index][j];
      }
    }
    // cout<<inputs[0]<<" "<<inputs[1]<<"  ->  "<<targets[0]<<endl;
    nn.train(inputs,targets);
  }

  std::vector<double> inputs = {0,0};


  nn.predict(inputs).print();
  //
  // for (int i = 0; i < prediction.size(); i++)
  //   cout<<prediction[i]<<" ";



  return 0;
}
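To judge whether the network is learning XOR at all, it can help to print the prediction for every input pair instead of only {0,0}. A minimal sketch, placed inside main() after the training loop and assuming <iostream> is included for std::cout:

  // print the network's output for all four XOR cases
  std::vector<std::vector<double>> xorCases = {{0,0},{0,1},{1,0},{1,1}};
  for (const std::vector<double>& in : xorCases){
    std::cout << in[0] << " " << in[1] << " -> ";
    nn.predict(in).print();
  }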

And here is my NeuralNetwork class. It relies heavily on my Matrix library; the full library is over 300 lines, so I have only summarized its interface below the class, but I can post whatever parts are necessary.

class NeuralNetwork{
private:
  int inputNodes,hiddenNodes,outputNodes;
  float learningRate;
  Matrix weightsIH, weightsHO, biasH, biasO;


public:
    //--------------------------------------------------------------------------
    //                              Constructor
    //--------------------------------------------------------------------------
    NeuralNetwork(int inNodes,int hidNodes,int outNodes,float lr = .1){
    inputNodes = inNodes;
    hiddenNodes = hidNodes;
    outputNodes = outNodes;
    learningRate = lr;

    //create matrices for the weights
    weightsIH.setSize(hiddenNodes,inputNodes);
    weightsHO.setSize(outputNodes,hiddenNodes);

    //randomize weights matrices
    weightsHO.randomize();
    weightsIH.randomize();

    //create matrices for the biases
    biasH.setSize(hiddenNodes,1);
    biasO.setSize(outputNodes,1);

    //randomize biases
    biasH.randomize();
    biasO.randomize();

  }
  //----------------------------------------------------------------------------
  //                                Prediction
  //----------------------------------------------------------------------------
  Matrix predict(std::vector<double> inputVector){
    //generate hidden outputs
    Matrix inputs = fromVector(inputVector);

    Matrix hiddenOutput = multiply(weightsIH, inputs);

    //apply bias
    hiddenOutput.add(biasH);

    //use activation function
    hiddenOutput.sigmoid();

    //output's output
    Matrix output = multiply(weightsHO, hiddenOutput);

    //apply bias
    output.add(biasO);

    //use activation function
    output.sigmoid();

    return output;
  }
  //----------------------------------------------------------------------------
  //                                 Training
  //----------------------------------------------------------------------------

  void train(std::vector<double> inputVector, std::vector<double> targetVector){

    //--------------------------------------------------------------------------
    //                        Feed Forward
    //--------------------------------------------------------------------------

    //generating hidden outputs
    Matrix inputs = fromVector(inputVector);
    //cout<<"1"<<endl;
    Matrix hiddenOutput = multiply(weightsIH, inputs);
    //add bias
    hiddenOutput.add(biasH);
    //activation function
    hiddenOutput.sigmoid();
    //generating second output
    Matrix outputs = multiply(weightsHO,hiddenOutput);
    //add bias
    outputs.add(biasO);
    //activation function
    outputs.sigmoid();

    //--------------------------------------------------------------------------
    //                        Back Propagation
    //--------------------------------------------------------------------------

    //convert targets vector to matrix
    Matrix targets = fromVector(targetVector);

    //calculate error
    Matrix outputErrors = subtract(targets,outputs);

    //calculate gradient
    Matrix gradients = dsigmoid(outputs);
    gradients = multiply(gradients,outputErrors);
    gradients.scale(learningRate);

    //calculate deltas
    Matrix hiddenOutputT = transpose(hiddenOutput);
    Matrix weightHoDeltas = multiply(gradients,hiddenOutputT);

    //adjust weights by deltas
    weightsHO.add(weightHoDeltas);
    biasO.add(gradients);

    //calculate hidden layer errors
    Matrix weightsHoTranspose = transpose(weightsHO);
    Matrix hiddenErrors = multiply(weightsHoTranspose,outputErrors);

    //calculate hidden gradient
    Matrix hiddenGradient = dsigmoid(hiddenOutput);
    hiddenGradient = multiply(hiddenGradient,hiddenErrors);
    hiddenGradient.scale(learningRate);

    //calculate inputs -> hidden deltas
    Matrix inputsT = transpose(inputs);
    Matrix weightsIhDeltas = multiply(hiddenGradient,inputsT);

    weightsIH.add(weightsIhDeltas);
    biasH.add(hiddenGradient);
  }

};
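For reference, the parts of the Matrix interface that the NeuralNetwork class calls look roughly like this (declarations only, sketched from the calls above rather than copied from the library, so the exact signatures may differ):

#include <vector>

// rough shape of the Matrix interface relied on above (declarations only)
class Matrix {
public:
  void setSize(int rows, int cols);  // allocate a rows x cols matrix
  void randomize();                  // fill with random initial values
  void add(const Matrix& other);     // element-wise add, in place
  void scale(double factor);         // multiply every element, in place
  void sigmoid();                    // apply sigmoid element-wise, in place
  void print() const;                // print the contents
};

// free functions used by NeuralNetwork
Matrix fromVector(const std::vector<double>& v);    // build a column vector from a std::vector
Matrix multiply(const Matrix& a, const Matrix& b);  // product of a and b
Matrix subtract(const Matrix& a, const Matrix& b);  // a - b, element-wise
Matrix transpose(const Matrix& m);                  // transposed copy
Matrix dsigmoid(const Matrix& m);                   // sigmoid derivative, element-wise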

I apologize in advance, because I know this is a lot of code, but I honestly don't know where the problem is. Any help would be greatly appreciated. I am still very new to neural networks and don't yet fully understand how they work.

0 Answers:

No answers yet.