我目前正在尝试学习机器学习,尤其是神经网络。我在C++中实现了一个非常简单的两层(隐藏层和输出层)神经网络,它使用了我同样用C++编写的矩阵库。但是,当我尝试用一个非常简单的问题(即XOR问题)训练它时,输出并不是我期望的结果。这是我要实现的代码:
{
"kind": "youtube#videoListResponse",
"etag": "\"Bdx4f4ps3xCOOo1WZ91nTLkRZ_c/CbYwR7FePSZfjlzY4mNeMODOwJA\"",
"pageInfo": {
"totalResults": 1,
"resultsPerPage": 1
},
"items": [
{
"kind": "youtube#video",
"etag": "\"Bdx4f4ps3xCOOo1WZ91nTLkRZ_c/9xoE3Yy_gyWHNwLgpi4jyoRrmGY\"",
"id": "gVLsVj7BebE",
"contentDetails": {
"duration": "PT4M2S",
"dimension": "2d",
"definition": "hd",
"caption": "false",
"licensedContent": false,
"projection": "rectangular"
}
}
]
}
这是我用来训练和测试网络的主函数(main):
//------------------------------------------------------------------------------
// Entry point: trains the 2-2-1 network on the XOR truth table, then prints
// the network's prediction for the input {0,0} (expected output close to 0).
//------------------------------------------------------------------------------
int main(){
    // Each row is {input1, input2, expected XOR output}.
    int trainingData[4][3] = {
        {0,0,0},
        {1,1,0},
        {1,0,1},
        {0,1,1}
    };
    // Seed the RNG exactly ONCE, before the loop. The original code called
    // srand(time(NULL)) inside the loop: time(NULL) has one-second resolution,
    // so every iteration re-seeded with the same value and rand()%4 kept
    // returning the same index — the network trained on a single sample
    // repeated thousands of times instead of the full XOR set.
    srand(time(NULL));
    NeuralNetwork nn = NeuralNetwork(2,2,1);
    for (int i = 0; i < 10000; i++){
        std::vector<double> inputs(2);
        std::vector<double> targets(1);
        // Stochastic training: pick a random row of the truth table.
        int index = rand()%4;
        inputs[0] = trainingData[index][0];
        inputs[1] = trainingData[index][1];
        targets[0] = trainingData[index][2];
        nn.train(inputs,targets);
    }
    std::vector<double> inputs = {0,0};
    nn.predict(inputs).print();
    return 0;
}
这是我的神经网络类(它调用了我的矩阵库中的一些函数;完整的矩阵库有 300 多行,如有需要我可以贴出相关部分,但这里先给出神经网络类):
// A minimal fully-connected feed-forward network with one hidden layer and
// one output layer, trained by stochastic gradient descent with sigmoid
// activations. Relies on an external Matrix library (not shown here) for
// multiply / add / subtract / transpose / sigmoid / dsigmoid / scale.
class NeuralNetwork{
private:
// Layer sizes fixed at construction time.
int inputNodes,hiddenNodes,outputNodes;
// Step size applied to every weight/bias update.
float learningRate;
// weightsIH: hidden x input, weightsHO: output x hidden.
// biasH: hidden x 1, biasO: output x 1 (column vectors).
Matrix weightsIH, weightsHO, biasH, biasO;
public:
//--------------------------------------------------------------------------
// Constructor
//--------------------------------------------------------------------------
// Builds an inNodes -> hidNodes -> outNodes network; all weights and biases
// start at random values (range depends on Matrix::randomize — not shown).
NeuralNetwork(int inNodes,int hidNodes,int outNodes,float lr = .1){
inputNodes = inNodes;
hiddenNodes = hidNodes;
outputNodes = outNodes;
learningRate = lr;
//create matrices for the weights
weightsIH.setSize(hiddenNodes,inputNodes);
weightsHO.setSize(outputNodes,hiddenNodes);
//randomize weights matrices
weightsHO.randomize();
weightsIH.randomize();
//create matrices for the biases
biasH.setSize(hiddenNodes,1);
biasO.setSize(outputNodes,1);
//randomize biases
biasH.randomize();
biasO.randomize();
}
//----------------------------------------------------------------------------
// Prediction
//----------------------------------------------------------------------------
// Pure forward pass: sigmoid(WHO * sigmoid(WIH * x + bH) + bO).
// Returns the output layer as an outputNodes x 1 Matrix.
Matrix predict(std::vector<double> inputVector){
//generate hidden outputs
Matrix inputs = fromVector(inputVector);
Matrix hiddenOutput = multiply(weightsIH, inputs);
//apply bias
hiddenOutput.add(biasH);
//use activation function
hiddenOutput.sigmoid();
//output's output
Matrix output = multiply(weightsHO, hiddenOutput);
//apply bias
output.add(biasO);
//use activation function
output.sigmoid();
return output;
}
//----------------------------------------------------------------------------
// Training
//----------------------------------------------------------------------------
// One step of online (single-sample) backpropagation: forward pass, then
// update weightsHO/biasO and weightsIH/biasH from the output error.
void train(std::vector<double> inputVector, std::vector<double> targetVector){
//--------------------------------------------------------------------------
// Feed Forward
//--------------------------------------------------------------------------
//generating hidden outputs
Matrix inputs = fromVector(inputVector);
//cout<<"1"<<endl;
Matrix hiddenOutput = multiply(weightsIH, inputs);
//add bias
hiddenOutput.add(biasH);
//activation function
hiddenOutput.sigmoid();
//generating second output
Matrix outputs = multiply(weightsHO,hiddenOutput);
//add bias
outputs.add(biasO);
//activation function
outputs.sigmoid();
//--------------------------------------------------------------------------
// Back Propogation
//--------------------------------------------------------------------------
//convert targets vector to matrix
Matrix targets = fromVector(targetVector);
//calculate error
Matrix outputErrors = subtract(targets,outputs);
//calculate gradient
// NOTE(review): backprop needs the ELEMENT-WISE (Hadamard) product of
// dsigmoid(outputs) and outputErrors here. If multiply() is a matrix
// product, this only dimension-checks because both operands happen to be
// 1x1 when outputNodes == 1 — confirm Matrix::multiply's semantics.
Matrix gradients = dsigmoid(outputs);
gradients = multiply(gradients,outputErrors);
gradients.scale(learningRate);
//calculate deltas
Matrix hiddenOutputT = transpose(hiddenOutput);
Matrix weightHoDeltas = multiply(gradients,hiddenOutputT);
//adjust weights by deltas
weightsHO.add(weightHoDeltas);
biasO.add(gradients);
//calculate hidden layer errors
// NOTE(review): weightsHO has already been updated above, so the hidden
// error is back-propagated through the NEW weights rather than the ones
// used in the forward pass — standard backprop uses the pre-update
// weights; verify this ordering is intentional.
Matrix weightsHoTranspose = transpose(weightsHO);
Matrix hiddenErrors = multiply(weightsHoTranspose,outputErrors);
//calculate hidden gradient
// NOTE(review): same Hadamard-vs-matrix-product concern as above: both
// dsigmoid(hiddenOutput) and hiddenErrors are hiddenNodes x 1 column
// vectors, which is NOT a valid matrix product for hiddenNodes > 1.
Matrix hiddenGradient = dsigmoid(hiddenOutput);
hiddenGradient = multiply(hiddenGradient,hiddenErrors);
hiddenGradient.scale(learningRate);
//calculate inputs -> hidden deltas
Matrix inputsT = transpose(inputs);
Matrix weightsIhDeltas = multiply(hiddenGradient,inputsT);
weightsIH.add(weightsIhDeltas);
biasH.add(hiddenGradient);
}
};
我先在此致歉,因为我知道这是很多代码,但老实说,我不知道问题出在哪里。任何帮助都将不胜感激。我对神经网络还很陌生,对其工作原理仍不十分了解。