实现使用c ++的反向传播算法提供了不正确的结果

时间:2018-03-08 11:57:06

标签: c++ neural-network backpropagation

我正在努力实现PB算法,我使用了此博客中的代码(Here)。我对他的工作进行了一些修改。

问题是我要测试的所有模式,对于所有测试模式,错误为 1 提供。肯定是不正确的。

我添加的 额外工作 位于 example.cpp 中,程序可以读取文本文件并获取输入并输出fm,并将训练集和测试集从文本文件中分离出来。并打印它们以查看提取文本文件过程的结果。

我使用了 1个隐藏图层隐藏图层中的10个神经元 20000个纪元

程序的输入如下:

Program Inputs

这是代码:

bpnet.h

#ifndef BPNET_H
#define BPNET_H

/*********************************Structure representing a neuron******************************/

struct neuron
{
    float *weights; // neuron input weights or synaptic connections
    float *deltavalues; //neuron delta values
    float output; //output value
    float gain; //Gain value
    float wgain; //Weight gain value

    neuron(); //Constructor
    ~neuron(); //Destructor
    void create(int inputcount); //Allocates memory and initializates values
};


/**************************************Structure representing a layer******************************/

struct layer
{
    neuron **neurons; //The array of neurons
    int neuroncount; //Contains the total number of neurons
    float *layerinput; //The layer input
    int inputcount; //The total count of elements in layerinput

    layer(); //Object constructor. Initializates all values as 0

    ~layer(); //Destructor. Frees the memory used by the layer

    void create(int inputsize, int _neuroncount); //Creates the layer and allocates memory
    void calculate(); //Calculates all neurons performing the network formula
};



/********************************Structure Representing the network********************************/

class bpnet
{
private:
    layer m_inputlayer; //input layer of the network
    layer m_outputlayer; //output layer..contains the result of applying the network
    layer **m_hiddenlayers; //Additional hidden layers
    int m_hiddenlayercount; //the count of additional hidden layers

public:

    //function tu create in memory the network structure
    bpnet();//Construction..initialzates all values to 0
    ~bpnet();//Destructor..releases memory

    //Creates the network structure on memory
    void create(int inputcount,int inputneurons,int outputcount,int *hiddenlayers,int hiddenlayercount);

    void propagate(const float *input);//Calculates the network values given an input pattern

    //Updates the weight values of the network given a desired output and applying the backpropagation
    //Algorithm
    float train(const float *desiredoutput,const float *input,float alpha, float momentum);

    //Updates the next layer input values
    void update(int layerindex);

    //Returns the output layer..this is useful to get the output values of the network
    inline layer &getOutput()
    {
        return m_outputlayer;
    }

};

#endif // BPNET_H

bpnet.cpp

#include "bpnet.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>



/*****************************neuron routines*******************************/

//constructor
neuron::neuron():weights(0),deltavalues(0),output(0),gain(0),wgain(0) 
{

}

//Destructor
neuron::~neuron()
{
    if(weights)
        delete [] weights;
    if(deltavalues)
        delete [] deltavalues;
}

//Initializates neuron weights
void neuron::create(int inputcount)
{
    assert(inputcount);
    float sign = -1; //to change sign
    float random; //to get random number
    weights = new float[inputcount];
    deltavalues = new float[inputcount];

    //important initializate all weights as random unsigned values
    //and delta values as 0
    for( int i = 0; i < inputcount; i++)
    {
        //get a random number between -0.5 and 0.5
        random = ( float(rand()) / float(RAND_MAX) )/2.f; //min 0.5
        random *= sign;
        sign *= -1;
        weights[i] = random;
        deltavalues[i] = 0;
    }

    gain = 1;

    random = ( float(rand()) / float(RAND_MAX) )/2.f; //min 0.5
    random *= sign;
    sign *= -1;
    wgain = random;


}



/***********************************Layer member functions********************************/

layer::layer():neurons(0),neuroncount(0),layerinput(0),inputcount(0)
{

}

layer::~layer()
{
    if(neurons)
    {
        for( int i = 0; i < neuroncount; i++ )
        {
            delete neurons[i];
        }

        delete [] neurons;
    }
    if(layerinput)
    {
        delete [] layerinput;
    }
}

void layer::create( int inputsize, int _neuroncount )
{
    assert( inputsize && _neuroncount );//check for errors

    int i;

    neurons = new neuron*[_neuroncount];
    for( i = 0; i < _neuroncount; i++)
    {
        neurons[i] = new neuron;
        neurons[i] -> create(inputsize);
    }

    layerinput = new float[inputsize];
    neuroncount = _neuroncount;
    inputcount = inputsize;
}


//Calculates the neural network result of the layer using the sigmoid function
void layer::calculate()
{
    int i,j;
    float sum;

    //Apply the formula for each neuron
    for( i = 0; i < neuroncount; i++ )
    {
        sum = 0; //store the sum of all values here
        for( j = 0; j < inputcount; j++ )
        {
            //Performing function
            sum += neurons[i] -> weights[j] * layerinput[j]; //apply input * weight
        }

        sum += neurons[i] ->wgain * neurons[i] -> gain; //apply the gain or theta multiplied by the gain weight.

        //sigmoidal activation function
        neurons[i] -> output= 1.f / (1.f + exp(-sum)); //calculate the sigmoid function
        //neurons[i]->output=-1 + 2*(1.f + exp(-sum));
    }
}



/***************************bpnet object functions**************/

bpnet::bpnet():m_hiddenlayers(0),m_hiddenlayercount(0)
{
}

bpnet::~bpnet()
{
    if(m_hiddenlayers)
    {
        for(int i=0;i<m_hiddenlayercount;i++)
        {
            delete m_hiddenlayers[i];
        }

        delete [] m_hiddenlayers;
    }

}

void bpnet::create(int inputcount, int inputneurons, int outputcount, int *hiddenlayers, int hiddenlayercount)
{

            //make sure required values are not zero

            assert(inputcount && inputneurons && outputcount);

            int i;

            m_inputlayer.create(inputcount,inputneurons);

            if(hiddenlayers && hiddenlayercount)
            {
                m_hiddenlayers = new layer*[hiddenlayercount];
                m_hiddenlayercount = hiddenlayercount;

                for( i = 0; i < hiddenlayercount; i++ )
                {
                    m_hiddenlayers[i] = new layer;
                    if( i==0 )
                    {
                        //first hidden layer receives the output of the inputlayer so we set as input the neuroncount
                        //of the inputlayer
                        m_hiddenlayers[i]->create(inputneurons,hiddenlayers[i]);
                    }
                    else
                    {
                        m_hiddenlayers[i]->create(hiddenlayers[i-1],hiddenlayers[i]);
                    }
                }

                m_outputlayer.create(hiddenlayers[hiddenlayercount - 1],outputcount);
            }
            else
            {
                m_outputlayer.create(inputneurons,outputcount);
            }
}


void bpnet::propagate(const float *input)
{
    //The propagation function should start from the input layer
    //first copy the input vector to the input layer Always make sure the size
    //"array input" has the same size of inputcount
    memcpy(m_inputlayer.layerinput,input,m_inputlayer.inputcount * sizeof(float));


    //now calculate the inputlayer
    m_inputlayer.calculate();


    update(-1);//propagate the inputlayer out values to the next layer


    if(m_hiddenlayers)
    {
        //Calculating hidden layers if any
        for(int i = 0; i < m_hiddenlayercount; i++ )
        {
            m_hiddenlayers[i] -> calculate();
            update(i);
        }
    }

    //calculating the final statge: the output layer
    m_outputlayer.calculate();
}

//Main training function. Run this function in a loop as many times needed per pattern
float bpnet::train(const float *desiredoutput, const float *input, float alpha, float momentum)
{
    //function train, teaches the network to recognize a pattern given a desired output

    float errorg=0; //general quadratic error
    float errorc; //local error;
    float sum=0,csum=0;
    float delta,udelta;
    float output;

    //first we begin by propagating the input
    propagate(input);
    int i,j,k;

    //the backpropagation algorithm starts from the output layer propagating the error  from the output
    //layer to the input layer

    for( i = 0; i < m_outputlayer.neuroncount; i++ )
    {
        //calculate the error value for the output layer
        output=m_outputlayer.neurons[i]->output; //copy this value to facilitate calculations

        //from the algorithm we can take the error value as
        errorc=(desiredoutput[i] - output) * output * (1 - output);

        //and the general error as the sum of delta values. Where delta is the squared difference
        //of the desired value with the output value
        //quadratic error
        errorg+=(desiredoutput[i] - output) * (desiredoutput[i] - output) ;

        //now we proceed to update the weights of the neuron
        for( j = 0; j < m_outputlayer.inputcount; j++ )
        {
            //get the current delta value
            delta=m_outputlayer.neurons[i]->deltavalues[j];

            //update the delta value
            udelta = alpha * errorc * m_outputlayer.layerinput[j] + delta * momentum;

            //update the weight values
            m_outputlayer.neurons[i]->weights[j]+=udelta;
            m_outputlayer.neurons[i]->deltavalues[j]=udelta;

            //we need this to propagate to the next layer
            sum += m_outputlayer.neurons[i]->weights[j] * errorc;
        }

        //calculate the weight gain
        m_outputlayer.neurons[i]->wgain += alpha * errorc * m_outputlayer.neurons[i]->gain;

    }

    for(i = (m_hiddenlayercount - 1); i >= 0; i--)
    {
        for( j = 0; j < m_hiddenlayers[i]->neuroncount; j++ )
        {
            output = m_hiddenlayers[i]->neurons[j]->output;

            //calculate the error for this layer
            errorc = output * (1-output) * sum;

            //update neuron weights
            for( k = 0; k < m_hiddenlayers[i]->inputcount; k++ )
            {
                delta = m_hiddenlayers[i]->neurons[j]->deltavalues[k];
                udelta = alpha * errorc * m_hiddenlayers[i]->layerinput[k] + delta * momentum;
                m_hiddenlayers[i]->neurons[j]->weights[k] += udelta;
                m_hiddenlayers[i]->neurons[j]->deltavalues[k] = udelta;
                csum += m_hiddenlayers[i]->neurons[j]->weights[k] * errorc;//needed for next layer

            }

            m_hiddenlayers[i]->neurons[j]->wgain += alpha * errorc * m_hiddenlayers[i]->neurons[j]->gain;

        }

        sum = csum;
        csum = 0;
    }

    //and finally process the input layer
    for( i = 0; i < m_inputlayer.neuroncount; i++)
    {
        output = m_inputlayer.neurons[i]->output;
        errorc = output * (1 - output) * sum;

        for(j = 0; j < m_inputlayer.inputcount; j++)
        {
            delta = m_inputlayer.neurons[i]->deltavalues[j];
            udelta = alpha * errorc * m_inputlayer.layerinput[j] + delta * momentum;
            //update weights
            m_inputlayer.neurons[i]->weights[j] += udelta;
            m_inputlayer.neurons[i]->deltavalues[j] = udelta;
        }
        //and update the gain weight
        m_inputlayer.neurons[i]->wgain+=alpha * errorc * m_inputlayer.neurons[i]->gain;
    }

    //return the general error divided by 2
    return errorg / 2;

}

void bpnet::update(int layerindex)
{
    int i;
    if( layerindex == -1 )
    {
        //dealing with the inputlayer here and propagating to the next layer
        for( i = 0; i < m_inputlayer.neuroncount; i++ )
        {
            if(m_hiddenlayers)//propagate to the first hidden layer
            {
                m_hiddenlayers[0]->layerinput[i] = m_inputlayer.neurons[i]->output;
            }
            else //propagate directly to the output layer
            {
                m_outputlayer.layerinput[i] = m_inputlayer.neurons[i]->output;
            }
        }

    }
    else
    {
        for( i = 0; i < m_hiddenlayers[layerindex]->neuroncount; i++)
        {
            //not the last hidden layer
            if( layerindex < m_hiddenlayercount -1 )
            {
                m_hiddenlayers[layerindex + 1]->layerinput[i] = m_hiddenlayers[layerindex]->neurons[i]->output;
            }
            else
            {
                m_outputlayer.layerinput[i] = m_hiddenlayers[layerindex]->neurons[i]->output;
            }
        }
    }
}

example.cpp

#include <iostream>
#include "bpnet.h"
#include <string>
#include <sstream>
#include <fstream>
#include <vector>
using namespace std;
#define HIDDEN_LAYERS 10
#define HIDDEN_LAYER_COUNT 1


float** Read_Data_Form_file ( int AN, int IN, string FN, int in, int ou, string iop )
{
    int Att_Num = AN; //number of attributes in the Dataset.
    int Inst_Num = IN; //number of instants in the Dataset.
    string File_name = FN; //file name.

    int inputs_Num = in; //number of inputs.
    int outputs_Num = ou; //number of outputs.

    string input_output_possition = iop; //define where is the inputs and outputs position at right or left.
                                             // False: outputs in the left and inputs in the right.
                                             // True: outputs in the right and inputs in the left.

    //open file.
    ifstream DataSet(File_name);

    string line; //save data line.

    //Create 2D array to save result.
    float** data = new float*[Inst_Num]; 
    for(int i = 0; i < Inst_Num; ++i)
    { data[i] = new float[Att_Num]; }

    int i=0, j=0;

    //check if the file is opened or not.
    if ( DataSet.is_open() )
    {

        int lineNo = 0; //define the rows number.
        int propertyNo; //define the columns number.

        //read the data from the .txt file line by line.
        while ( getline(DataSet,line) ) 
        {

            stringstream linestream(line); //define a stringstream variable for variable line.
            string value;

            float new_value = 0.0;

            propertyNo = 0;

            //Read line by line from linestream variable, separated by commas and save them in value variable.
            while ( getline(linestream,value, ',') )
            {
                new_value = stof(value);
                data[lineNo][propertyNo] = new_value; //save the value of variable (value) into array data.

                //cout << data[lineNo][propertyNo] << "\t";

                if ( propertyNo < Att_Num )
                { propertyNo++; }
            }

            //cout << endl;


            if ( lineNo < Inst_Num )
            { lineNo++; }

        } 

        //close the file.
        DataSet.close();
    }
    else
    { 
        //print error message if the file not found.
        cout << "error, not found the file!" << endl; 
    }


    //return back the array.
    return data;

}

int main()
{

    int counter1 = 0,counter2 = 0,counter3 = 0,counter4 = 0;

    int Att_Num; //number of attributes in the Dataset.
    int Inst_Num; //number of instants in the Dataset.
    int Inst_Num_train; //number of instants in the Dataset for training.
    int Inst_Num_test; //number of instants in the Dataset for testing.
    string File_name; //file name.

    int inputs_Num; //number of inputs.
    int outputs_Num; //number of outputs.

    string input_output_possition = "false"; //define where is the inputs and outputs position at right or left.
                                             // False: outputs in the left and inputs in the right.
                                             // True: outputs in the right and inputs in the left.

    int EPOCHS_NUM;

    //Read the file name from the user.
    cout << "Enter the file name (follow by .txt):  ";
    cin >> File_name;
    cout << endl;

    //Read the attributes and instants numbers from the user.
    cout << "Enter the attributes and instants ( for total, train and test ) numbers:  ";
    cin >> Att_Num >> Inst_Num >> Inst_Num_train >> Inst_Num_test;
    cout << endl;

    //Read the input and outputs numbers from the user.
    cout << "Enter the inputs and outputs numbers:  ";
    cin >> inputs_Num >> outputs_Num;
    cout << endl;

    //Read the input and outputs possition.
    cout << "Enter the inputs and outputs possition (true:right,false:left):  ";
    cin >> input_output_possition; 
    cout << endl;

    //Read the epochs.
    cout << "Enter the epochs number :  ";
    cin >> EPOCHS_NUM; 
    cout << endl;

    //Process of Reading the dataSet from file and save in into array.
    //Create 2D array to save result.
    float** data = new float*[Inst_Num]; 
    for(int i = 0; i < Inst_Num; ++i)
    { data[i] = new float[Att_Num]; }


    //Call the function to read data from file, and return the result.
    data = Read_Data_Form_file(Att_Num,Inst_Num,File_name,inputs_Num,outputs_Num,input_output_possition);


    //________________________________________________________________________________________________________
    //seprate the outputs and inputs in different arrays.
    //FOR TRAINING PROCESS.
    //________________________________________________________________________________________________________


    //Create 2D array to save result (inputs).
    float** data_inputs = new float*[Inst_Num_train]; 
    for(int i = 0; i < Inst_Num_train; ++i)
    { data_inputs[i] = new float[Att_Num - outputs_Num]; }

    //Create 2D array to save result (outputs).
    float** data_outputs = new float*[Inst_Num_train]; 
    for(int i = 0; i < Inst_Num_train; ++i)
    { data_outputs[i] = new float[Att_Num - inputs_Num]; }



    if ( input_output_possition == "false" )
    {
        for ( int i = 0; i < Inst_Num - Inst_Num_test; i++)
        {
            for ( int j = 0; j < outputs_Num; j++ )
            { data_outputs[i][j] = data[i][j]; }
        }

        for ( int i = 0; i < Inst_Num - Inst_Num_test; i++)
        {
            for ( int z = outputs_Num; z < Att_Num; z++)
            { data_inputs[i][z - outputs_Num] = data[i][z]; }
        }
    }
    else if ( input_output_possition == "true" )
    {
        for ( int i = 0; i < Inst_Num - Inst_Num_test; i++)
        {
            for ( int j = inputs_Num; j < Att_Num; j++ )
            { data_outputs[i][j - inputs_Num] = data[i][j]; }
        }

        for ( int i = 0; i < Inst_Num - Inst_Num_test; i++)
        {
            for ( int z = 0; z < inputs_Num - 1; z++)
            { data_inputs[i][z] = data[i][z]; }
        }
    }


    //print the result.
    //inputs.
    cout << "Intputs:" << endl;
    for ( int i = 0; i < Inst_Num_train; i++ )
    { 
        for ( int j = 0; j < inputs_Num; j++)
        { cout << data_inputs[i][j] << "\t"; }

        //counter1++;
        cout << endl;

    }

    //cout << "counter 1: " << counter1 << endl;;


    //outputs.
    cout << "Outputs:" << endl;
    for ( int i = 0; i < Inst_Num_train; i++ )
    { 
        for ( int j = 0; j < outputs_Num; j++)
        { cout << data_outputs[i][j] << "\t"; }

        //counter2++;
        cout << endl;

    }

    //cout << "counter 2: " << counter2 << endl;

    cout << " *********************************************************** " << endl;

    //________________________________________________________________________________________________________
    //seprate the outputs and inputs in different arrays.
    //FOR TESTING PROCESS.
    //________________________________________________________________________________________________________


    //Create 2D array to save result (inputs).
    float** data_inputs2 = new float*[Inst_Num_test]; 
    for(int i = 0; i < Inst_Num_test; ++i)
    { data_inputs2[i] = new float[Att_Num - outputs_Num]; }

    //Create 2D array to save result (outputs).
    float** data_outputs2 = new float*[Inst_Num_test]; 
    for(int i = 0; i < Inst_Num_test; ++i)
    { data_outputs2[i] = new float[Att_Num - inputs_Num]; }



    if ( input_output_possition == "false" )
    {
        for ( int i = Inst_Num_train; i < Inst_Num ; i++)
        {
            for ( int j = 0; j < outputs_Num; j++ )
            { data_outputs2[i - Inst_Num_train][j] = data[i][j]; }
        }

        for ( int i = Inst_Num_train; i < Inst_Num ; i++)
        {
            for ( int z = outputs_Num; z < Att_Num; z++)
            { data_inputs2[i - Inst_Num_train][z - outputs_Num] = data[i][z]; }
        }
    }
    else if ( input_output_possition == "true" )
    {
        for ( int i = Inst_Num_train; i < Inst_Num ; i++)
        {
            for ( int j = inputs_Num; j < Att_Num; j++ )
            { data_outputs2[i - Inst_Num_train][j - inputs_Num] = data[i][j]; }
        }

        for ( int i = Inst_Num_train; i < Inst_Num ; i++)
        {
            for ( int z = 0; z < inputs_Num - 1; z++)
            { data_inputs2[i - Inst_Num_train][z] = data[i][z]; }
        }
    }


    //print the result.
    //inputs.
    cout << "Intputs:" << endl;
    for ( int i = 0; i < Inst_Num_test; i++ )
    { 
        for ( int j = 0; j < inputs_Num; j++)
        { cout << data_inputs2[i][j] << "\t"; }

        //counter3++;
        cout << endl;

    }

    //cout << "counter 3: " << counter3;

    //outputs.
    cout << "Outputs:" << endl;
    for ( int i = 0; i < Inst_Num_test; i++ )
    { 
        for ( int j = 0; j < outputs_Num; j++)
        { cout << data_outputs2[i][j] << "\t"; }

        //counter4++;
        cout << endl;

    }

    //cout << "counter 4: " << counter4;

    //________________________________________________________________________________________________________
    //________________________________________________________________________________________________________



    //Create some patterns ( for train )
    float** pattern = new float*[Inst_Num_train]; 
    for(int i = 0; i < Inst_Num_train; ++i)
    { pattern[i] = new float[Att_Num - outputs_Num]; }

    pattern = data_inputs;

    //Desired output values ( for train )
    float** desiredout = new float*[Inst_Num_train]; 
    for(int i = 0; i < Inst_Num_train; ++i)
    { desiredout[i] = new float[Att_Num - inputs_Num]; }

    desiredout = data_outputs;


    //=====================================================


    //Create some patterns ( for test )
    float** pattern2 = new float*[Inst_Num_test]; 
    for(int i = 0; i < Inst_Num_test; ++i)
    { pattern2[i] = new float[Att_Num - outputs_Num]; }

    pattern2 = data_inputs2;

    //Desired output values ( for test )
    float** desiredout2 = new float*[Inst_Num_test]; 
    for(int i = 0; i < Inst_Num_test; ++i)
    { desiredout2[i] = new float[Att_Num - inputs_Num]; }

    desiredout2 = data_outputs2;

//-------------------------------------------------------------------   


    bpnet net;//Our neural network object
    int i,j;
    float error; //save the error value.

    //define the hidden layers and the number of neurons in each layer.
    int hiddenlayerNeuronCount [HIDDEN_LAYER_COUNT] = {HIDDEN_LAYERS};
    int hiddenlayercount = HIDDEN_LAYER_COUNT;


    //We create the network
    //net.create(PATTERN_SIZE,NETWORK_INPUTNEURONS,NETWORK_OUTPUT,HIDDEN_LAYERS,HIDDEN_LAYERS);
    net.create(Att_Num,inputs_Num,outputs_Num,hiddenlayerNeuronCount,hiddenlayercount);


    //Start the neural network training
    for( i = 0 ; i < EPOCHS_NUM ; i++ )
    {
        error = 0 ;//make the value of error zero.

        for( j = 0 ; j < Inst_Num_train ; j++ )
        {
            error += net.train(desiredout[j], pattern[j], 0.2f, 0.1f);
        }

        error /= Inst_Num_train;

        //display error
        cout << "ERROR:" << error << "\r";

    }


    //once trained test all patterns

    for( i = 0 ; i < Inst_Num_test ; i++ )
    {

        net.propagate( pattern2[i] );

    //display result
        cout << "TESTED PATTERN " << i+1 << " DESIRED OUTPUT: " << *desiredout2[i] << " NET RESULT: "<< net.getOutput().neurons[0]->output << endl;
    }




    system("pause");
    return 0;
}

测试算法的文件:

Balance_Scale.txt

Here

输出节目给我:

Output

Output

Output

如图所示,所有 125 测试模式都有相同的错误

0 个答案:

没有答案