Question

我正在实现一个反向传播（backprop）神经网络：和每个神经网络初学者一样，我写了这个 'NEURON' 类。

但是，我得到了奇怪的结果：当数据集很小时（比如 XOR 函数，数据集只有 4 种可能的输入组合：00、11、01、10），无论经过多少次训练迭代（epoch），输出神经元对不同输入给出的结果都非常接近。例如：1 XOR 1 得到 0.987，1 XOR 0 得到 0.986，这两个结果不是应该相差很大吗？

以下是该类的代码，以供参考：

#pragma once

#include <iostream>
#include <new>
#include <vector>
#include "Math.h"
#include "RandomizationUtils.h"

using namespace std;

/**
 * One neuron of a network trained with backpropagation.
 *
 * A neuron keeps two lists of connection pointers: the connections feeding it
 * (created by InputConnectionRequest on this neuron) and the connections it
 * feeds (the same objects, registered through AddOutputConnection on the
 * source neuron). Each neuron also carries a bias connection whose weight is
 * added to the weighted input sum in ProcessInputs.
 *
 * NOTE(review): connections are allocated with `new` and no destructor is
 * declared in this class, so nothing visible here releases them.
 */
class ClNeuron
{
    public:
    // Role of the neuron inside the network; selects the branch taken by UpdateWeights.
    enum NEURON_TYPE { NEURON_TYPE_INPUT=1,NEURON_TYPE_HIDDEN=2,NEURON_TYPE_OUTPUT=3 };

    private:

    // Capacity reserved for the input-connection list when a neuron is initialized.
    static const int CONST_DEFAULT_INPUT_NUMBER_PER_NEURON = 20;
    // Momentum factor copied into every new connection and into the bias.
    // Only declared here: standard C++ does not allow an in-class initializer on a
    // non-integral `static const` member, so the value is supplied by the
    // definition that follows the class.
    static const double CONST_DEFAULT_MOMENTUM_VALUE;

    // Connection between 2 neurons
    struct NEURON_CONNECTION
    {
        // Weight applied to m_data when the target sums its inputs
        double m_weight;
        // Last value the source neuron fired across this connection
        double m_data;
        // Last modification done to the weight
        double m_weight_last_delta;
        // Factor applied to m_weight_last_delta to obtain the momentum term
        double m_momentum_value;
        ClNeuron* m_source_neuron;
        ClNeuron* m_target_neuron;
    };

    // Initialization function shared by all constructors
    void Init(unsigned long p_uid,NEURON_TYPE p_type);

    // Set to true once Init has completed
    bool m_initialized;
    // All of the output connections of this neuron
    std::vector<NEURON_CONNECTION*> m_output_connections;
    // All of the input connections of this neuron
    std::vector<NEURON_CONNECTION*> m_input_connections;
    // Internal result buffer: sigmoid of (sum of weight * input over all inputs, plus the bias weight)
    double m_result_buffer;
    // Special weight that always has an input of 1.0
    NEURON_CONNECTION m_bias;

    public:

    // The type of this neuron
    NEURON_TYPE m_type;

    ClNeuron(NEURON_TYPE p_type);
    ClNeuron(unsigned long p_uid,NEURON_TYPE p_type);
    ClNeuron(unsigned long p_uid);
    ClNeuron();

    // Connect this neuron's output to another neuron's input.
    // Returns false when the remote neuron could not create the connection.
    bool AddOutputConnection(ClNeuron* p_neuron);

    // This neuron got a request to have a new input coming from p_source_neuron.
    // Returns the new connection, or NULL on failure.
    NEURON_CONNECTION* InputConnectionRequest(ClNeuron* p_source_neuron);

    // Tell the neuron to fire the content of its internal result buffer
    double Fire();

    // Tell the neuron to fire a particular data value on all of its output connections
    double Fire(double p_data);

    // Compute this neuron's error gradient and update the weights of its OUTPUT
    // connections and/or its bias, depending on the neuron type.
    // p_wanted_output is only read for output neurons.
    void UpdateWeights(double p_wanted_output);

    // Sum all the weights * their respective inputs (plus bias) into the internal buffer, then apply the sigmoid
    void ProcessInputs();

    // Print neuron & connections & weights
    void PrintNeuronData();

    // Unique ID of this neuron
    unsigned long m_uid;

    // This neuron's calculated error gradient (written by UpdateWeights)
    double m_error_gradient;
};

// Out-of-class definition carrying the value of the momentum constant.
const double ClNeuron::CONST_DEFAULT_MOMENTUM_VALUE = 0.4;

/**
 * Creates a new connection going from p_neuron into this neuron and records
 * it in this neuron's input list.
 *
 * The weight is drawn with GetRandomDouble(-1,1); the carried data and the
 * last weight delta start at 0 so that the first momentum term computed from
 * m_weight_last_delta is well defined.
 *
 * @param p_neuron  neuron that will feed this one (stored as the source)
 * @return the new connection, or NULL if it could not be allocated
 */
ClNeuron::NEURON_CONNECTION* ClNeuron::InputConnectionRequest(ClNeuron* p_neuron)
{
    // Plain `new` throws on failure and never returns NULL, which made the
    // check below unreachable; the nothrow form makes the NULL contract real.
    // The trailing () value-initializes every field.
    NEURON_CONNECTION* connection = new (std::nothrow) NEURON_CONNECTION();
    if(!connection)
    {
        std::cout << "Error creating new connection, memory full ?" << std::endl << std::flush;
        return NULL;
    }

    connection->m_weight = GetRandomDouble(-1,1);
    connection->m_data = 0;
    // Read by the momentum computation in UpdateWeights before it is ever written there
    connection->m_weight_last_delta = 0;
    connection->m_momentum_value = CONST_DEFAULT_MOMENTUM_VALUE;
    connection->m_source_neuron = p_neuron;
    connection->m_target_neuron = this;

    m_input_connections.push_back(connection);
    return connection;
}

/**
 * Connects this neuron's output to p_neuron's input.
 *
 * The connection object is created by the remote neuron through
 * InputConnectionRequest; the same pointer is then stored in this neuron's
 * output list.
 *
 * @param p_neuron  neuron that should receive this neuron's output
 * @return true when the connection was created and registered, false when
 *         p_neuron is NULL or the remote neuron could not create it
 */
bool ClNeuron::AddOutputConnection(ClNeuron* p_neuron)
{
    // Nothing to connect to: report failure instead of dereferencing NULL
    if(!p_neuron)
    {
        return false;
    }

    // If the remote neuron accepts us as a new input, we add it to the output list
    NEURON_CONNECTION* connection = p_neuron->InputConnectionRequest(this);
    if(!connection)
    {
        return false;
    }

    m_output_connections.push_back(connection);
    return true;
}

double ClNeuron::Fire()
{
    return Fire(m_result_buffer);
}

double ClNeuron::Fire(double p_data)
{
    if(m_output_connections.size()==0)
    {
    cout << "Final neuron " << m_uid << " return " << p_data << endl;
    return p_data;
    }
    for(unsigned long i=0;i<m_output_connections.size();i++)
    {
    m_output_connections[i]->m_data = p_data;       
    }

    return 1;
}

void ClNeuron::ProcessInputs()
{
    m_result_buffer = 0;
    for(unsigned long i=0;i<m_input_connections.size();i++)
    {
    m_result_buffer += m_input_connections[i]->m_weight * m_input_connections[i]->m_data; 
    }    

    m_result_buffer += m_bias.m_weight ;

    //sigmoid the sum
    m_result_buffer = Sigmoid(m_result_buffer);
}

void ClNeuron::UpdateWeights(double p_wanted_output)
{ 
    //Update weights from neuron to all of its inputs NOTE : p_wanted_output is the output of THIS neuron (in case their is many output neuron in the network)
    if(m_type == NEURON_TYPE_OUTPUT)
    {
    m_error_gradient = (p_wanted_output - m_result_buffer) * SigmoidDerivative(m_result_buffer);

    //Adjust the bias of this neuron
    double weight_delta = 1 * m_error_gradient * 1  ;
    double momentum = m_bias.m_weight_last_delta * m_bias.m_momentum_value;
    m_bias.m_weight += weight_delta + momentum;
    m_bias.m_weight_last_delta = weight_delta;
    }

    else if(m_type == NEURON_TYPE_HIDDEN)
    {
    double error_deriative = SigmoidDerivative(m_result_buffer);

    double tmpBuffer = 0.00;
    for(unsigned long i=0;i<m_output_connections.size();i++)
    {
        tmpBuffer += (m_output_connections[i]->m_target_neuron->m_error_gradient * m_output_connections[i]->m_weight);
    }
    m_error_gradient = error_deriative * tmpBuffer;


    //Adjust the weights for this neuron's OUTPUT connections
    for(unsigned long i=0;i<m_output_connections.size();i++)
    {
        double weight_delta = 1 * m_output_connections[i]->m_target_neuron->m_error_gradient * m_result_buffer  ;
        double momentum = m_output_connections[i]->m_weight_last_delta * m_output_connections[i]->m_momentum_value;
        m_output_connections[i]->m_weight += weight_delta + momentum;
        m_output_connections[i]->m_weight_last_delta = weight_delta;
    }

    //Adjust the bias of this neuron
    double weight_delta = 1 * m_error_gradient * 1  ;
    double momentum = m_bias.m_weight_last_delta * m_bias.m_momentum_value;
    m_bias.m_weight += weight_delta + momentum;
    m_bias.m_weight_last_delta = weight_delta;
    }

    if(m_type == NEURON_TYPE_INPUT)
    { 
    //Adjust the weights for this neuron's OUTPUT connections
    for(unsigned long i=0;i<m_output_connections.size();i++)
    {
        double weight_delta = 1 * m_output_connections[i]->m_target_neuron->m_error_gradient * m_result_buffer  ;
        double momentum = m_output_connections[i]->m_weight_last_delta * m_output_connections[i]->m_momentum_value;
        m_output_connections[i]->m_weight += weight_delta + momentum;
        m_output_connections[i]->m_weight_last_delta = weight_delta;
    }
    }
}

void ClNeuron::PrintNeuronData()
{
    cout << endl << "========================================" << endl;
    cout << "Neuron #" << m_uid << " has " << m_input_connections.size() << " input connection" << endl << endl;

    for(unsigned long i=0;i<m_input_connections.size();i++)
    {
    cout << "----> " << "conn." << i << " | Src ID: " << m_input_connections[i]->m_source_neuron->m_uid << " | W: "<< m_input_connections[i]->m_weight << " | D: "<< m_input_connections[i]->m_data << " | RB : " << m_result_buffer << " | EF: " << endl;   
    }

    cout << "Neuron #" << m_uid << " has " << m_output_connections.size() << " output connection" << endl << endl;

    for(unsigned long i=0;i<m_output_connections.size();i++)
    {
    cout << "----> " << "conn." << i << " | Dst ID: " << m_output_connections[i]->m_target_neuron->m_uid << " | W: "<< m_output_connections[i]->m_weight << " | D: "<< m_output_connections[i]->m_data << " | RB : " << m_result_buffer << " | EF: " << endl;   
    }    

    cout << endl << "========================================" << endl;
}

void ClNeuron::Init(unsigned long p_uid,NEURON_TYPE p_type)
{
    m_initialized = false;
    m_output_connections.clear();    
    m_input_connections.clear();
    m_input_connections.reserve(CONST_DEFAULT_INPUT_NUMBER_PER_NEURON);

    m_type = p_type;
    m_uid = rand() % RAND_MAX;
    m_result_buffer = 0;

    m_bias.m_weight = GetRandomDouble(-1,1); 
    m_bias.m_data = 0;
    m_bias.m_momentum_value = CONST_DEFAULT_MOMENTUM_VALUE;
    m_bias.m_source_neuron = NULL;
    m_bias.m_target_neuron = this;

    m_initialized = true;
}

// Builds a neuron of the given type; both arguments are forwarded to Init unchanged.
ClNeuron::ClNeuron(unsigned long p_uid,NEURON_TYPE p_type)
{
    this->Init(p_uid, p_type);
}

// Builds a neuron of the given type, passing 0 as the id to Init.
ClNeuron::ClNeuron(NEURON_TYPE p_type)
{
    const unsigned long unspecified_uid = 0;
    this->Init(unspecified_uid, p_type);
}

// Builds a hidden neuron, forwarding the given id to Init.
ClNeuron::ClNeuron(unsigned long p_uid)
{
    const NEURON_TYPE default_type = NEURON_TYPE_HIDDEN;
    this->Init(p_uid, default_type);
}

// Default construction: a hidden neuron, with 0 passed as the id to Init.
ClNeuron::ClNeuron()
{
    const unsigned long unspecified_uid = 0;
    const NEURON_TYPE default_type = NEURON_TYPE_HIDDEN;
    this->Init(unspecified_uid, default_type);
}

Answer 1

问题出在每个神经元的偏置（BIAS）权重值上：

更确切地说，偏置的误差梯度始终为 0（导致 weight_delta 为 0），最终导致偏置无法更新其输出权重。

神经网络 - 小数据集的无关紧要的输出数据

1 个答案: