I have created the following NN, which is supposed to learn via backpropagation. I pieced it together from a lot of reading and a number of different tutorials.

To test it, I tried giving it the XOR problem. Each data set is 2 inputs and 2 outputs. Both inputs are either 1 or 0, and the two outputs should indicate whether the network should output a 0 (the first output) or a 1 (the second output).

Here is what happens when I provide the following data:
| Input 1 | Input 2 | Expected 1 | Expected 2 | NN Output 1 | NN Output 2 |
|---------|---------|------------|------------|-------------|-------------|
| 0       | 1       | 1          | 0          | 0.49        | 0.50        |
| 1       | 0       | 1          | 0          | 0.98        | 0.01        |
| 1       | 1       | 0          | 1          | 0.01        | 0.98        |
| 0       | 0       | 0          | 1          | 0.49        | 0.50        |
Hopefully the above makes it clear that for two of the cases it sort of works: allowing for a margin of error, landing within 0.01 of the target is pretty good. But for the other two cases it is way off. Sure, a step function would technically snap those to the right answers, but really the network is saying it has a 50/50 split.

That is with 100,000 epochs and a learning rate of 0.03, and what you see above is the actual training data. If I raise the learning rate to 0.9, the results are different, but they also raise questions:
| Input 1 | Input 2 | Expected 1 | Expected 2 | NN Output 1 | NN Output 2 |
|---------|---------|------------|------------|-------------|-------------|
| 0       | 1       | 1          | 0          | 0.99        | 0.00        |
| 1       | 0       | 1          | 0          | 0.99        | 0.00        |
| 1       | 1       | 0          | 1          | 0.49        | 0.99        |
| 0       | 0       | 0          | 1          | 0.00        | 0.99        |
Much better, but the 1,1 input still gives a strange output.
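To make "better" measurable rather than eyeballing the tables, one option is to track the mean squared error over the four training cases after training. A minimal sketch against the NeuralNetwork interface from the full listing below (the helper name MeanSquaredError is my own, not part of the original code):

// Mean squared error of a trained network over a data set. Assumes the
// NeuralNetwork<2,2> interface from the full listing below.
#include <array>
#include <vector>

template<typename Network>
double MeanSquaredError(Network& nn,
                        const std::vector<std::array<double,2>>& inputs,
                        const std::vector<std::array<double,2>>& targets)
{
    double sum = 0;
    for(std::size_t i = 0; i < inputs.size(); ++i)
    {
        nn.PassForward(inputs[i]);
        auto outputs = nn.GetOutputs();
        for(std::size_t o = 0; o < outputs.size(); ++o)
        {
            const double diff = targets[i][o] - outputs[o];
            sum += diff * diff;
        }
    }
    // Average over every output of every case.
    return sum / (2.0 * inputs.size());
}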
My code is fairly short. Here is the complete code:
#include <iostream>
#include <array>
#include <cmath>   // std::exp, std::log
#include <random>
#include <vector>

// Global uniform random generator used for weight initialisation,
// producing doubles in [-2, 2).
class RandomGenerator
{
public:
    RandomGenerator(const double min, const double max)
        :
        m_ran(),
        m_twister(m_ran()),
        m_distrib(min,max)
    {
    }

    double operator()(void) { return m_distrib(m_twister); }

private:
    std::random_device m_ran;
    std::mt19937_64 m_twister;
    std::uniform_real_distribution<double> m_distrib;
} randGen(-2,2);

// Activation functions.
double sigmoid(const double x)
{
    return 1.0 / (1.0 + std::exp(-x));
}

double softplus(const double x)
{
    return std::log(1.0 + std::exp(x));
}

double step(const double x)
{
    return x > 0 ? 1 : 0;
}
// A single neuron with a fixed number of inputs and a compile-time
// activation function. Stores each input value alongside its weight.
template<int NumInputs, double(*ActivationFunction)(const double)>
class Neuron
{
public:
    void SetInput(const std::size_t index, const double value)
    {
        m_inputsAndWeights[index].value = value;
    }
    double GetInput(const std::size_t index) const { return m_inputsAndWeights[index].value; }

    void SetWeight(const std::size_t index, const double weight)
    {
        m_inputsAndWeights[index].weight = weight;
    }
    double GetWeight(const std::size_t index) const { return m_inputsAndWeights[index].weight; }

    void SetBiasWeight(const double weight) { m_biasWeight = weight; }
    double GetBiasWeight() const { return m_biasWeight; }

    // Weighted sum of the inputs plus the bias, passed through the activation.
    double GetOutput() const
    {
        double output = 0;
        for(const auto& p : m_inputsAndWeights)
            output += p.value * p.weight;
        output += 1.0 * m_biasWeight; // bias input is a constant 1.0
        return ActivationFunction(output);
    }

private:
    struct DataPair
    {
        double value;
        double weight;
    };

    std::array<DataPair,NumInputs> m_inputsAndWeights;
    double m_biasWeight;
};
// A fully connected network with a single hidden layer whose size is the
// mean of the input and output counts.
template<std::size_t NumInputs, std::size_t NumOutputs>
class NeuralNetwork
{
public:
    static constexpr std::size_t NumHidden() { return (NumInputs+NumOutputs) / 2; }

    void SetInputs(std::array<double,NumInputs> inputData)
    {
        for(auto& i : m_hiddenNeurons)
        {
            for(std::size_t index = 0; index < inputData.size(); ++index)
                i.SetInput(index,inputData[index]);
        }
    }

    std::array<double,NumOutputs> GetOutputs() const
    {
        std::array<double,NumOutputs> outputs;
        for(std::size_t i = 0; i < NumOutputs; ++i)
        {
            outputs[i] = m_outputNeurons[i].GetOutput();
        }
        return outputs;
    }

    // Feed the inputs through the hidden layer into the output layer.
    void PassForward(std::array<double,NumInputs> inputData)
    {
        SetInputs(inputData);
        for(std::size_t i = 0; i < NumHidden(); ++i)
        {
            for(auto& o : m_outputNeurons)
            {
                o.SetInput(i,m_hiddenNeurons[i].GetOutput());
            }
        }
    }

    void Train(std::vector<std::array<double,NumInputs>> trainingData,
               std::vector<std::array<double,NumOutputs>> targetData,
               double learningRate, std::size_t numEpochs)
    {
        // Randomly initialise every weight and bias weight.
        for(auto& h : m_hiddenNeurons)
        {
            for(std::size_t i = 0; i < NumInputs; ++i)
                h.SetWeight(i,randGen());
            h.SetBiasWeight(randGen());
        }
        for(auto& o : m_outputNeurons)
        {
            for(std::size_t h = 0; h < NumHidden(); ++h)
                o.SetWeight(h,randGen());
            o.SetBiasWeight(randGen());
        }

        for(std::size_t e = 0; e < numEpochs; ++e)
        {
            for(std::size_t dataIndex = 0; dataIndex < trainingData.size(); ++dataIndex)
            {
                PassForward(trainingData[dataIndex]);

                // note: sized NumHidden()+1, though only the first
                // NumHidden() entries are ever used
                std::array<double,NumHidden()+1> deltaHidden;
                std::array<double,NumOutputs> deltaOutput;

                // Output-layer deltas: sigmoid derivative times the error.
                for(std::size_t i = 0; i < NumOutputs; ++i)
                {
                    auto output = m_outputNeurons[i].GetOutput();
                    deltaOutput[i] = output * (1.0 - output) * (targetData[dataIndex][i] - output);
                }

                // Hidden-layer deltas: back-propagate the output deltas
                // through the output weights.
                for(std::size_t i = 0; i < NumHidden(); ++i)
                {
                    double error = 0;
                    for(std::size_t j = 0; j < NumOutputs; ++j)
                    {
                        error += m_outputNeurons[j].GetWeight(i) * deltaOutput[j];
                    }
                    auto output = m_hiddenNeurons[i].GetOutput();
                    deltaHidden[i] = output * (1.0 - output) * error;
                }

                // Update the output layer's weights and bias weights.
                for(std::size_t i = 0; i < NumOutputs; ++i)
                {
                    for(std::size_t j = 0; j < NumHidden(); ++j)
                    {
                        auto currentWeight = m_outputNeurons[i].GetWeight(j);
                        m_outputNeurons[i].SetWeight(j,currentWeight + learningRate * deltaOutput[i] * m_hiddenNeurons[j].GetOutput());
                    }
                    auto currentWeight = m_outputNeurons[i].GetBiasWeight();
                    m_outputNeurons[i].SetBiasWeight(currentWeight + learningRate * deltaOutput[i] * (1.0*currentWeight));
                }

                // Update the hidden layer's weights and bias weights.
                for(std::size_t i = 0; i < NumHidden(); ++i)
                {
                    for(std::size_t j = 0; j < NumInputs; ++j)
                    {
                        auto currentWeight = m_hiddenNeurons[i].GetWeight(j);
                        m_hiddenNeurons[i].SetWeight(j,currentWeight + learningRate * deltaHidden[i] * m_hiddenNeurons[i].GetInput(j));
                    }
                    auto currentWeight = m_hiddenNeurons[i].GetBiasWeight();
                    m_hiddenNeurons[i].SetBiasWeight(currentWeight + learningRate * deltaHidden[i] * (1.0*currentWeight));
                }
            }
        }
    }

private:
    std::array<Neuron<NumInputs,sigmoid>,NumHidden()> m_hiddenNeurons;
    std::array<Neuron<NumHidden(),sigmoid>,NumOutputs> m_outputNeurons;
};
int main()
{
    NeuralNetwork<2,2> NN;

    // Train on the four XOR cases, then print the outputs for each case.
    std::vector<std::array<double,2>> trainingData = {{{0,1},{1,0},{1,1},{0,0}}};
    std::vector<std::array<double,2>> targetData = {{{1,0},{1,0},{0,1},{0,1}}};
    NN.Train(trainingData,targetData,0.03,100000);

    for(std::size_t i = 0; i < trainingData.size(); ++i)
    {
        NN.PassForward(trainingData[i]);
        auto outputs = NN.GetOutputs();
        for(std::size_t o = 0; o < outputs.size(); ++o)
        {
            std::cout << "Out " << o << ":\t" << outputs[o] << std::endl;
        }
    }
    return 0;
}
Answer (score: 1):
I did the same thing a few days ago, and I can tell you that 100,000 iterations of backpropagation are not enough if you hit an unlucky weight initialisation. Don't initialise your weights arbitrarily: with large weights the sigmoid saturates easily, and on the other hand weights of 0 won't help either. I initialised my weights to ±(0.3, 0.7) and convergence improved significantly.
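The reason large weights hurt is that the sigmoid's derivative, σ(x)(1 − σ(x)), shrinks toward zero for large |x|, so the deltas vanish and learning stalls. As a minimal sketch of the ±(0.3, 0.7) initialisation in the style of the question's RandomGenerator (the class and object names here are my own invention, and I am reading "±(0.3, 0.7)" as a magnitude uniform in [0.3, 0.7] with a random sign):

#include <random>

// Produces weights whose magnitude is uniform in [0.3, 0.7) and whose sign
// is chosen at random, i.e. values in [-0.7, -0.3) or [0.3, 0.7). This keeps
// the sigmoid away from both saturation (large |w|) and the dead zone near 0.
class SignedWeightGenerator
{
public:
    SignedWeightGenerator(const double minMag, const double maxMag)
        : m_twister(std::random_device{}()),
          m_magnitude(minMag, maxMag),
          m_sign(0, 1)
    {
    }

    double operator()()
    {
        const double mag = m_magnitude(m_twister);
        return m_sign(m_twister) == 0 ? -mag : mag;
    }

private:
    std::mt19937_64 m_twister;
    std::uniform_real_distribution<double> m_magnitude;
    std::uniform_int_distribution<int> m_sign;
} weightGen(0.3, 0.7);

Swapping this in for randGen(-2,2) where the weights are set at the top of Train is the only change this suggestion needs.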