I have created the following NN, which is supposed to learn via backpropagation. I pieced it together from a lot of reading and a number of different tutorials.

To test it, I tried giving it the XOR problem. Each data set is 2 inputs and 2 outputs. Both inputs are either 1 or 0, and the two outputs should indicate whether the network should output a 0 (the first output) or a 1 (the second output).

Here is what happens when I provide the following data:
| Input 1 | Input 2 | Expected 1 | Expected 2 | NN Output 1 | NN Output 2 |
|---------|---------|------------|------------|-------------|-------------|
| 0       | 1       | 1          | 0          | 0.49        | 0.50        |
| 1       | 0       | 1          | 0          | 0.98        | 0.01        |
| 1       | 1       | 0          | 1          | 0.01        | 0.98        |
| 0       | 0       | 0          | 1          | 0.49        | 0.50        |
Hopefully the above makes it clear that for two of the cases it sort of works: allowing for a margin of error, landing within 0.01 of the target is pretty good. But for the other two cases it is way off. Sure, a step function would technically snap those to the right answers, but really the network is saying it has a 50/50 split.

That is with 100,000 epochs and a learning rate of 0.03, and what you see above is the actual training data. If I raise the learning rate to 0.9, the results are different, but they also raise questions:
| Input 1 | Input 2 | Expected 1 | Expected 2 | NN Output 1 | NN Output 2 |
|---------|---------|------------|------------|-------------|-------------|
| 0       | 1       | 1          | 0          | 0.99        | 0.00        |
| 1       | 0       | 1          | 0          | 0.99        | 0.00        |
| 1       | 1       | 0          | 1          | 0.49        | 0.99        |
| 0       | 0       | 0          | 1          | 0.00        | 0.99        |
Much better, but the 1,1 input still gives a strange output.
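To make "better" measurable rather than eyeballing the tables, one option is to track the mean squared error over the four training cases after training. A minimal sketch against the NeuralNetwork interface from the full listing below (the helper name MeanSquaredError is my own, not part of the original code):

// Mean squared error of a trained network over a data set. Assumes the
// NeuralNetwork<2,2> interface from the full listing below.
#include <array>
#include <vector>

template<typename Network>
double MeanSquaredError(Network& nn,
                        const std::vector<std::array<double,2>>& inputs,
                        const std::vector<std::array<double,2>>& targets)
{
    double sum = 0;
    for(std::size_t i = 0; i < inputs.size(); ++i)
    {
        nn.PassForward(inputs[i]);
        auto outputs = nn.GetOutputs();
        for(std::size_t o = 0; o < outputs.size(); ++o)
        {
            const double diff = targets[i][o] - outputs[o];
            sum += diff * diff;
        }
    }
    // Average over every output of every case.
    return sum / (2.0 * inputs.size());
}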
My code is fairly short. Here is the complete code:
#include <iostream>
#include <array>
#include <cmath>   // std::exp, std::log
#include <random>
#include <vector>

// Global uniform random generator used for weight initialisation,
// producing doubles in [-2, 2).
class RandomGenerator
{
public:
    RandomGenerator(const double min, const double max)
        :
        m_ran(),
        m_twister(m_ran()),
        m_distrib(min,max)
    {
    }

    double operator()(void) { return m_distrib(m_twister); }

private:
    std::random_device m_ran;
    std::mt19937_64 m_twister;
    std::uniform_real_distribution<double> m_distrib;
} randGen(-2,2);

// Activation functions.
double sigmoid(const double x)
{
    return 1.0 / (1.0 + std::exp(-x));
}

double softplus(const double x)
{
    return std::log(1.0 + std::exp(x));
}

double step(const double x)
{
    return x > 0 ? 1 : 0;
}
// A single neuron with a fixed number of inputs and a compile-time
// activation function. Stores each input value alongside its weight.
template<int NumInputs, double(*ActivationFunction)(const double)>
class Neuron
{
public:
    void SetInput(const std::size_t index, const double value)
    {
        m_inputsAndWeights[index].value = value;
    }
    double GetInput(const std::size_t index) const { return m_inputsAndWeights[index].value; }

    void SetWeight(const std::size_t index, const double weight)
    {
        m_inputsAndWeights[index].weight = weight;
    }
    double GetWeight(const std::size_t index) const { return m_inputsAndWeights[index].weight; }

    void SetBiasWeight(const double weight) { m_biasWeight = weight; }
    double GetBiasWeight() const { return m_biasWeight; }

    // Weighted sum of the inputs plus the bias, passed through the activation.
    double GetOutput() const
    {
        double output = 0;
        for(const auto& p : m_inputsAndWeights)
            output += p.value * p.weight;
        output += 1.0 * m_biasWeight; // bias input is a constant 1.0
        return ActivationFunction(output);
    }

private:
    struct DataPair
    {
        double value;
        double weight;
    };

    std::array<DataPair,NumInputs> m_inputsAndWeights;
    double m_biasWeight;
};
// A fully connected network with a single hidden layer whose size is the
// mean of the input and output counts.
template<std::size_t NumInputs, std::size_t NumOutputs>
class NeuralNetwork
{
public:
    static constexpr std::size_t NumHidden() { return (NumInputs+NumOutputs) / 2; }

    void SetInputs(std::array<double,NumInputs> inputData)
    {
        for(auto& i : m_hiddenNeurons)
        {
            for(std::size_t index = 0; index < inputData.size(); ++index)
                i.SetInput(index,inputData[index]);
        }
    }

    std::array<double,NumOutputs> GetOutputs() const
    {
        std::array<double,NumOutputs> outputs;
        for(std::size_t i = 0; i < NumOutputs; ++i)
        {
            outputs[i] = m_outputNeurons[i].GetOutput();
        }
        return outputs;
    }

    // Feed the inputs through the hidden layer into the output layer.
    void PassForward(std::array<double,NumInputs> inputData)
    {
        SetInputs(inputData);
        for(std::size_t i = 0; i < NumHidden(); ++i)
        {
            for(auto& o : m_outputNeurons)
            {
                o.SetInput(i,m_hiddenNeurons[i].GetOutput());
            }
        }
    }

    void Train(std::vector<std::array<double,NumInputs>> trainingData,
               std::vector<std::array<double,NumOutputs>> targetData,
               double learningRate, std::size_t numEpochs)
    {
        // Randomly initialise every weight and bias weight.
        for(auto& h : m_hiddenNeurons)
        {
            for(std::size_t i = 0; i < NumInputs; ++i)
                h.SetWeight(i,randGen());
            h.SetBiasWeight(randGen());
        }
        for(auto& o : m_outputNeurons)
        {
            for(std::size_t h = 0; h < NumHidden(); ++h)
                o.SetWeight(h,randGen());
            o.SetBiasWeight(randGen());
        }

        for(std::size_t e = 0; e < numEpochs; ++e)
        {
            for(std::size_t dataIndex = 0; dataIndex < trainingData.size(); ++dataIndex)
            {
                PassForward(trainingData[dataIndex]);

                // note: sized NumHidden()+1, though only the first
                // NumHidden() entries are ever used
                std::array<double,NumHidden()+1> deltaHidden;
                std::array<double,NumOutputs> deltaOutput;

                // Output-layer deltas: sigmoid derivative times the error.
                for(std::size_t i = 0; i < NumOutputs; ++i)
                {
                    auto output = m_outputNeurons[i].GetOutput();
                    deltaOutput[i] = output * (1.0 - output) * (targetData[dataIndex][i] - output);
                }

                // Hidden-layer deltas: back-propagate the output deltas
                // through the output weights.
                for(std::size_t i = 0; i < NumHidden(); ++i)
                {
                    double error = 0;
                    for(std::size_t j = 0; j < NumOutputs; ++j)
                    {
                        error += m_outputNeurons[j].GetWeight(i) * deltaOutput[j];
                    }
                    auto output = m_hiddenNeurons[i].GetOutput();
                    deltaHidden[i] = output * (1.0 - output) * error;
                }

                // Update the output layer's weights and bias weights.
                for(std::size_t i = 0; i < NumOutputs; ++i)
                {
                    for(std::size_t j = 0; j < NumHidden(); ++j)
                    {
                        auto currentWeight = m_outputNeurons[i].GetWeight(j);
                        m_outputNeurons[i].SetWeight(j,currentWeight + learningRate * deltaOutput[i] * m_hiddenNeurons[j].GetOutput());
                    }
                    auto currentWeight = m_outputNeurons[i].GetBiasWeight();
                    m_outputNeurons[i].SetBiasWeight(currentWeight + learningRate * deltaOutput[i] * (1.0*currentWeight));
                }

                // Update the hidden layer's weights and bias weights.
                for(std::size_t i = 0; i < NumHidden(); ++i)
                {
                    for(std::size_t j = 0; j < NumInputs; ++j)
                    {
                        auto currentWeight = m_hiddenNeurons[i].GetWeight(j);
                        m_hiddenNeurons[i].SetWeight(j,currentWeight + learningRate * deltaHidden[i] * m_hiddenNeurons[i].GetInput(j));
                    }
                    auto currentWeight = m_hiddenNeurons[i].GetBiasWeight();
                    m_hiddenNeurons[i].SetBiasWeight(currentWeight + learningRate * deltaHidden[i] * (1.0*currentWeight));
                }
            }
        }
    }

private:
    std::array<Neuron<NumInputs,sigmoid>,NumHidden()> m_hiddenNeurons;
    std::array<Neuron<NumHidden(),sigmoid>,NumOutputs> m_outputNeurons;
};
int main()
{
    NeuralNetwork<2,2> NN;

    // Train on the four XOR cases, then print the outputs for each case.
    std::vector<std::array<double,2>> trainingData = {{{0,1},{1,0},{1,1},{0,0}}};
    std::vector<std::array<double,2>> targetData = {{{1,0},{1,0},{0,1},{0,1}}};
    NN.Train(trainingData,targetData,0.03,100000);

    for(std::size_t i = 0; i < trainingData.size(); ++i)
    {
        NN.PassForward(trainingData[i]);
        auto outputs = NN.GetOutputs();
        for(std::size_t o = 0; o < outputs.size(); ++o)
        {
            std::cout << "Out " << o << ":\t" << outputs[o] << std::endl;
        }
    }
    return 0;
}
Answer (score: 1):
I did the same thing a few days ago, and I can tell you that 100,000 iterations of backpropagation are not enough if you hit an unlucky weight initialisation. Don't initialise your weights arbitrarily: with large weights the sigmoid saturates easily, and on the other hand weights of 0 won't help either. I initialised my weights to ±(0.3, 0.7) and convergence improved significantly.
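The reason large weights hurt is that the sigmoid's derivative, σ(x)(1 − σ(x)), shrinks toward zero for large |x|, so the deltas vanish and learning stalls. As a minimal sketch of the ±(0.3, 0.7) initialisation in the style of the question's RandomGenerator (the class and object names here are my own invention, and I am reading "±(0.3, 0.7)" as a magnitude uniform in [0.3, 0.7] with a random sign):

#include <random>

// Produces weights whose magnitude is uniform in [0.3, 0.7) and whose sign
// is chosen at random, i.e. values in [-0.7, -0.3) or [0.3, 0.7). This keeps
// the sigmoid away from both saturation (large |w|) and the dead zone near 0.
class SignedWeightGenerator
{
public:
    SignedWeightGenerator(const double minMag, const double maxMag)
        : m_twister(std::random_device{}()),
          m_magnitude(minMag, maxMag),
          m_sign(0, 1)
    {
    }

    double operator()()
    {
        const double mag = m_magnitude(m_twister);
        return m_sign(m_twister) == 0 ? -mag : mag;
    }

private:
    std::mt19937_64 m_twister;
    std::uniform_real_distribution<double> m_magnitude;
    std::uniform_int_distribution<int> m_sign;
} weightGen(0.3, 0.7);

Swapping this in for randGen(-2,2) where the weights are set at the top of Train is the only change this suggestion needs.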