Question

在看了 The Coding Train 的精彩视频之后，我尝试用感知器模型训练一个神经网络来学习 XOR 门。

我有 2 个输入和 1 个输出。

我的大部分代码与视频非常相似，除了我使用了不同的异或门数据集。

训练完模型后我遇到了问题：即使用了 10 万条训练数据，它也猜不出正确答案，我不知道为什么。

这是我的完整代码：

Perceptron.h

#include <stdio.h>      /* printf, scanf, puts, NULL */
#include <stdlib.h>     /* srand, rand */
#include <time.h>       /* time */
#include <vector>

using std::vector;

/**
 * Single-neuron perceptron with two inputs, a bias term and a step activation.
 *
 * The neuron outputs 1 when (w . x + bias) >= 0 and 0 otherwise, and is
 * trained with the classic perceptron rule: w += lr * (target - guess) * x.
 *
 * The bias acts as the weight of an extra input that is always 1. Without it
 * the weighted sum for the input (0, 0) is always 0, so the output for that
 * input could never be changed by training.
 *
 * NOTE: a single perceptron can only separate linearly separable data
 * (AND, OR, ...). XOR is not linearly separable, so this class alone cannot
 * learn it no matter how long it is trained.
 */
class Perceptron {
private:
    std::vector<float> weights; // one weight per input
    float bias = 0.0f;          // weight of the constant-1 bias input
    float lr = 0.15f;           // learning rate

    /** Seeds rand() the first time any Perceptron is built, and only then. */
    static void seedOnce()
    {
        // Reseeding on every construction would restart the global rand()
        // sequence and give identical weights to perceptrons created within
        // the same second.
        static bool seeded = false;
        if (!seeded) {
            srand(static_cast<unsigned>(time(nullptr)));
            seeded = true;
        }
    }

    /** Returns -1 or +1 with equal probability. */
    static float randomSign()
    {
        return (rand() % 2 == 0) ? -1.0f : 1.0f;
    }

    /** Number of input values that have a matching weight. */
    std::vector<float>::size_type usableCount(const std::vector<float>& inputs) const
    {
        return inputs.size() < weights.size() ? inputs.size() : weights.size();
    }

public:
    /** Creates a perceptron whose two weights and bias start at -1 or +1. */
    Perceptron()
    {
        seedOnce();
        for (int i = 0; i < 2; i++)
            weights.push_back(randomSign());
        bias = randomSign();
    }

    /**
     * Step activation function.
     * @param n weighted sum of the inputs
     * @return 1 when n >= 0, otherwise 0
     */
    int sign(float n) const
    {
        return (n >= 0) ? 1 : 0;
    }

    /**
     * Predicts the output for one input vector.
     * @param inputs input values; entries beyond the number of weights are
     *               ignored and missing entries are treated as 0
     * @return 0 or 1
     */
    int guess(const std::vector<float>& inputs) const
    {
        float sum = bias; // bias input is always 1, so it contributes 1 * bias
        const std::vector<float>::size_type count = usableCount(inputs);
        for (std::vector<float>::size_type i = 0; i < count; i++)
            sum += inputs[i] * weights[i];
        return sign(sum);
    }

    /**
     * Performs one perceptron-rule update for a single labelled sample.
     * @param inputs input values (same handling as in guess())
     * @param target expected output, 0 or 1
     */
    void train(const std::vector<float>& inputs, int target)
    {
        const int error = target - guess(inputs); // -1, 0 or +1
        if (error == 0)
            return; // already correct: nothing to adjust

        const std::vector<float>::size_type count = usableCount(inputs);
        for (std::vector<float>::size_type i = 0; i < count; i++)
            weights[i] += error * inputs[i] * lr;
        bias += error * lr; // bias input is the constant 1
    }
};

Training.h

#include <stdio.h>      /* printf, scanf, puts, NULL */
#include <stdlib.h>     /* srand, rand */
#include <time.h>       /* time */
#include <iostream>  
#include <vector>

using std::vector;

/**
 * One randomly generated training sample for the XOR gate.
 *
 * Construction draws two bits from rand() (first x1, then x2) and stores the
 * XOR of the two bits as the label. All values are kept as floats.
 */
class XOR {
private:
    float x1;
    float x2;
    float label;

public:
    /**
     * Draws both inputs from rand() and derives the label.
     * Members are initialized in declaration order, so x1 consumes the first
     * rand() value and x2 the second.
     */
    XOR()
        : x1(static_cast<float>(rand() % 2)),
          x2(static_cast<float>(rand() % 2)),
          label((x1 != x2) ? 1.0f : 0.0f) // XOR: 1 only when the bits differ
    {
    }

    /** @return the first input bit (0 or 1). */
    float getX1()
    {
        return x1;
    }

    /** @return the second input bit (0 or 1). */
    float getX2()
    {
        return x2;
    }

    /** @return the expected XOR output (0 or 1). */
    float getLabel()
    {
        return label;
    }

    /** @return both input bits as a two-element vector {x1, x2}. */
    std::vector<float> getInputs()
    {
        std::vector<float> pair;
        pair.reserve(2);
        pair.push_back(x1);
        pair.push_back(x2);
        return pair;
    }

    /** @return the training target; same value as getLabel(). */
    float getTarget()
    {
        return label;
    }
};

main.cpp

#include "Perceptron.h"
#include <iostream>
#include "Training.h"

using std::cout;
using std::endl;

int main()
{
    Perceptron brain;
    vector<XOR> trainingData(100);
    for (int i = 0; i < 100; i++)
    {
        brain.train(trainingData[i].getInputs(), trainingData[i].getTarget());
    }
    
    vector<float> inputs = { 0,0 };
    vector<float> inputs2 = { 0,1 };
    vector<float> inputs3 = { 1,0 };
    vector<float> inputs4 = { 1,1 };

    int guess1 = 0;

    guess1 = brain.guess(inputs);
    cout  << "guess: " << guess1 << endl;

    guess1 = brain.guess(inputs2);
    cout << "guess: " << guess1 << endl;

    guess1 = brain.guess(inputs3);
    cout << "guess: " << guess1 << endl;

    guess1 = brain.guess(inputs4);
    cout << "guess: " << guess1 << endl;

    return 0;
}

您可以在自己的机器上运行这段代码并自行测试。更奇怪的是，多次运行它会得到不同的输出。

Answer 1

答案是增加一个值恒为 1 的偏置输入（以及它对应的权重）。否则当输入为 0,0 时，加权和恒为 0，输出与权重无关，训练也就无法修正这种情况。

另外，我用来学习 XOR 运算的模型太基础了，因为它不适用于 2 个以上的场景：单个感知器只能区分线性可分的数据，而 XOR 并不是线性可分的。

训练感知器模型

1 个答案: