Question

我写了一个简单的感知器（Perceptron）模型。写完代码后没有出现任何错误，这让我非常惊讶。但我的模型似乎没有收敛（还有其他一些奇怪的现象）。

基本上，它在每个 epoch（训练轮次）中都只能答对 25/100 个样本，而且每个 epoch 结束时，权重总是回到 0。

由于代码位于多个文件上，我将它放在Google云端硬盘上，它是： https://drive.google.com/folderview?id=0B_r3mf9HbUrLaDNlc1F6RXhNMnM&usp=sharing

这是一个 Visual Studio Community 2013 项目。您可以打开并运行它，以便更好地了解问题。

这里是文件的快速预览。

main.cpp中：

#include <iostream>
#include <vector>
#include <algorithm>
#include <fstream>
#include <string>
#include <math.h> 

#include "LinearAlgebra.h"
#include "MachineLearning.h"

using namespace std;
using namespace LinearAlgebra;
using namespace MachineLearning;

// Prints every row of X to stdout: elements separated by spaces, one row per line.
void printVector(vector< vector<float> > X);
// Reads 100 four-feature samples, one feature column each from a.data, b.data, c.data and d.data.
vector< vector<float> > getIrisX();
// Reads 100 class names from y.data; "Iris-setosa" becomes 1, anything else -1.
vector<float> getIrisy();

// Trains a perceptron on the data loaded from the *.data files and prints its
// prediction for two hand-picked samples.
int main()
{
    // Training set: one feature row per sample plus the matching +1/-1 label.
    vector< vector<float> > X = getIrisX();
    vector<float> y = getIrisy();

    // Two samples with known classes, used to sanity-check the trained model.
    vector<float> test1{ 5.0f, 3.3f, 1.4f, 0.2f };
    vector<float> test2{ 6.0f, 2.2f, 5.0f, 1.5f };

    //printVector(X);
    //for (int i = 0; i < y.size(); i++){ cout << y[i] << " "; }cout << endl;

    // Learning rate 0.1, 10 passes over the training set.
    perceptron clf(0.1, 10);
    clf.fit(X, y);

    const int firstGuess = clf.predict(test1);
    cout << "Now Predicting: 5.0,3.3,1.4,0.2(CorrectClass=1,Iris-setosa) -> " << firstGuess << endl;

    const int secondGuess = clf.predict(test2);
    cout << "Now Predicting: 6.0,2.2,5.0,1.5(CorrectClass=-1,Iris-virginica) -> " << secondGuess << endl;

    // Keeps the console window open until a key is pressed.
    system("PAUSE");
    return 0;
}

// Writes the matrix X to stdout: each row on its own line, every element
// followed by a single space.
void printVector(vector< vector<float> > X)
{
    for (const vector<float>& row : X)
    {
        for (const float value : row)
        {
            cout << value << " ";
        }
        cout << endl;
    }
}

// Loads the class labels for the 100 training samples from "y.data".
// Each whitespace-separated token is a class name: "Iris-setosa" is encoded
// as 1, every other name as -1.
vector<float> getIrisy()
{
    ifstream labelFile("y.data");

    vector<float> labels;
    // Declared outside the loop on purpose: a failed read leaves the previous
    // contents to be classified again, exactly one label is emitted per pass.
    string className;

    int remaining = 100;
    while (remaining-- > 0)
    {
        labelFile >> className;
        labels.push_back(className == "Iris-setosa" ? 1.0f : -1.0f);
    }

    return labels;
}

// Loads the feature matrix for the 100 training samples.
//
// Each of the four feature columns is stored in its own file ("a.data",
// "b.data", "c.data", "d.data"). Every record is read as an integer, one
// separator character and then the float value; only the float is kept.
//
// Returns 100 rows of four floats. A feature whose file is missing or runs
// out of valid records is reported as 0 rather than an indeterminate value.
vector< vector<float> > getIrisX()
{
    // The streams close themselves when they go out of scope.
    ifstream af("a.data");
    ifstream bf("b.data");
    ifstream cf("c.data");
    ifstream df("d.data");

    vector< vector<float> > X;
    X.reserve(100);

    for (int i = 0; i < 100; i++)
    {
        // Discard the leading integer and the separator that precede each value.
        int scrapN = 0;
        char scrap = 0;
        af >> scrapN >> scrap;
        bf >> scrapN >> scrap;
        cf >> scrapN >> scrap;
        df >> scrapN >> scrap;

        // Zero-initialised: extraction from a stream that has already failed
        // does not write to its target, which previously left these floats
        // uninitialised before they were copied into X.
        float a = 0.0f, b = 0.0f, c = 0.0f, d = 0.0f;
        af >> a;
        bf >> b;
        cf >> c;
        df >> d;
        X.push_back(vector < float > {a, b, c, d});
    }

    return X;
}

MachineLearning.h：

#pragma once
#include<vector>

using namespace std;

namespace MachineLearning{

    // Binary perceptron classifier: predict() answers 1 or -1, fit() trains
    // the weights from labelled samples.
    class perceptron
    {
    public:
        // Stores the learning rate (eta) and the number of training epochs.
        perceptron(float eta,int epochs);
        // Returns the bias weight plus the sum of X[i] * weight[i + 1].
        float netInput(vector<float> X);
        // Step function over netInput(): 1 when the net input is > 0, otherwise -1.
        int predict(vector<float> X);
        // Trains the weights on the sample rows X and their labels y, making
        // m_epochs passes over the data.
        void fit(vector< vector<float> > X, vector<float> y);
    private:
        // Learning rate multiplied into every weight update.
        float m_eta;
        // Number of passes over the training set performed by fit().
        int m_epochs;
        // Weights: m_w[0] is the bias term, m_w[i + 1] belongs to feature i.
        vector < float > m_w;
        // One entry per epoch: how many samples produced a non-zero update.
        vector < float > m_errors;
    };

}

MachineLearning.cpp

#include<vector>
#include <algorithm>
#include <iostream>
#include<fstream>
#include <math.h> 

#include "MachineLearning.h"

using namespace std;

namespace MachineLearning{

    // Remembers the learning rate and the number of epochs fit() will run.
    // The weight vector and error history start out empty.
    perceptron::perceptron(float eta, int epochs)
        : m_eta(eta), m_epochs(epochs)
    {
    }

    void perceptron::fit(vector< vector<float> > X, vector<float> y)
    {
        for (int i = 0; i < X[0].size() + 1; i++) // X[0].size() + 1 -> I am using +1 to add the bias term
        {
            m_w.push_back(0);
        }
        for (int i = 0; i < m_epochs; i++)
        {
            int errors = 0;
            for (int j = 0; j < X.size(); j++)
            {
                float update = m_eta * (y[j] - predict(X[j]));
                m_w[0] = update;
                for (int w = 1; w < m_w.size(); w++){ m_w[w] = update * X[j][w - 1]; }
                errors += update != 0 ? 1 : 0;
            }
            m_errors.push_back(errors);
        }
    }

    // Net input of the neuron: the bias m_w[0] plus the sum over all inputs
    // of X[i] * m_w[i + 1].
    float perceptron::netInput(vector<float> X)
    {
        float activation = m_w[0];

        // Weight slot 0 is the bias, so feature i pairs with slot i + 1.
        size_t slot = 1;
        for (const float feature : X)
        {
            activation += feature * m_w[slot];
            ++slot;
        }

        return activation;
    }

    // Step activation: 1 for a strictly positive net input, -1 otherwise.
    int perceptron::predict(vector<float> X)
    {
        if (netInput(X) > 0)
        {
            return 1;
        }
        return -1;
    }

}

非常感谢任何形式的帮助。

提前致谢。 Panos P。

Answer 1

经过数小时的繁琐调试后，我终于找到了错误。当我更新权重时，我的代码中有一个错误。

for (int j = 0; j < X.size(); j++)
        {
            float update = m_eta * (y[j] - predict(X[j]));
            m_w[0] = update;
            for (int w = 1; w < m_w.size(); w++){ m_w[w] = update * X[j][w - 1]; }
            errors += update != 0 ? 1 : 0;
        }

注意：

m_w[w] = update * X[j][w - 1]

我把权重直接设置成了更新量，而不是把更新量累加上去。看起来我漏掉了 "+" 号。现在它可以正常工作了。

现在是：

m_w[w] += update * X[j][w - 1]

有时候最愚蠢的失误会导致最烦人的 bug。希望这能帮到犯了同样错误的人。

C++ 中的感知器模型没有收敛

1 个答案: