为什么我的涂鸦分类器无法正确解析我的数据?

时间:2018-05-31 09:01:28

标签: c++ data-structures neural-network

我为 Doodle Classifier 项目编写了一个 Data 类。我从 Google Quick Draw 数据集中获取了 3 种涂鸦类型的二进制文件,把它们分别转换成三个向量,然后将这三个向量连接起来并随机打乱,以生成训练数据。我在 type 数组中为每种涂鸦类型分配了不同的索引。该类的代码如下:

Data.h =>

#pragma once
#pragma warning(disable:4996)

#include "ImageHandling.h"

#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <random>
#include <string>
#include <vector>

using namespace std;

// Loads the raw doodle data files for three classes (cactus, skull, guitar)
// and exposes them as labelled training and testing sets.
class Data {
public:
    // Number of samples per class requested at construction.
    int num;
    // Loads, parses and splits the data; the argument is stored in `num`.
    Data(int);

    // One labelled sample.
    struct data {
        // Pixel values of one image; the prep functions fill 784 entries each.
        vector<double> vec;
        // One-hot class label with three entries.
        vector<double> type;
    };

    // Random engine used by randomize(); seeded in the constructor.
    mt19937 gen;

    // Per-class testing samples, filled by TestingPrep().
    vector<Data::data> cactus_testing;
    vector<Data::data> skull_testing;
    vector<Data::data> guitar_testing;

    // Combined sets: `training` is concatenated and randomized,
    // `testing` is concatenated only.
    vector<Data::data> training;
    vector<Data::data> testing;

private:
    // Returns the three sample lists joined end to end, in argument order.
    vector<Data::data> concatData(vector<Data::data>, vector<Data::data>, vector<Data::data>);
    // Returns a randomly reordered copy of the given samples, using `gen`.
    vector<Data::data> randomize(vector<Data::data>);
    // Reads the file at path `add` in binary mode and returns one double per byte.
    vector<double> binToVec(string add);
    // static vector<double> pngToVec(string add);

    // Build the per-class and combined training / testing sets from the
    // parsed `_*_data` buffers.
    void TrainginPrep();
    void TestingPrep();


    // Paths of the three input files, chosen in the constructor from `num`.
    string _cactus;
    string _skull;
    string _guitar;

    // Parsed contents of the three input files, one double per byte.
    vector<double> _cactus_data;
    vector<double> _skull_data;
    vector<double> _guitar_data;

    // Per-class training samples, filled by TrainginPrep().
    vector<Data::data> _cactus_training;
    vector<Data::data> _skull_training;
    vector<Data::data> _guitar_training;
};

Data.cpp =>

#include "Data.h"

using namespace std;

// Loads the three doodle data files, parses them into flat pixel buffers and
// builds the training and testing sets.
//
// n selects the data set: 10000 or 1000 samples per class. Any other value
// prints an error and terminates the process with exit code 1.
//
// NOTE(review): the default argument sits on this out-of-class definition, so
// it is only visible to code that sees this definition, and strictly
// conforming compilers may reject it because it turns this into a default
// constructor. It is kept to leave the existing interface untouched; consider
// moving it to the in-class declaration.
Data::Data(int n = 10000)
{
    this->num = n;
    // Fixed seed so the ordering produced by randomize() is repeatable.
    gen.seed(1);

    cout << "Loading Files...\n";

    // The two supported data sets differ only by a size tag that appears in
    // both the directory name ("10kData") and the file name ("cactus10k.bin").
    string tag;
    if (this->num == 10000) {
        tag = "10k";
    }
    else if (this->num == 1000) {
        tag = "1k";
    }
    else {
        cout << "Invalid Number Entered!\n";
        exit(1);
    }

    const string base = "C:/Users/CHOPRA/Documents/Visual Studio 2015/Projects/DoodleClassifier/imgData/";
    this->_cactus = base + tag + "Data/cactus" + tag + ".bin";
    this->_guitar = base + tag + "Data/guitar" + tag + ".bin";
    this->_skull  = base + tag + "Data/skull" + tag + ".bin";

    cout << "All Files Loaded Succesfully!\n\n";

    cout << "Initiliazing File Parsing...\n";
    // Parse each bin file into a flat vector holding one double per byte.
    this->_cactus_data = binToVec(this->_cactus);
    this->_guitar_data = binToVec(this->_guitar);
    this->_skull_data = binToVec(this->_skull);

    cout << "All Files Parsed Succesfully!\n\n";

    cout << "\nSize of Cactus Data: " << _cactus_data.size();
    cout << "\nSize of Guitar Data: " << _guitar_data.size();
    cout << "\nSize of Skull Data: " << _skull_data.size() << "\n\n";

    cout << "Initialzing Training Data Prep...\n";
    TrainginPrep();
    cout << "Training Data Prepared!\n";
    cout << "Size of Training Data = " << this->training.size() << "\n\n";


    cout << "Initialzing Testing Data Prep...\n";
    TestingPrep();
    cout << "Testing Data Prepared!\n\n";
    cout << "Size of Testing Data = " << this->testing.size() << "\n\n";

}

// Reads the whole file at path `add` in binary mode and returns its contents
// as doubles, one per byte, each in the range 0..255.
//
// If the file cannot be opened, a message is written to cerr and an empty
// vector is returned.
std::vector<double> Data::binToVec(string add)
{
    fstream file(add, ios::binary | ios::in);
    if (!file) {
        cerr << "Could not open data file: " << add << "\n";
        return vector<double>();
    }

    const vector<char> bytes((istreambuf_iterator<char>(file)), (istreambuf_iterator<char>()));

    vector<double> values;
    values.reserve(bytes.size());

    for (const char byte : bytes) {
        // Plain char may be signed (it is on MSVC/x86), which would turn the
        // bytes 128..255 into negative numbers. Widen via unsigned char so
        // every byte keeps its 0..255 value.
        values.push_back(static_cast<double>(static_cast<unsigned char>(byte)));
    }

    return values;
}

// Joins three sample lists into one, keeping the order a, then b, then c.
// The inputs are taken by value and are not modified for the caller.
vector<Data::data>  Data::concatData(vector<Data::data> a, vector<Data::data> b, vector<Data::data> c)
{
    // Start from a copy of the first list, then append the other two.
    vector<Data::data> merged(a);

    for (const Data::data& sample : b) {
        merged.push_back(sample);
    }
    for (const Data::data& sample : c) {
        merged.push_back(sample);
    }

    return merged;
}

// Returns the given samples in a uniformly random order.
//
// Uses the member engine `gen`, so the order depends on its current state.
// An empty input is returned unchanged.
vector<Data::data> Data::randomize(vector<Data::data> vec){
    // A full shuffle touches every position. Swapping only size/2 random
    // pairs would leave many samples in their original class-sorted slots.
    std::shuffle(vec.begin(), vec.end(), gen);
    return vec;
}

void Data::TrainginPrep()
{
    cout << "Trainining Data Prep Initilized...\n";
    int val = (int)(num * 0.8);

    //Resizing Vectors
    this->_cactus_training.resize(val);
    this->_guitar_training.resize(val);
    this->_skull_training.resize(val);

    // Preparing Indivdual Training Data Set
    for(int i = 0; i < val ; ++i ){
        this->_cactus_training[i].type =  {1, 0, 0};
        this->_guitar_training[i].type =  {0, 1, 0};
        this->_skull_training[i].type =  {0, 0, 1};

        for (int j = 0; j < 784; ++j) {
            int index = (i * 784) + j;
            this->_cactus_training[i].vec.push_back(this->_cactus_data[index]);
            this->_guitar_training[i].vec.push_back(this->_guitar_data[index]);
            this->_skull_training[i].vec.push_back(this->_skull_data[index]);
        }
    }

    this->training = randomize(concatData(
        this->_cactus_training,
        this->_skull_training,
        this->_guitar_training ));

    }

    void Data::TestingPrep()
    {
        cout << "Testing Data Prep Initilized...\n";

        int val = (int)(num * 0.2);
        //Resizing Vectors
        this->cactus_testing.resize(val);
        this->skull_testing.resize(val);
        this->guitar_testing.resize(val);

        int buffer = num - val;

        //Preparing  Indivdual Testing Data Set
        for (int i = 0; i < val; ++i) {
            this->cactus_testing[i].type =  {1, 0, 0};
            this->skull_testing[i].type =  {0, 1, 0};
            this->guitar_testing[i].type =  {0, 0, 1};
            for (int j = 0; j < 784; ++j) {
                int index = ((buffer + i) * 784) + j;
                this->cactus_testing[i].vec.push_back(this->_cactus_data[index]);
                this->skull_testing[i].vec.push_back(this->_skull_data[index]);
                this->guitar_testing[i].vec.push_back(this->_guitar_data[index]);
            }
        }

    this->testing = concatData(
        this->cactus_testing,
        this->skull_testing,
        this->guitar_testing );
    }

每当我使用这些数据时,结果总是与预期不符,并且总是偏向索引 1(0 = 仙人掌,1 = 吉他,2 = 头骨)。

神经网络可以使用的数据集的创建是否存在严重错误?

0 个答案:

没有答案