我为Doodle Classifier项目编写了一个Data类,我从Google Quick Draw Dataset中获取了3种不同涂鸦类型的二进制文件,将它们转换为三个单独的向量,通过连接每个向量然后随机化它来制作训练数据。我在类型数组中为每个类型分配了不同的索引。 添加类的代码下面:
Data.h =>
#pragma once
#pragma warning(disable:4996)
#include "ImageHandling.h"
#include <string>
#include <vector>
#include <fstream>
#include <random>
#include <iostream>
using namespace std;
class Data {
public:
int num;
Data(int);
struct data {
vector<double> vec;
vector<double> type;
};
mt19937 gen;
vector<Data::data> cactus_testing;
vector<Data::data> skull_testing;
vector<Data::data> guitar_testing;
vector<Data::data> training;
vector<Data::data> testing;
private:
vector<Data::data> concatData(vector<Data::data>, vector<Data::data>, vector<Data::data>);
vector<Data::data> randomize(vector<Data::data>);
vector<double> binToVec(string add);
// static vector<double> pngToVec(string add);
void TrainginPrep();
void TestingPrep();
string _cactus;
string _skull;
string _guitar;
vector<double> _cactus_data;
vector<double> _skull_data;
vector<double> _guitar_data;
vector<Data::data> _cactus_training;
vector<Data::data> _skull_training;
vector<Data::data> _guitar_training;
};
Data.cpp =&gt;
#include "Data.h"
using namespace std;
Data::Data(int n = 10000)
{
this->num = n;
gen.seed(1);
cout << "Loading Files...\n";
if (this->num == 10000) {
this->_cactus = "C:/Users/CHOPRA/Documents/Visual Studio 2015/Projects/DoodleClassifier/imgData/10kData/cactus10k.bin";
this->_guitar = "C:/Users/CHOPRA/Documents/Visual Studio 2015/Projects/DoodleClassifier/imgData/10kData/guitar10k.bin";
this->_skull = "C:/Users/CHOPRA/Documents/Visual Studio 2015/Projects/DoodleClassifier/imgData/10kData/skull10k.bin";
}
else if (this->num == 1000) {
this->_cactus = "C:/Users/CHOPRA/Documents/Visual Studio 2015/Projects/DoodleClassifier/imgData/1kData/cactus1k.bin";
this->_guitar = "C:/Users/CHOPRA/Documents/Visual Studio 2015/Projects/DoodleClassifier/imgData/1kData/guitar1k.bin";
this->_skull = "C:/Users/CHOPRA/Documents/Visual Studio 2015/Projects/DoodleClassifier/imgData/1kData/skull1k.bin";
}
else {
cout << "Invalid Number Entered!;
exit(1);
}
cout << "All Files Loaded Succesfully!\n\n";
cout << "Initiliazing File Parsing...\n";
//Parsing the bin files to a vector
this->_cactus_data = binToVec(this->_cactus);
this->_guitar_data = binToVec(this->_guitar);
this->_skull_data = binToVec(this->_skull);
cout << "All Files Parsed Succesfully!\n\n";
cout << "\nSize of Cactus Data: " << _cactus_data.size();
cout << "\nSize of Guitar Data: " << _guitar_data.size();
cout << "\nSize of Skull Data: " << _skull_data.size() << "\n\n";
cout << "Initialzing Training Data Prep...\n";
TrainginPrep();
cout << "Training Data Prepared!\n";
cout << "Size of Training Data = " << this->training.size() << "\n\n";
cout << "Initialzing Testing Data Prep...\n";
TestingPrep();
cout << "Testing Data Prepared!\n\n";
cout << "Size of Testing Data = " << this->testing.size() << "\n\n";
}
std::vector<double> Data::binToVec(string add)
{
fstream FILE(add, ios::binary | ios::in);
vector<char> buffer((istreambuf_iterator<char>(FILE)), (istreambuf_iterator<char>()));
vector<double> buffer_d;
for (int i = 0; i < buffer.size(); ++i) {
buffer_d.push_back((double)buffer[i]);
}
return buffer_d;
}
vector<Data::data> Data::concatData(vector<Data::data> a, vector<Data::data> b, vector<Data::data> c)
{
vector<Data::data> res;
res.insert(res.end(), a.begin(), a.end());
res.insert(res.end(), b.begin(), b.end());
res.insert(res.end(), c.begin(), c.end());
return res;
}
vector<Data::data> Data::randomize(vector<Data::data> vec){
for(int i = 0; i < (int)(vec.size() * 0.5); ++i){
int n1 = gen() % vec.size();
int n2 = gen() % vec.size();
Data::data temp = vec[n1];
vec[n1] = vec[n2];
vec[n2] = temp;
}
return vec;
}
void Data::TrainginPrep()
{
cout << "Trainining Data Prep Initilized...\n";
int val = (int)(num * 0.8);
//Resizing Vectors
this->_cactus_training.resize(val);
this->_guitar_training.resize(val);
this->_skull_training.resize(val);
// Preparing Indivdual Training Data Set
for(int i = 0; i < val ; ++i ){
this->_cactus_training[i].type = {1, 0, 0};
this->_guitar_training[i].type = {0, 1, 0};
this->_skull_training[i].type = {0, 0, 1};
for (int j = 0; j < 784; ++j) {
int index = (i * 784) + j;
this->_cactus_training[i].vec.push_back(this->_cactus_data[index]);
this->_guitar_training[i].vec.push_back(this->_guitar_data[index]);
this->_skull_training[i].vec.push_back(this->_skull_data[index]);
}
}
this->training = randomize(concatData(
this->_cactus_training,
this->_skull_training,
this->_guitar_training ));
}
void Data::TestingPrep()
{
cout << "Testing Data Prep Initilized...\n";
int val = (int)(num * 0.2);
//Resizing Vectors
this->cactus_testing.resize(val);
this->skull_testing.resize(val);
this->guitar_testing.resize(val);
int buffer = num - val;
//Preparing Indivdual Testing Data Set
for (int i = 0; i < val; ++i) {
this->cactus_testing[i].type = {1, 0, 0};
this->skull_testing[i].type = {0, 1, 0};
this->guitar_testing[i].type = {0, 0, 1};
for (int j = 0; j < 784; ++j) {
int index = ((buffer + i) * 784) + j;
this->cactus_testing[i].vec.push_back(this->_cactus_data[index]);
this->skull_testing[i].vec.push_back(this->_skull_data[index]);
this->guitar_testing[i].vec.push_back(this->_guitar_data[index]);
}
}
this->testing = concatData(
this->cactus_testing,
this->skull_testing,
this->guitar_testing );
}
每当我使用这些数据时,结果永远不会像预期的那样,总是偏向索引1.(0 =仙人掌,1 =吉他,2 =头骨)
神经网络可以使用的数据集的创建是否存在严重错误?