在此代码中使用多个线程时遇到问题(这只是其中的一小部分):
template <typename Heuristic, typename Meta>
vector<double> kfold(Heuristic &heu,Meta &metaheu,mat &data,Col<int> &results,int k,int knearest,double pNeigh,double pCost,double pIni,bool st,bool units){
//Some stuff...
for (int j=0; j<k;j++){
mat auxMat = folds.slice(j);
Col<int> auxCol = res.col(j);
Instance auxI(auxU,pNeigh,pCost,&auxMat,&auxMat,&auxCol,&auxCol,un.n_rows);
threads1[j] = thread(searchInstanceStrat<Heuristic,Meta>,heu,metaheu,auxI,knearest,ref(tiempo),ref(reduction),j);
}
for (int b = 0; b<k; b++) threads1[b].join();
More stuff to do...
}
用线程调用的函数是:
template <typename Heuristic, typename Meta>
void searchInstanceStrat(Heuristic heu,Meta metaheu, Instance auxI, int knearest, vec &tiempo, vector<Instance> &reduction,int index){
auto start = chrono::high_resolution_clock::now();
pair<double,Instance> pairAux = heu.find(auxI,knearest);
//pair<double,Instance> pairAux = metaheu.find(pairAux2.second,knearest);
auto stop = chrono::high_resolution_clock::now();
using fpSeconds = chrono::duration<float,chrono::seconds::period>;
tiempo(index) = (double)(fpSeconds(stop - start).count());
reduction[index] = pairAux.second;
}
启发式课程是:
template<typename MetricType>
struct CNN{
/*Perform CNN, constructs a reduced data set beginning from one instance and adding each instance not
classified correctly */
MetricType* metric;
CNN(MetricType* met):metric(met){}
pair<double,Instance> find(Instance &initial, int knearest){
bool flag = false, flag2 = false;
Instance current = initial;
int j = 0;
vector <int> indexes(initial.units.n_rows);
for (int i=0;i<initial.units.n_rows;i++) indexes[i] = i;
random_shuffle(indexes.begin(),indexes.end());
for (int i = 0; (i < initial.units.n_rows) && !flag; i++){
Knn knn(current.training,current.trainResults,current.unique);
flag2 = false;
while ((j < current.originalTraining->n_rows) && !flag2){
mat query(current.originalTraining->row(indexes[j]));
Col<int> prediction(knn.search(query,knearest,*metric));
if (prediction(0) != (*(current.originaltrainResults))(indexes[j])){
Col<int> nunits(current.units);
nunits(indexes[j]) = 1;
current.units = nunits;
current.changeTrainingSet();
flag2 = true;
}
j++;
}
if (!flag2) flag = true; //flag becomes true when every point is classified correctly
}
Knn knn(current.training,current.trainResults,current.unique);
double costResult = knn.score(*(current.originalTraining),knearest,*metric,*(current.originaltrainResults));
return make_pair(costResult,current);
}
};
实例类是:
struct Instance{
Col <int> units;
double percenVecinity, percenCost;
mat* originalTraining;
mat* test;
Col<int>* originaltrainResults;
Col<int>* testResults;
mat training;
Col<int> trainResults;
int unique,totalInstances;
Instance(){}
Instance(Col<int> &u,double p1,double p2,mat* tr,mat* te,Col<int>* trr,Col<int>* ter,int un):
units(u),percenVecinity(p1),percenCost(p2),test(te),testResults(ter),unique(un),
originalTraining(tr),originaltrainResults(trr){
totalInstances = tr->n_rows;
int count = 0,index=0;
for (int i=0;i<u.n_rows;i++){ if (u(i)==1) count++; }
training.set_size(count,tr->n_cols);
trainResults.set_size(count);
for (int i=0;i<u.n_rows;i++){
if (u(i)==1){
training.row(index) = tr->row(i);
trainResults(index) = (*trr)(i);
index++;
}
}
}
}
还有Knn类和公制类,但我不认为它们对案例很重要。
当我调用kfold并创建k个线程以便每个人并行计算它的东西时会出现分段错误,并且在searchInstanceStrat中调用find函数时会发生这种情况
pair<double,Instance> pairAux = heu.find(auxI,knearest);
但是当我在每个线程创建之后放入线程[j] .join()时(有效地使其序列化)代码完美地工作。问题是并发性。但是我不明白为什么,不应该在thread()初始化中复制每个参数而不是在ref中这样每个线程都有传递给它的数据的工作副本吗?
这些类确实有指向其他数据的指针,但如果它是副本,它可能会影响我的代码吗?但是如果每个线程都有一个副本,我不明白为什么我的代码是坏的。
提前感谢您的帮助
答案 0 :(得分:2)
问题在于auxMat
和auxCol
。使用Instance
类将指向这些局部变量的指针传递给线程函数。在创建线程后立即在j
循环结束时销毁这些本地。当线程试图访问它们时,它可以访问被破坏的对象,为下一个线程访问部分构造的对象,或者为另一个线程构造的对象。
您希望将auxMat
和auxCol
值存储在Instance
中,或者可能是folds
和res
中原始数据的常量指针{1}}。