每当我尝试使用多个线程运行它时,我的代码就会挂起。主(master)端的代码如下:
double CMpifun::sendData2()
{
double *tStatistics=new double[8], tmp_time; // wall clock time
double SY, Sto, header[SZ_HEADER];
int a_tasks=0, file_p=0;
vector<myDataType *> d = getData();
int idx=0;
opt_k.k=1; opt_k.proc_files=0; opt_k.p=this->node_sz; SY=0; Sto=0;
std::fill(header,header+SZ_HEADER,-1);
omp_set_num_threads(4);// for now
// parallel region
#pragma omp parallel default(none) shared(idx,SY,Sto,d,a_tasks,stdout) firstprivate(header) //firstprivate(dat_dim,dat)
{
int tid = omp_get_thread_num(), cur_idx, cur_k; int N=d.size();
while (idx<N) { // Assign tasks and fetch results where available
printf("-------------------------\n%d - 1\n", tid); fflush(stdout);
#pragma omp critical(update__a_task)
{
printf("%d - critique 1\n", tid); fflush(stdout);
if (idx<N) {
printf("%d - critique 2\n", tid); fflush(stdout);
if (a_tasks<node_sz-1){ // available nodes to assign
printf("%d - 2.1\n", tid); fflush(stdout);
MPI_Recv(header,SZ_HEADER,MPI_DOUBLE,MPI_ANY_SOURCE,TAG_HEADER,MY_COMM_GRP,this->Stat);
cur_idx=idx; cur_k=opt_k.k; idx+=cur_k;
a_tasks+=cur_k;
} else {// all nodes assigned. only fetch result
printf("%d - 2.2\n", tid); fflush(stdout);
MPI_Recv(header,SZ_HEADER,MPI_DOUBLE,MPI_ANY_SOURCE,TAG_RESULT,MY_COMM_GRP,this->Stat);
}
}else ;//printf("%d - done task assignment\n", tid); fflush(stdout);
}
printf("%d - 3\n", tid); fflush(stdout);
if (cur_idx<N) {
printf("%d: cur_idx:%d, opt_k.k:%d, idx:%d, N:%d \n", tid, cur_idx,opt_k.k,idx,N); fflush(stdout);
if(this->Stat->MPI_TAG == TAG_HEADER){ // serve tasks
printf("%d - task %d being assigned to %d\n", tid,cur_idx,(int)header[4]); fflush(stdout);
while (cur_k && cur_idx<N) {
printf("%d - T1\n", tid); fflush(stdout);
header[1]=d[cur_idx]->nRows; header[2]=d[cur_idx]->nCols; header[3]=cur_idx;
header[9]=--cur_k;
MPI_Send(header,SZ_HEADER,MPI_DOUBLE,(int)header[4],TAG_HEADER,MY_COMM_GRP);
printf("%d - T2 %d\n", tid,(int)header[4]); fflush(stdout);
MPI_Send(d[cur_idx]->data,d[cur_idx]->nRows*d[cur_idx]->nCols,MPI_DOUBLE,(int)header[4],TAG_DATA,MY_COMM_GRP);
printf("%d - T3 %d\n", tid,(int)header[4]); fflush(stdout);
delete[] d[cur_idx]->data; ++cur_idx;
}
} else if(this->Stat->MPI_TAG == TAG_RESULT){ // collect results
printf("%d - result from %d\n", tid,(int)header[4]); fflush(stdout);
while(true){
printf("%d - R1\n", tid); fflush(stdout);
#pragma omp atomic
--a_tasks;
double *results = new double[(int)(header[1]*header[2])];
MPI_Recv(results,(int)(header[1]*header[2]),MPI_DOUBLE,(int)header[4],TAG_DATA,MY_COMM_GRP,this->Stat);
printf("%d - R2 received result from %d\n", tid,(int)header[4]); fflush(stdout);
delete[] results;
if ((int)header[9]>0) {
MPI_Recv(header,SZ_HEADER,MPI_DOUBLE,(int)header[4],TAG_RESULT,MY_COMM_GRP,this->Stat);
} else break;
} //end while
} // end collect results
} //end if(loopmain)
printf("%d - NExt idx: %d\n", tid,idx); fflush(stdout);
} // end while(loopmain)
} // end parallel section]
printf("<<<<<<<<<<<<< MASTER - COLLECTING RESULTS >>>>>>>>>>>> "); fflush(stdout);
printf("MASTER - pending tasks:%d\n",a_tasks); fflush(stdout);
while (a_tasks>0) {
printf("MASTER - wait for slave result request... pending tasks:%d\n",a_tasks); fflush(stdout);
MPI_Recv(header,SZ_HEADER,MPI_DOUBLE,MPI_ANY_SOURCE,TAG_RESULT,MY_COMM_GRP,this->Stat);
while (true) {
double *results = new double[(int)(header[1]*header[2])];
printf("MASTER - wait for result from %d... pending tasks\n",(int)header[4]); fflush(stdout);
MPI_Recv(results,(int)(header[1]*header[2]),MPI_DOUBLE,(int)header[4],TAG_DATA,MY_COMM_GRP,this->Stat);
delete[] results;
--a_tasks;
if ((int)header[9]>0) {
printf("MASTER - result from slave .. some more\n"); fflush(stdout);
MPI_Recv(header,SZ_HEADER,MPI_DOUBLE,(int)header[4],TAG_RESULT,MY_COMM_GRP,this->Stat);
} else break;
}
}
message("<<<<<<<<<<<<<<<<<< MASTER - terminate slaves >>>>>>>>>>>>>>>>>");
for(int i=1;i<node_sz;++i){ // terminate
MPI_Recv(header,SZ_HEADER,MPI_DOUBLE,MPI_ANY_SOURCE,TAG_HEADER,MY_COMM_GRP,this->Stat);
printf("MASTER - terminate to signal %d\n",(int)header[4]); fflush(stdout);
MPI_Send(header,SZ_HEADER,MPI_DOUBLE,(int)header[4],TAG_TERMINATE,MY_COMM_GRP);
printf("MASTER - done terminated %d\n",(int)header[4]); fflush(stdout);
}
printf("MASTER - bye\n"); fflush(stdout);
return 0;
Slave 端的函数如下:
void CMpifun::slave2()
{
double *Data; vector<myDataType> dataQ; vector<hist_type> resQ;
char out_opt='b'; // irrelevant
myDataType *out_im = new myDataType; hist_type *out_hist; CLdp ldp;
int file_cnt=0; double tmp_t; //local variables
double time_arr[3]={}; //1: task wait latency, 2: task set total send time, 3: taskset total process time
while (true) { // main while loop
printf("Slave: %d - ........... ready for task......\n",myRank); fflush(stdout);
header[4]=myRank; MPI_Send(header,SZ_HEADER,MPI_DOUBLE,MASTER,TAG_HEADER,MY_COMM_GRP);
time_arr[0] = MPI_Wtime();
printf("Slave: %d - got master. waiting for task\n",myRank); fflush(stdout);
MPI_Recv(header,SZ_HEADER,MPI_DOUBLE,MASTER,MPI_ANY_TAG,MY_COMM_GRP,this->Stat);
time_arr[0] = MPI_Wtime() - time_arr[0]; // wait for task latency
if(this->Stat->MPI_TAG == TAG_TERMINATE) {
printf("Slave: %d - terminate signal received\n",myRank); fflush(stdout);
break;
}
printf("Slave: %d - got header. waiting for data\n",myRank); fflush(stdout);
//receive data
tmp_t = MPI_Wtime();
while(true) {
Data=new double[(int)(header[1]*header[2])];
MPI_Recv(Data,(int)(header[1]*header[2]),MPI_DOUBLE,MASTER,TAG_DATA,MY_COMM_GRP,this->Stat);
myDataType d; d.data=Data; d.nRows=(int)header[1]; d.nCols=(int)header[2];
dataQ.push_back(d);
file_cnt++;
if ((int)header[9]) {
MPI_Recv(header,SZ_HEADER,MPI_DOUBLE,MASTER,TAG_HEADER,MY_COMM_GRP,this->Stat);
} else break;
}
time_arr[1] = (MPI_Wtime()-tmp_t); // Total bandwidth time for entire taskset
file_cnt = dataQ.size();
tmp_t = MPI_Wtime();
printf("Slave: %d - got data. processing\n",myRank); fflush(stdout);
while (dataQ.size()) { // process data
out_hist = new hist_type();
myDataType d = dataQ.back(); dataQ.pop_back(); // critical section
ldp.process(d.data, d.nRows,d.nCols,out_opt,out_im, out_hist);
resQ.push_back(*out_hist); out_hist=0;
delete[] d.data; delete[] out_im->data;
}
time_arr[2] = (MPI_Wtime()-tmp_t); // Total processing time for entire taskset
// tuma results
//time_arr[1] /= file_cnt; time_arr[2] /= file_cnt;
printf("Slave: %d - sending results\n",myRank); fflush(stdout);
header[4]=myRank; header[6]=time_arr[0]; header[7]=time_arr[1]; header[8]=time_arr[2];
for (size_t i = 0; i < resQ.size(); i++) {
header[1]=resQ[i].h_nHists; header[2]=resQ[i].h_binSz; header[9]=resQ.size()-i-1;
MPI_Send(header,SZ_HEADER,MPI_DOUBLE,MASTER,TAG_RESULT,MY_COMM_GRP);
MPI_Send(resQ[i].hist_data,resQ[i].h_nHists*resQ[i].h_binSz,MPI_DOUBLE,MASTER,TAG_DATA,MY_COMM_GRP);
}
resQ.clear();
} // end main while loop
message("terminating");
}
它在 if (idx<N)
循环进行了随机次数的迭代后挂起。我已经在这个问题上卡了整整两天。能否请各位仔细检查一下代码,告诉我导致问题的原因是什么?提前感谢所有帮助。
答案 0(得分:0)
我决定为每个 MPI 通信对创建唯一的消息标签(tag)——这确实解决了问题。有没有办法可以删除这个提问?