在我的主函数(master,rank 0)中,我广播了多个对象:
// Master side: publish the shared parameters to the other ranks.
// Every call passes 0 as the last argument (the broadcasting root).
// NOTE(review): these look like boost::mpi::broadcast calls, which are collective —
// every rank in mpi_world must issue the same broadcasts in the same order as below.
// Confirm the receiving side matches this sequence exactly; a rank that is one
// broadcast out of step will decode the wrong object type.

// Shutdown flag, sent first; false here.
bool kill_all_slave=false;
broadcast(mpi_world,kill_all_slave,0);
// Remaining parameters, in the order the receivers are expected to read them.
broadcast(mpi_world,periodicity_vector,0);
broadcast(mpi_world,J,0);
broadcast(mpi_world,x_range,0);
broadcast(mpi_world,y_range,0);
broadcast(mpi_world,z_range,0);
broadcast(mpi_world,component,0);
broadcast(mpi_world,min_bound,0);
在我的从进程(slave)函数中,我接收这些对象:
// Slave-side work loop: one outer pass per parameter set, one inner pass per job.
//
// Message sequence visible in this block (all traffic is with rank 0):
//   outer: broadcast periodicity_vector, J, x_range, y_range, z_range,
//          component, min_bound; send status "initial" (tag 0); recv stop (tag 0)
//   inner: recv job_id (tag 0), recv min_bound (tag 1); run periodic();
//          send status "finish" (tag 0), spin_tmp (tag 1), energy_tmp (tag 2),
//          periodicity_now (tag 3); recv stop (tag 0)
//   outer: broadcast kill_all_slave
//
// NOTE(review): broadcasts are collective, so the master has to issue them in
// exactly this order (kill_all_slave last in each round). Adding sleeps does
// not repair an ordering mismatch — confirm against the master loop.
while (kill_all_slave==false) {
    // ---- Receive the per-round global parameters from rank 0 ----
    vector<tuple<int,int,int,int,int,int> > periodicity_vector;
    std::cout << "[SLAVE: " << mpi_world.rank()<< "] I am waiting to receive periodicity vector"<<endl;
    broadcast(mpi_world,periodicity_vector,0);
    std::cout << "[SLAVE: " << mpi_world.rank()<< "] I am waiting to J "<<endl;
    broadcast(mpi_world,J,0);

    int x_range,y_range,z_range;
    double min_bound;
    map<int,int> component;
    // The "received Nth set" messages below are printed before the broadcast
    // they refer to; the strings are kept unchanged.
    cout<<"\n hello I am slave: "<<mpi_world.rank()<<" received 3rd set of the global parameters"<<endl;
    broadcast(mpi_world,x_range,0);
    broadcast(mpi_world,y_range,0);
    cout<<"\n hello I am slave: "<<mpi_world.rank()<<" received 4th set of the global parameters"<<endl;
    broadcast(mpi_world,z_range,0);
    broadcast(mpi_world,component,0);
    cout<<"\n hello I am slave: "<<mpi_world.rank()<<" received 5th set of the global parameters"<<endl;
    broadcast(mpi_world,min_bound,0);
    cout<<"\n hello I am slave: "<<mpi_world.rank()<<" received 6th set of the global parameters"<<endl;

    // ---- Handshake: report the initial status, then learn whether any job exists ----
    bool stop = false;
    string status="initial";
    mpi_world.send(0, 0, status);
    // This message is printed after the send above has already returned.
    cout<<"\n hello I am slave: "<<mpi_world.rank()<<" I am going to send to root status"<<endl;
    mpi_world.recv(0, 0, stop);
    cout<<"\n hello I am slave: "<<mpi_world.rank()<<" I am going to receive stop signal from root"<<endl;

    while(!stop) {
        map< tuple<int,int,int,int,int,int>, map<set<tuple<int,int,int,int,int> >, double> > clustertype_periodic;

        // Wait for a new job: its id (tag 0) and the min_bound to use for it (tag 1).
        unsigned int job_id = 0;
        cout<<"\n hello I am slave: "<<mpi_world.rank()<<" I am going to receive stop job id from root"<<endl;
        mpi_world.recv(0, 0, job_id);
        cout<<"\n hello I am slave: "<<mpi_world.rank()<<" I am going to receive stop min bound from root"<<endl;
        mpi_world.recv(0, 1, min_bound);
        std::cout << "[SLAVE: " << mpi_world.rank()
                  << "] Received job " << job_id << " from MASTER.\n"<<endl;

        // Perform the job.
        map<tuple<int,int,int,int>,int> spin_tmp;
        double energy_tmp=0;

        // job_id comes from another process, so look it up with bounds checking:
        // an id outside periodicity_vector now throws std::out_of_range instead of
        // reading past the end of the vector (undefined behaviour, possible segfault).
        tuple<int,int,int,int,int,int> periodicity_now = periodicity_vector.at(job_id);

        int a0=periodicity_now.get<0>();
        int a1=periodicity_now.get<1>();
        int a2=periodicity_now.get<2>();
        int a3=periodicity_now.get<3>();
        int a4=periodicity_now.get<4>();
        int a5=periodicity_now.get<5>();

        // The two modes differ only in the three boolean flags passed to periodic().
        // If neither mode is set, nothing is computed and the empty results are sent.
        if (pseudo_mode) {
            periodic(a0, a1, a2, a3, a4, a5,
                     J, x_range, y_range, z_range, component,
                     spin_tmp,
                     energy_tmp,
                     clustertype_periodic, min_bound, id,
                     true, false, false,
                     obscenely_verbose,global_parameters);
        }
        else if (basic_exact_mode){
            periodic(a0, a1, a2, a3, a4, a5,
                     J, x_range, y_range, z_range, component,
                     spin_tmp,
                     energy_tmp,
                     clustertype_periodic, min_bound, id,
                     false, true, false,
                     obscenely_verbose,global_parameters);
        }
        // Results are not stored locally (no spin_periodic / energy_periodic here);
        // they are sent back to rank 0 below.

        // Notify master that the job is done.
        std::cout << "[SLAVE: " << mpi_world.rank()
                  << "] Done with job " << job_id << ". Notifying MASTER.\n"<<endl;
        status="finish";
        mpi_world.send(0, 0,status);

        // clustertype_periodic is deliberately not sent back: the original author
        // suspected it of causing poor performance on realistic systems.
        // Only spin_tmp, energy_tmp and periodicity_now are returned (tags 1, 2, 3).
        mpi_world.send(0, 1,spin_tmp);
        std::cout << "[SLAVE: " << mpi_world.rank()<< "] I have sent spin_tmp back to master"<<endl;
        mpi_world.send(0, 2,energy_tmp);
        std::cout << "[SLAVE: " << mpi_world.rank()<< "] I have sent energy_tmp back to master"<<endl;
        mpi_world.send(0, 3,periodicity_now);
        std::cout << "[SLAVE: " << mpi_world.rank()<< "] I have sent periodicity_now back to master"<<endl;

        // Check whether another job is coming.
        mpi_world.recv(0, 0, stop);
        std::cout << "[SLAVE: " << mpi_world.rank()<< "] I have receive stop from master"<<endl;
    }

    // End of round: learn whether to shut down or wait for the next parameter set.
    std::cout << "[SLAVE: " << mpi_world.rank()<< "] I am waiting for signal whether kill_all_slave"<<endl;
    broadcast(mpi_world,kill_all_slave,0);
    std::cout << "[SLAVE: " << mpi_world.rank()<< "] I receive kill_all_slave signal as "<<kill_all_slave<<endl;
}
有时会出现段错误(segmentation fault),因为某个从进程收到了一次广播,却接收不到随后的下一次广播……我是否应该在各次广播之间加上 usleep(1000)?
我该如何处理这个问题?
[SLAVE: 56] I receive kill_all_slave signal as 0
但是它没有收到下一轮广播,而其他从进程都确实收到了。