I know this isn't the first time someone has asked this, but I'm really confused. I'm new to MPI and I'm trying to implement a Jacobi solver for the linear system Ax = b. I want to compare the runtime of the parallel and serial versions, but for large N my MPI code is much slower, and I don't know whether the problem is in my code or in my machine. By the way, I'm running 2 processes on a dual-core CPU; my CPU is an Intel® Core™ i7-7000U. I compile/run with mpic++ j.cpp -o j and mpirun -np 2 j. For example, with n = 10000 the parallel code takes about 7 seconds, while my serial version takes 0.8 seconds, which is a huge difference. For the serial version I built and ran the same algorithm with g++ -o serial serial.cpp and ./serial. This is my parallel code (a rough sketch of the serial loop I compare against is at the end):
#include <iostream>
#include "mpi.h"
#include <cmath>
#include <cstdlib>
using namespace std;
#define n 10000
void CreateMatrix(double *A) {
    // fill the whole matrix with random values
    for (int i = 0; i < n * n; i++) {
        A[i] = (rand() % 1000) + 1;
    }
    // put large dummy values on the diagonal so the matrix is diagonally dominant
    for (int i = 0; i < n; i++) {
        A[i * n + i] = (1000 * n + 128.5) + rand() % 8984;
    }
}
void CreateVector(double *b) {
    for (int i = 0; i < n; i++) {
        b[i] = (rand() % 974) - i * (rand() % 20) + (rand() % 1000); // random values
    }
}
int JacobiMethod(double *A, double *b, double *x_out, int size, int rank, double tol, int max_iter) {
    int n_local = n / size;          // rows owned by this process
    double *temp1 = new double[n];
    double *temp2 = new double[n];
    double *xold;
    double *xnew;
    double *swap;
    int iterations = 0;
    int k;
    double sum, sum1;
    double error = 1;
    int flag;
    // gather the local pieces of b from every process into temp1 and use it as the initial guess
    MPI_Allgather(b, n_local, MPI_DOUBLE, temp1, n_local, MPI_DOUBLE, MPI_COMM_WORLD);
    xold = temp2;
    xnew = temp1;
    while (abs(error) > tol && iterations < max_iter) {
        // swap the buffers: the previous iterate becomes xold
        swap = xnew;
        xnew = xold;
        xold = swap;
        iterations += 1;
        for (int i = 0; i < n_local; i++) {
            k = i + rank * n_local;  // global index of the diagonal element for local row i
            sum = 0;
            for (int j = 0; j < n; j++) {
                if (j != k) {
                    sum += A[j + n * i] * xold[j]; // A is stored row-major, so n*i starts local row i
                }
            }
            x_out[i] = (b[i] - sum) / A[n * i + k];
        }
        // gather the updated local pieces into the full vector xnew on every process
        MPI_Allgather(x_out, n_local, MPI_DOUBLE, xnew, n_local, MPI_DOUBLE, MPI_COMM_WORLD);
        // convergence check: 2-norm of the difference between successive iterates
        sum1 = 0;
        for (int i = 0; i < n; i++) {
            sum1 = sum1 + pow((xnew[i] - xold[i]), 2);
        }
        error = sqrt(sum1);
    }
    flag = (iterations >= max_iter) ? -1 : 0;
    // free the local blocks of A and b allocated in main, plus the work buffers
    delete[] A;
    delete[] b;
    delete[] temp1;
    delete[] temp2;
    return flag;
}
void printResults(double *x_out, int size, int rank) {
    int n_local = n / size;
    double *answ = new double[n];
    // gather all local pieces of the solution onto process 0
    MPI_Gather(x_out, n_local, MPI_DOUBLE, answ, n_local, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        cout << "The algorithm converges" << endl;
        cout << "The results are: " << endl;
        for (int i = 0; i < n; i++) {
            //cout << answ[i] << endl;  // printing disabled for large n
        }
    }
    delete[] answ;
}
int main() {
    double tol;          // tolerance
    int max_iter;
    int converged;
    // MPI initialization
    MPI_Init(NULL, NULL);
    double *b_local = NULL;
    double *A_local = NULL;
    int size;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    if (rank == 0) {
        b_local = new double[n];
        A_local = new double[n * n];
        cout << "Enter tolerance, number of iterations" << endl;
        cin >> tol;
        cin >> max_iter;
        CreateMatrix(A_local);   // create A on the root process
        CreateVector(b_local);   // create b on the root process
    }
    // local blocks: n/size rows of A, and n/size entries of b and x
    double *A = new double[n * n / size];
    double *b = new double[n / size];
    double *x_out = new double[n / size];
    // broadcast tol and max_iter to all processes
    MPI_Bcast(&tol, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Bcast(&max_iter, 1, MPI_INT, 0, MPI_COMM_WORLD);
    // scatter vector b: each process receives n/size entries
    MPI_Scatter(b_local, n / size, MPI_DOUBLE, b, n / size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    // scatter A: each process receives (n/size)*n elements, i.e. n/size full rows
    MPI_Scatter(A_local, (n / size) * n, MPI_DOUBLE, A, (n / size) * n, MPI_DOUBLE, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        delete[] b_local;
        delete[] A_local;
    }
    double time0, time1;
    time0 = MPI_Wtime();
    converged = JacobiMethod(A, b, x_out, size, rank, tol, max_iter);
    time1 = MPI_Wtime();
    if (converged == 0) {
        cout << "Time needed for the Jacobi algorithm to be executed is: " << time1 - time0 << endl;
        printResults(x_out, size, rank);
    }
    else {
        cout << "Jacobi doesn't converge" << endl;
    }
    delete[] x_out;
    MPI_Finalize();
    return 0;
}
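
For reference, this is roughly what the serial version I time against looks like: a simplified sketch (not the exact serial.cpp) that uses the same matrix setup, the same Jacobi update, and the same convergence check; tol and max_iter are hard-coded here just for illustration.

#include <iostream>
#include <cmath>
#include <cstdlib>
using namespace std;
#define n 10000

int main() {
    double *A = new double[n * n];
    double *b = new double[n];
    double *xold = new double[n];
    double *xnew = new double[n];
    double tol = 1e-6;        // assumed tolerance, same role as in the MPI version
    int max_iter = 1000;      // assumed iteration cap
    // same diagonally dominant setup as the parallel code
    for (int i = 0; i < n * n; i++) A[i] = (rand() % 1000) + 1;
    for (int i = 0; i < n; i++)     A[i * n + i] = (1000 * n + 128.5) + rand() % 8984;
    for (int i = 0; i < n; i++)     b[i] = (rand() % 974) - i * (rand() % 20) + (rand() % 1000);
    for (int i = 0; i < n; i++)     xold[i] = b[i];   // initial guess x0 = b, as in the MPI code
    double error = 1;
    int iterations = 0;
    while (error > tol && iterations < max_iter) {
        iterations++;
        for (int i = 0; i < n; i++) {
            double sum = 0;
            for (int j = 0; j < n; j++)
                if (j != i) sum += A[i * n + j] * xold[j];
            xnew[i] = (b[i] - sum) / A[i * n + i];
        }
        // 2-norm of the difference between successive iterates
        error = 0;
        for (int i = 0; i < n; i++) error += (xnew[i] - xold[i]) * (xnew[i] - xold[i]);
        error = sqrt(error);
        double *swap = xold; xold = xnew; xnew = swap;  // reuse the buffers between iterations
    }
    cout << "iterations: " << iterations << ", error: " << error << endl;
    delete[] A; delete[] b; delete[] xold; delete[] xnew;
    return 0;
}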