我试图将一个 n * n 矩阵按行划分给 p 个处理器,而 n 可能无法被 p 整除。因此需要把矩阵划分成行数不等的若干块;最简单的做法是给每个处理器发送 n / p 行,只有最后一个处理器接收 n / p + n % p 行。
这是我的代码:
using namespace std;
int main(int argc, char* argv[])
{
int my_rank = 0;
int comm_size = 0;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
MPI_Comm_size(MPI_COMM_WORLD, &comm_size);
double *Adata;
double **adjArray;
int n;
if (my_rank == 0){
n=6;
Adata = (double *)malloc(sizeof(double)*n*n);
adjArray = (double **)malloc(sizeof(double*) * n);
for(int i = 0; i < n; i++) {
adjArray[i] = &(Adata[i*n]);
}
int k=0;
for (int i=0; i<n; i++) {
for (int j=0; j<n; j++) {
adjArray[i][j]=k;
k++;
}
}
cout<<"---Adjacancy Matrix:"<<endl;
for (int i=0; i<n; i++) {
for (int j=0; j<n; j++) {
if(adjArray[i][j]==INT_MAX)
{
cout<< " - ";
}else
{
cout<< adjArray[i][j]<<" ";
}
}
cout<<endl;
}
cout<<"----------------------------------------------------"<<endl;
}
//---------------------------------------------------------
// Broadcasting the data among the processors.
MPI_Bcast( &n,1,MPI_INT,0,MPI_COMM_WORLD);
//---------------------------------------------------------
// Scatter the rows to each processor
int rem = 0; // elements remaining after division among processes
int sum = 0; // Sum of counts. Used to calculate displacements
if(my_rank==comm_size-1) rem=n%comm_size;
int *displs = (int *)malloc(comm_size*sizeof(int));
int *sendcounts = (int *)malloc(comm_size*sizeof(int));
int numPerProc=n/comm_size;
int receive_buffer[numPerProc+rem];
for (int i=0; i<comm_size-1; i++) {
sendcounts[i]=(n)/comm_size;
displs[i] = sum;
sum += sendcounts[i];
}
sendcounts[comm_size-1]=n/comm_size+rem;
displs[comm_size-1]=sum;
MPI_Datatype strip;
/* defining a datatype for sub-matrix */
MPI_Type_vector(numPerProc, n, n, MPI_DOUBLE, &strip);
MPI_Type_commit(&strip);
double **strip_A,*stripdata;
stripdata = (double *)malloc(sizeof(double)*numPerProc*n);
strip_A = (double **)malloc(sizeof(double*)*numPerProc);
for(int i= 0; i< numPerProc+rem; i++) {
strip_A[i] = &(stripdata[i*n]);
}
MPI_Scatterv(Adata, sendcounts, displs, strip, &(strip_A[0][0]), sendcounts[my_rank], strip, 0, MPI_COMM_WORLD);
for(int i = 0; i < sendcounts[my_rank]; i++) {
if(i == 0) {
printf("rank = %d\n", my_rank);
}
for(int j = 0; j < n; j++) {
if(strip_A[i][j]==INT_MAX)
{
cout<< " - ";
}else
{
cout<< strip_A[i][j]<<" ";
}
}
printf("\n");
}
MPI_Finalize();
return 0;
}
不幸的是,一旦n不等于p,它就不起作用。例如,一旦我尝试p = 4,输出为:
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
[warn] kq_init: detected broken kqueue; not using.: No such file or directory
---Adjacancy Matrix:
0 1 2 3 4 5
6 7 8 9 10 11
12 13 14 15 16 17
18 19 20 21 22 23
24 25 26 27 28 29
30 31 32 33 34 35
----------------------------------------------------
rank = 0
0 1 2 3 4 5
rank = 2
12 13 14 15 16 17
rank = 1
6 7 8 9 10 11
rank = 3
18 19 20 21 22 23
6.95287e-310 6.95287e-310 6.95287e-310 1.99804e+161 8.11662e+217 3.25585e-86
1.94101e-80 2.68185e-80 4.81827e+151 1.39957e-306 2.33584e-314 6.95287e-310
任何帮助表示赞赏! 谢谢!
答案 0 :(得分:1)
一行的派生数据类型应该像这样构建(注意 count 为 1,而不是 numPerProc):
MPI_Type_vector(1, n, n, MPI_DOUBLE, &strip);
注意一个更简单的选项是
MPI_Type_contiguous(n, MPI_DOUBLE, &strip);
还有其他问题:
- sendcounts 和 displs 仅在 rank 0 上有意义,而 sendcounts[comm_size-1] 在该 rank 上的值不正确(rem 只在最后一个 rank 上被设置为 n % comm_size,在 rank 0 上仍为 0);
- stripdata 和 strip_A 在最后一个 rank 上的大小不正确(例如,您只分配了 numPerProc 行,却访问了 numPerProc+rem 行)。