I have been trying to implement a distributed matrix transpose. The main idea is to define a template (p x q) for each processor, use that template to split the matrix among the processors, and then distribute the blocks among them in a block-cyclic way. What I want to do is described in the simple file linked below, ......
I have already looked at the following answer from this site:
sending blocks of 2D array in C using MPI
..which seems fine, since it uses MPI_create; in my code I actually use MPI_Cart, a Cartesian topology. But I am stuck halfway, because I do not understand what they do in the paper: how do they distribute the blocks among the different processors (that is, how do I program the 2D blocks cyclically across those processors)?
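To check my understanding of that answer, I tried to re-sketch its block-distribution idea for my own case (an 8x6 matrix split into 4x3 blocks over 4 processes). This is only my reconstruction, so the row-major rank-to-grid assumption and all the names here are mine and may not match what the answer really does:

#include "mpi.h"
#include <stdio.h>

#define GR 8   /* global rows                  */
#define GC 6   /* global columns               */
#define BR 4   /* block rows    (GR / PR)      */
#define BC 3   /* block columns (GC / PC)      */
#define PR 2   /* process-grid rows            */
#define PC 2   /* process-grid columns         */

int main(int argc, char *argv[])
{
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    if (size != PR * PC) MPI_Abort(MPI_COMM_WORLD, 1);

    int A[GR][GC];      /* full matrix, only filled on rank 0 */
    int local[BR][BC];  /* the block this rank receives       */

    if (rank == 0)
        for (int i = 0; i < GR; i++)
            for (int j = 0; j < GC; j++)
                A[i][j] = i * GC + j;

    /* A datatype describing one BR x BC block inside the GR x GC array,
       resized so its extent is one int; the real position of each block
       is then supplied through the displacement array of MPI_Scatterv. */
    int sizes[2]    = {GR, GC};
    int subsizes[2] = {BR, BC};
    int starts[2]   = {0, 0};
    MPI_Datatype blk, blktype;
    MPI_Type_create_subarray(2, sizes, subsizes, starts,
                             MPI_ORDER_C, MPI_INT, &blk);
    MPI_Type_create_resized(blk, 0, sizeof(int), &blktype);
    MPI_Type_commit(&blktype);

    /* One block per process; I assume rank r sits at grid position
       (r / PC, r % PC). The displacement is the linear index (in ints)
       of the block's top-left element inside A.                        */
    int counts[PR * PC], displs[PR * PC];
    for (int p = 0; p < PR; p++)
        for (int q = 0; q < PC; q++) {
            counts[p * PC + q] = 1;
            displs[p * PC + q] = p * BR * GC + q * BC;
        }

    MPI_Scatterv(&A[0][0], counts, displs, blktype,
                 &local[0][0], BR * BC, MPI_INT, 0, MPI_COMM_WORLD);

    printf("rank %d got the block whose top-left element is %d\n",
           rank, local[0][0]);

    MPI_Type_free(&blktype);
    MPI_Type_free(&blk);
    MPI_Finalize();
    return 0;
}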
So my questions, if you can help me, are:
How do I code a 2D block-cyclic distribution (say we have a 12x12 matrix and each processor gets a 3x4 template)? My rough attempt at the index math is sketched right after these questions.
Could you look at the link above and explain how they distribute the blocks among the processors? Any help is welcome, I am getting desperate. And should I keep going with the Cartesian topology at all?
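My current guess for the 2D block-cyclic index mapping is the standard ScaLAPACK-style formula below, but I am not sure this is the right direction. For the 12x12 example with a 3x4 template I am just ASSUMING a 2x2 process grid, only because that is what my code further down uses:

#include <stdio.h>

#define N  12   /* global matrix size (12 x 12)          */
#define MB  3   /* block ("template") rows               */
#define NB  4   /* block ("template") columns            */
#define P   2   /* process-grid rows    -- my assumption */
#define Q   2   /* process-grid columns -- my assumption */

int main(void)
{
    /* For every global element (i, j), compute which process owns it under
       a 2D block-cyclic distribution and where it lands in that process's
       local array.                                                         */
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++) {
            int bi = i / MB, bj = j / NB;    /* global block indices       */
            int pr = bi % P,  pc = bj % Q;   /* owning process coordinates */
            int li = (bi / P) * MB + i % MB; /* local row on the owner     */
            int lj = (bj / Q) * NB + j % NB; /* local column on the owner  */
            printf("A[%2d][%2d] -> proc(%d,%d) local[%d][%d]\n",
                   i, j, pr, pc, li, lj);
        }
    return 0;
}

Is this the right way to think about the block-cyclic part, and if so, how do I combine it with the Cartesian communicator below?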
Below is part of my code; I cannot figure out the next step:
#include "mpi.h"
#include <stdio.h>
#define NP 4 // number of processors
#define M_ROW 4 //template of processor row
#define M_COL 3 //template of processor col
int main(int argc, char *argv[])
{
    int myid, numprocs;
    MPI_Comm comm;
    int dim[2], period[2], reorder;
    int coord[2];
    int A[8][6], array_P[M_ROW][M_COL]; //, AT[8][6];
    int n = 0, Temp;
    int TT[8][6];
    int iv, jv, rankid; // for coordinates of each processor in the Cartesian grid
    int k, y, i, j;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    // First: building the matrix
    for (i = 0; i < 8; i++)
        for (j = 0; j < 6; j++)
        {
            A[i][j] = n;
            n++;
        }
    // Second: build the virtual 2 x 2 processor grid with Cartesian coordinates
    dim[0] = 2;                   // process-grid rows
    dim[1] = 2;                   // process-grid columns
    period[0] = 1; period[1] = 1; // row periodic + col periodic (each column/row forms a ring)
    reorder = 1;                  // true: allow MPI to reorder the processor ranks
    MPI_Cart_create(MPI_COMM_WORLD, 2, dim, period, reorder, &comm);
    MPI_Comm_rank(comm, &rankid);            // rank of this processor in the Cartesian communicator
    MPI_Cart_coords(comm, rankid, 2, coord); // Cartesian coordinates of this processor
    MPI_Barrier(MPI_COMM_WORLD);
    iv = coord[0];
    jv = coord[1];
    //printf("Processor Rank %d receive dimensions (iv,jv)-> iv: %d ,jv: %d \n", myid, coord[0], coord[1]);
for (k=0; k<M_ROW; k++)
{
for (y=0; y<M_COL; y++)
{
i = k + iv*M_ROW;
j = y + jv*M_COL;
//array_P[k][y] = i*10 + j;
array_P[k][y] = A[i][j];
}
}//end loop of filling data
//("Processor %d: Before Transpose:\n", myid);
if(myid == 3)
{
for (k=0; k<M_ROW; k++) // 3 ?? NBLK_R;
{
j = k + iv*M_ROW;
for (y=0; y<M_COL; y++) // 2 ?
{
i = y + jv*M_COL;
printf(" %d ", A[j][i]);
}
printf("\n");
}
}
printf("\n");
    //MPI_Alltoall(TT, M_ROW*M_COL, MPI_INT, TT, M_ROW*M_COL, MPI_INT, MPI_COMM_WORLD);
    /*
    if (myid == 2)
    {
        for (k = 0; k < M_ROW; k++) // 3 ?? NBLK_R;
        {
            // j = k + iv*M_ROW;
            for (y = 0; y < M_COL; y++) // 2 ?
            {
                //i = y + jv*M_COL;
                //Final[j][i] = array_PT[x][y]; // check the arraypt ?
                printf(" %d ", array_P[k][y]);
            }
            printf("\n");
        }
    } */
    // Fourth: transposing the original matrix (element-by-element swap of this
    // processor's block; I suspect this is wrong for the non-square 8x6 matrix,
    // since A[j][i] can index a column beyond 5 and some pairs get swapped twice)
    for (k = 0; k < M_ROW; k++)
    {
        for (y = 0; y < M_COL; y++)
        {
            i = k + iv * M_ROW;
            j = y + jv * M_COL;
            Temp = A[i][j];
            A[i][j] = A[j][i];
            A[j][i] = Temp;
        }
    }
printf("\n \n");
if(myid == 3)
{
for (k=0; k<M_ROW; k++) // 3 ?? NBLK_R;
{
j = k + iv*M_ROW;
for (y=0; y<M_COL; y++) // 2 ?
{
i = y + jv*M_COL;
printf(" %d ", A[j][i]);
}
printf("\n");
}
}
printf("\n");
    //MPI_Barrier(comm);
    // send the transposed array_PT to the main process (process 0 in our case)
    // ml*nl -> 2*3
    //MPI_Send(array_PT, M_COL*M_ROW, MPI_INT, 0, 1, comm);
    //MPI_Isend(array_PT, M_COL*M_ROW, MPI_INT, 0, 1, comm, &request);
    //MPI_Barrier(MPI_COMM_WORLD);
    //int iv_tt, jv_tt;
    //******************************
    MPI_Finalize();
    return 0;
}
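For the step I am missing after the code above, the only idea I have so far (which may be completely wrong, and which only works because my 2x2 process grid is square) is to transpose each local block and swap it with the process at the mirrored Cartesian coordinates. The fragment below would go right after the loop that fills array_P; the names local_T, recv_T, mirror and partner are just mine:

    /* Transpose my own M_ROW x M_COL block locally. */
    int local_T[M_COL][M_ROW];
    for (k = 0; k < M_ROW; k++)
        for (y = 0; y < M_COL; y++)
            local_T[y][k] = array_P[k][y];

    /* Swap blocks with the process at the mirrored coordinates (jv, iv);
       on the diagonal (iv == jv) this just exchanges with myself.        */
    int mirror[2] = { coord[1], coord[0] };
    int partner;
    MPI_Cart_rank(comm, mirror, &partner);

    int recv_T[M_COL][M_ROW]; /* should end up as my block of the transpose */
    MPI_Sendrecv(local_T, M_ROW * M_COL, MPI_INT, partner, 0,
                 recv_T,  M_ROW * M_COL, MPI_INT, partner, 0,
                 comm, MPI_STATUS_IGNORE);

Is something like this reasonable, or should I be doing this with MPI_Alltoall as I started to try above?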