0: 1 1 1 2 2 2
1: 1 1 1 2 2 2
2: 1 1 1 2 2 2
3: 1 1 1 2 2 2
4: 1 1 1 2 2 2
5: 1 1 1 2 2 2
6: 1 1 1 2 2 2
另一方面,如果我用COLUMNS = 5执行它,我希望得到:
0: 1 1 1 2 2
1: 1 1 1 2 2
2: 1 1 1 2 2
3: 1 1 1 2 2
4: 1 1 1 2 2
5: 1 1 1 2 2
6: 1 1 1 2 2
0: 1 1 1 2 2
1: 2 1 1 2 2
2: 2 1 1 2 2
3: 2 1 1 2 2
4: 2 1 1 2 2
5: 1 1 1 -0 -0
6: 1 1 1 -0 -0
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#define ROWS 7
#define COLUMNS 6 // 5 or 6 only. I could pass this in the cmd line...
/*
 * Print a rows x cols matrix to stdout, one matrix row per output line.
 *
 * Each line starts with the row index (width 3) followed by ": ", then every
 * element printed with no fractional digits (width 2) and a trailing space.
 *
 * X    - array of `rows` row pointers, each addressing at least `cols` floats
 * rows - number of rows to print
 * cols - number of columns to print per row
 */
void print_matrix (float ** X, int rows, int cols)
{
  for (int i = 0; i < rows; ++i) {
    printf ("%3d: ", i);
    for (int j = 0; j < cols; ++j) {
      printf ("%2.0f ", X[i][j]);
    }
    printf ("\n");
  }
}
/*
 * Allocate a rows x cols matrix of floats backed by ONE contiguous data block.
 *
 * The returned array holds `rows` row pointers; row i points at element
 * i * cols of the data block, so matrix[0] addresses the whole matrix in
 * row-major order (which is what lets it be described by a single MPI
 * datatype). The element values are left uninitialized.
 *
 * Ownership: the caller releases the matrix with
 *     free (matrix[0]);   // the data block
 *     free (matrix);      // the row-pointer array
 *
 * Returns NULL if rows or cols is not positive, if the byte count would
 * overflow size_t, or if either allocation fails.
 */
float **allocate_matrix (int rows, int cols)
{
  if (rows <= 0 || cols <= 0) {
    return NULL;
  }

  size_t nrows = (size_t) rows;
  size_t ncols = (size_t) cols;

  /* Guard nrows * ncols * sizeof(float) against wrapping around. */
  if (ncols > ((size_t) -1) / sizeof (float) / nrows) {
    return NULL;
  }

  float *data = malloc (nrows * ncols * sizeof *data);
  if (data == NULL) {
    return NULL;
  }

  float **matrix = malloc (nrows * sizeof *matrix);
  if (matrix == NULL) {
    free (data);   /* do not leak the data block on partial failure */
    return NULL;
  }

  for (size_t i = 0; i < nrows; i++) {
    matrix[i] = &data[i * ncols];
  }
  return matrix;
}
int main (int argc, char *argv[])
int num_procs, my_rank, i, j, root = 0, ncols, ndims = 2, strts;
float **matrix;
MPI_Datatype sendsubarray, recvsubarray, resizedrecvsubarray;
assert (COLUMNS == 5 || COLUMNS == 6);
MPI_Init (&argc, &argv);
MPI_Comm_size (MPI_COMM_WORLD, &num_procs);
if (num_procs != NR_OF_PROCESSES) MPI_Abort (MPI_COMM_WORLD, -1);
MPI_Comm_rank (MPI_COMM_WORLD, &my_rank);
ncols = (my_rank == root) ? 3 : COLUMNS - 3;
strts = (my_rank == root) ? 0 : 3;
int sizes[2] = {ROWS, COLUMNS};
int subsizes[2] = {ROWS, ncols};
int starts[2] = {0, strts};
// Create and populate the matrix at each node (incl. the root):
matrix = allocate_matrix (ROWS, COLUMNS);
for (i = 0; i < ROWS; i++)
for (j = 0; j < COLUMNS; j++)
matrix[i][j] = my_rank * -1.0;
for (i = starts[0]; i < starts[0] + subsizes[0]; i++)
for (j = starts[1]; j < starts[1] + subsizes[1]; j++)
matrix[i][j] = my_rank + 1.0;
// Create the subarray type for use by each send node (incl. the root):
MPI_Type_create_subarray (ndims, sizes, subsizes, starts, MPI_ORDER_C,
MPI_FLOAT, &sendsubarray);
MPI_Type_commit (&sendsubarray);
// Create the subarray type for use by the receive node (the root):
if (my_rank == root) {
MPI_Type_create_subarray (ndims, sizes, subsizes, starts, MPI_ORDER_C,
MPI_FLOAT, &recvsubarray);
MPI_Type_commit (&recvsubarray);
MPI_Type_create_resized (recvsubarray, 0, 1 * sizeof(float),
MPI_Type_commit (&resizedrecvsubarray);
// Gather the send matrices into the receive matrix:
int counts[NR_OF_PROCESSES] = {3, COLUMNS - 3};
int displs[NR_OF_PROCESSES] = {0, 3};
MPI_Gatherv (matrix[0], 1, sendsubarray,
matrix[0], counts, displs, resizedrecvsubarray,
// Have the root send the main array to the output:
if (my_rank == root) print_matrix (matrix, ROWS, COLUMNS);
// Free out all the allocations we created in this node...
if (my_rank == 0) {
MPI_Type_free (&resizedrecvsubarray);
MPI_Type_free (&recvsubarray);
MPI_Type_free (&sendsubarray);
free (matrix);
return 0;
答案 0 :(得分:3)
做得很好!这段代码需要同时处理很多 MPI 细节,最后只差一点——我只需添加两行并修改第三行,代码就能正常工作。
// Receive counts and displacements (one entry per rank) passed to MPI_Gatherv.
int counts[NR_OF_PROCESSES] = {3, COLUMNS - 3};
int displs[NR_OF_PROCESSES] = {0, 3};
// Receive type as originally written: it is built from the same sizes,
// subsizes and starts that were used for the send type.
MPI_Type_create_subarray (ndims, sizes, subsizes, starts, MPI_ORDER_C,
MPI_FLOAT, &recvsubarray);
您创建的接收类型使用的是接收进程自己发送数据时的大小、子大小和偏移量!实际上,您应该创建一个恰好只含一列、起点为 {0,0} 的接收子数组类型——这样类型本身不带(内在的)偏移,数据应该放到哪里完全由位移(displs)来指定:
// Receive type covering exactly one column (ROWS x 1) of the full matrix.
int colsubsizes[]={ROWS, 1};
// Start at {0,0} so the type carries no offset of its own.
int colstarts[]={0,0};
MPI_Type_create_subarray (ndims, sizes, colsubsizes, colstarts, MPI_ORDER_C,
MPI_FLOAT, &recvsubarray);
(注意:其实不必对 recvsubarray 调用 MPI_Type_commit,因为您从未将其用于实际通信;它仅用于构建 resizedrecvsubarray 类型,而后者才需要提交。)