我正在学习MPI,并尝试运行一些示例。
我遇到了如下错误:
Fatal error in PMPI_Scatter: Invalid buffer pointer, error stack:
PMPI_Scatter(783): MPI_Scatter(sbuf=0x6021e0, scount=16, MPI_INT, rbuf=0x6021e0, rcount=16, MPI_INT, root=0, MPI_COMM_WORLD) failed
PMPI_Scatter(710): Buffers must not be aliased
===================================================================================
= BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES
= EXIT CODE: 1
= CLEANING UP REMAINING PROCESSES
= YOU CAN IGNORE THE BELOW CLEANUP MESSAGES
===================================================================================
我的代码是:
#include <mpi.h>
#include <stdio.h>
#define SIZE 8 /* Size of matrices */
/* Modulus applied to rand() when generating entries (see the fill_* helpers). */
#define MAX_RAND 100
/*
 * Global operands shared by every function in this listing.
 * main() computes C[i][0] as the sum over k of A[i][k] * B[k][0].
 * D and E are only printed by main() and are never assigned in this listing.
 */
int A[SIZE][SIZE], B[SIZE][1], C[SIZE][1],D[SIZE][SIZE],E[SIZE][1];
/*
 * Populate a SIZE x SIZE matrix with rand() % MAX_RAND, visiting the cells
 * in row-major order, and echo the matrix to stdout as it is generated:
 * one text line per row, framed by an asterisk banner above and below.
 */
void fill_matrix(int m[SIZE][SIZE])
{
    int row, col;

    printf("\n*****************************\n");
    for (row = 0; row < SIZE; ++row) {
        int *cells = m[row];

        for (col = 0; col < SIZE; ++col) {
            cells[col] = rand() % MAX_RAND;
            printf("%2d ", cells[col]);
        }
        printf("\n");
    }
    printf("\n*****************************\n");
}
/*
 * Populate a SIZE x 1 column vector with rand() % MAX_RAND and echo it to
 * stdout, one entry per line, framed by an asterisk banner above and below.
 */
void fill_vector(int m[SIZE][1])
{
    int row;

    printf("\n*****************************\n");
    for (row = 0; row < SIZE; ++row) {
        /* Each row of a column vector holds exactly one entry. */
        m[row][0] = rand() % MAX_RAND;
        printf("%2d ", m[row][0]);
        printf("\n");
    }
    printf("\n*****************************\n");
}
/*
 * Write a SIZE x SIZE matrix to stdout. Every row starts on a new line,
 * is indented by one tab and is enclosed between '|' characters.
 * No trailing newline is emitted after the last row.
 */
void print_matrix(int m[SIZE][SIZE])
{
    int row = 0;

    while (row < SIZE) {
        int col;

        printf("\n\t| ");
        for (col = 0; col < SIZE; ++col) {
            printf("%2d ", m[row][col]);
        }
        printf("|");
        ++row;
    }
}
/*
 * Write a SIZE x 1 column vector to stdout. Every entry starts on a new
 * line, is indented by one tab and is enclosed between '|' characters.
 * No trailing newline is emitted after the last entry.
 */
void print_vector(int m[SIZE][1])
{
    int row = 0;

    while (row < SIZE) {
        printf("\n\t| ");
        /* Each row of a column vector holds exactly one entry. */
        printf("%2d ", m[row][0]);
        printf("|");
        ++row;
    }
}
/*
 * Asker's original program, kept as posted because it reproduces the
 * MPI_Scatter failure quoted in the error log at the top of this listing.
 *
 * Intended flow: rank 0 fills A and B with random values, B is broadcast,
 * the rows of A are scattered, each rank computes rows [from, to) of
 * C = A * B, and the slices of C are gathered and printed on rank 0.
 */
int main(int argc, char *argv[])
{
int myrank, P, from, to, i, j, k;
// int tag = 666; /* any value will do */
// MPI_Status status;
MPI_Init (&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /* rank of this process */
MPI_Comm_size(MPI_COMM_WORLD, &P); /* number of processes */
/* Only an even split of the SIZE rows across the P processes is handled. */
if (SIZE%P!=0) {
if (myrank==0) printf("Matrix size not divisible by number of processors\n");
MPI_Finalize();
/* NOTE: exit() and rand() are used, but this listing includes only
 * <mpi.h> and <stdio.h>, not <stdlib.h>. */
exit(-1);
}
/* Half-open range of rows [from, to) assigned to this rank. */
from = myrank * SIZE/P;
to = ((myrank+1) * SIZE/P);
/* Process 0 fills the input matrices and broadcasts them to the rest */
/* (actually, only the relevant stripe of A is sent to each process) */
if (myrank==0) {
/* Inlined copy of fill_matrix() applied to A, printing with "%d " instead of "%2d ". */
{
//static int n=0;
/* These i and j shadow the variables of the same name declared at the top of main. */
int i, j;
printf("\n*****************************\n");
for (i=0; i<SIZE; i++)
{
for (j=0; j<SIZE; j++){
A[i][j] = rand() % MAX_RAND;
printf("%d ", A[i][j]);
}
printf("\n");
}
printf("\n*****************************\n");
}
fill_vector(B);
}
/* s is referenced only by the commented-out debug lines below. */
int s=SIZE*SIZE/P;
// printf("computing slice %d (from row %d to %d)\n", myrank, from, to-1);
/* Every rank receives the whole vector B from rank 0. */
MPI_Bcast (B, SIZE*1, MPI_INT, 0, MPI_COMM_WORLD);
// printf("\n\n%d",s);
//print_vector(s);
//printf("\n\n");
/*
 * NOTE: on rank 0, from == 0, so the receive buffer &A[from] has the same
 * address as the send buffer &A. This matches the reported failure
 * "Buffers must not be aliased" (sbuf == rbuf in the error log).
 */
MPI_Scatter (&A, SIZE*SIZE/P, MPI_INT, &A[from], SIZE*SIZE/P, MPI_INT, 0, MPI_COMM_WORLD);
printf("computing slice %d (from row %d to %d)\n", myrank, from, to-1);
/* Dot product of row i of A with B. The j loop does not take part in the
 * arithmetic: it resets and recomputes the same C[i][0] SIZE times. */
for (i=from; i<to; i++)
for (j=0; j<SIZE; j++) {
C[i][0]=0;
for (k=0; k<SIZE; k++){
C[i][0] += A[i][k]*B[k][0];
}
}
/*
 * NOTE: the count SIZE*SIZE/P is the size of a stripe of A, but C is
 * declared as int C[SIZE][1], i.e. only SIZE ints in total. On rank 0,
 * &C[from] and &C are also the same address, as in the scatter above.
 */
MPI_Gather (&C[from], SIZE*SIZE/P, MPI_INT, &C, SIZE*SIZE/P, MPI_INT, 0, MPI_COMM_WORLD);
/* Rank 0 prints the operands and the result. */
if (myrank==0) {
printf("\n\n");
/* Inlined copy of print_matrix() applied to A, printing with "%d " instead of "%2d ". */
{
int i, j = 0;
for (i=0; i<SIZE; i++) {
printf("\n\t| ");
for (j=0; j<SIZE; j++)
printf("%d ", A[i][j]);
printf("|");
}
}
printf("\n\n");
/* D is never assigned anywhere in this listing. */
print_matrix(D);
printf("\n\n\t * \n");
print_vector(B);
printf("\n\n\t = \n");
print_vector(C);
printf("\n\n");
/* E is never assigned anywhere in this listing. */
print_vector(E);
printf("\n\n");
}
MPI_Finalize();
return 0;
}
由于我是Java程序员,对指针了解不多,如果我的问题显得很基础,还请见谅,我仍在学习中。我想做的是:把矩阵A按行拆分到不同的进程,把整个向量B广播给所有进程,让各进程将自己的那部分行与B相乘得到C的对应部分,最后再用MPI_Gather把结果收集回来。
答案 0 :(得分:2)
您的代码中有两个问题:
正如您所猜测的,第一个是与指针相关的错误。MPI_Scatter()
需要一个指向待发送数据的指针,以及一个指向接收缓冲区的指针。例如,由于A
是二维数组(在内存中连续存放),可以这样写:
MPI_Scatter (&A[0][0], SIZE*SIZE/P, MPI_INT, &A[from][0], SIZE*SIZE/P, MPI_INT, 0, MPI_COMM_WORLD);
其中&A[0][0]
是指向发送缓冲区的指针,&A[from][0]
是指向接收缓冲区的指针。
第二个问题出在 MPI_Gather()
上。它首先存在与第一个问题相同的指针错误。此外,C
是一个向量,而不是矩阵:每个进程要发送的整数个数远小于SIZE*SIZE/P
,实际上应为SIZE/P
。
MPI_Gather (&C[from][0], SIZE/P, MPI_INT, &C[0][0], SIZE/P, MPI_INT, 0, MPI_COMM_WORLD);
其中int C[SIZE][1]
是向量。
以下是您的代码,稍作修改:
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#define SIZE 8 /* Size of matrices */
/* Modulus applied to rand() when generating entries (see the fill_* helpers). */
#define MAX_RAND 100
/*
 * Global operands shared by every function in this listing.
 * main() computes C[i][0] as the sum over k of A[i][k] * B[k][0].
 * The D and E arrays of the asker's version are commented out here.
 */
int A[SIZE][SIZE], B[SIZE][1], C[SIZE][1];//D[SIZE][SIZE],E[SIZE][1];
/*
 * Fill a SIZE x SIZE matrix, in row-major order, with rand() % MAX_RAND.
 * The generated values are printed to stdout while they are produced:
 * an asterisk banner, then one line per row, then the banner again.
 */
void fill_matrix(int m[SIZE][SIZE])
{
    int r;

    printf("\n*****************************\n");
    for (r = 0; r < SIZE; r++) {
        int c = 0;

        while (c < SIZE) {
            int value = rand() % MAX_RAND;

            m[r][c] = value;
            printf("%2d ", value);
            c++;
        }
        printf("\n");
    }
    printf("\n*****************************\n");
}
/*
 * Fill a SIZE x 1 column vector with rand() % MAX_RAND.
 * The generated values are printed to stdout while they are produced:
 * an asterisk banner, then one entry per line, then the banner again.
 */
void fill_vector(int m[SIZE][1])
{
    int r;

    printf("\n*****************************\n");
    for (r = 0; r < SIZE; r++) {
        /* The single column of the vector is index 0. */
        int value = rand() % MAX_RAND;

        m[r][0] = value;
        printf("%2d ", value);
        printf("\n");
    }
    printf("\n*****************************\n");
}
/*
 * Print a SIZE x SIZE matrix to stdout. Each row is written on a fresh
 * line as a tab, "| ", the row's entries ("%2d " each) and a closing "|".
 * Nothing is printed after the final "|".
 */
void print_matrix(int m[SIZE][SIZE])
{
    int r, c;

    for (r = 0; r < SIZE; r++) {
        const int *cells = m[r];

        printf("\n\t| ");
        for (c = 0; c < SIZE; c++) {
            printf("%2d ", cells[c]);
        }
        printf("|");
    }
}
/*
 * Print a SIZE x 1 column vector to stdout. Each entry is written on a
 * fresh line as a tab, "| ", the entry ("%2d ") and a closing "|".
 * Nothing is printed after the final "|".
 */
void print_vector(int m[SIZE][1])
{
    int r;

    for (r = 0; r < SIZE; r++) {
        printf("\n\t| ");
        /* The single column of the vector is index 0. */
        printf("%2d ", m[r][0]);
        printf("|");
    }
}
/*
 * Distributed matrix-vector product C = A * B using MPI collectives.
 *
 * Rank 0 fills A and B with random values and prints them. B is broadcast
 * to every rank, the rows of A are scattered in equal stripes, each rank
 * computes its stripe of C, and the stripes are gathered on rank 0, which
 * prints A, B and C.
 *
 * argc/argv are forwarded to MPI_Init. Returns 0 on success. If SIZE is
 * not divisible by the number of processes, every rank finalizes MPI and
 * exits with status -1 (rank 0 prints a diagnostic first).
 */
int main(int argc, char *argv[])
{
    int myrank, P, from, to, i, j, k;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /* rank of this process */
    MPI_Comm_size(MPI_COMM_WORLD, &P);      /* number of processes */

    /* Only an even split of the SIZE rows across the P processes is handled. */
    if (SIZE % P != 0) {
        if (myrank == 0)
            printf("Matrix size not divisible by number of processors\n");
        MPI_Finalize();
        exit(-1);
    }

    /* Half-open range of rows [from, to) owned by this rank. */
    from = myrank * SIZE / P;
    to = (myrank + 1) * SIZE / P;

    /* Rank 0 generates the operands; the other ranks receive them below. */
    if (myrank == 0) {
        printf("\n*****************************\n");
        for (i = 0; i < SIZE; i++) {
            for (j = 0; j < SIZE; j++) {
                A[i][j] = rand() % MAX_RAND;
                printf("%d ", A[i][j]);
            }
            printf("\n");
        }
        /* Closing banner is printed once, after the last row. */
        printf("\n*****************************\n");
        fill_vector(B);
    }

    /* Every rank needs the whole vector B. */
    MPI_Bcast(&B[0][0], SIZE, MPI_INT, 0, MPI_COMM_WORLD);

    /*
     * Each rank receives its stripe of A directly at rows [from, to).
     * On the root the stripe is already in place inside the send buffer,
     * so the root passes MPI_IN_PLACE as its receive buffer instead of an
     * aliased pointer.
     */
    if (myrank == 0) {
        MPI_Scatter(&A[0][0], SIZE * SIZE / P, MPI_INT,
                    MPI_IN_PLACE, SIZE * SIZE / P, MPI_INT,
                    0, MPI_COMM_WORLD);
    } else {
        MPI_Scatter(&A[0][0], SIZE * SIZE / P, MPI_INT,
                    &A[from][0], SIZE * SIZE / P, MPI_INT,
                    0, MPI_COMM_WORLD);
    }

    printf("computing slice %d (from row %d to %d)\n", myrank, from, to - 1);

    /* One dot product per owned row: C[i] = sum over k of A[i][k] * B[k]. */
    for (i = from; i < to; i++) {
        int sum = 0;

        for (k = 0; k < SIZE; k++)
            sum += A[i][k] * B[k][0];
        C[i][0] = sum;
    }

    /*
     * C is a vector, so each rank contributes SIZE/P integers. The root's
     * own contribution already sits at the right place in C, so the root
     * passes MPI_IN_PLACE as its send buffer.
     */
    if (myrank == 0) {
        MPI_Gather(MPI_IN_PLACE, SIZE / P, MPI_INT,
                   &C[0][0], SIZE / P, MPI_INT,
                   0, MPI_COMM_WORLD);
    } else {
        MPI_Gather(&C[from][0], SIZE / P, MPI_INT,
                   &C[0][0], SIZE / P, MPI_INT,
                   0, MPI_COMM_WORLD);
    }

    /* Rank 0 prints the operands and the gathered result. */
    if (myrank == 0) {
        printf("\n\n");
        for (i = 0; i < SIZE; i++) {
            printf("\n\t| ");
            for (j = 0; j < SIZE; j++)
                printf("%d ", A[i][j]);
            printf("|");
        }
        printf("\n\n");
        printf("\n\n\t * \n");
        print_vector(B);
        printf("\n\n\t = \n");
        print_vector(C);
        printf("\n\n");
    }

    MPI_Finalize();
    return 0;
}
它可以由mpicc main.c -o main -Wall
编译并由mpirun -np 4 main
运行。
我猜您没有调用srand()
是为了得到可重现的结果。如果您打算使用更大的数组,则需要动态分配它们。如果是这种情况,请查看以下问题:sending blocks of 2D array in C using MPI
编辑:我本应注意到发送缓冲区和接收缓冲区是同一块内存。这称为缓冲区别名(请参阅Mvapich2 buffer aliasing),此时必须使用标记MPI_IN_PLACE
(请参阅How does MPI_IN_PLACE work with MPI_Scatter?)。上面的代码已相应修改。抱歉之前的回答不完整!