I am new to MPI, so could someone tell me what I am doing wrong in the code below? I built this code from an answer I found on Stack Overflow, so I have only modified it slightly to suit my needs. The problem I face is that the 2D array "A", which is created on process 0, cannot be seen by the other processes of ranks 1, 2 and 3 (I only have 4 processors). That is, when a process wants to use this matrix "A" inside the function matrix_element(), i.e. when

cc[m][n] = matrix_element(m,n,A)

is called by a process of rank higher than 0, the program terminates with a segmentation fault (the place where this happens is marked with asterisks in the code). Only the root process 0 is able to use this array to generate the sub-arrays cc[m][n] inside each process. I have tried to broadcast the 2D array "A" to all processes with

for(m=0;m<MAX_M;m++) {
    MPI_Bcast(A[m],MAX_M,MPI_DOUBLE,0,MPI_COMM_WORLD);
}

so that every process could use it, but then the code terminates even earlier, with an error related to an MPI_Barrier that is called later in the code. I would simply like to know how to broadcast "A" to all processes correctly, and whether there is any problem with accessing "A" inside the matrix_element() function, which is used by all processes. Anyway, the code follows:
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "nrutil.h"
#define SQR(x) ((x)*(x))
double x[1000],w[1000];
double result,global_result;
double **A,**c,**cc;
long nrl,nrh,ncl,nch;
int i,j,MAX_M,n,m,k;
int mx,my,mz,nx,ny,nz;
int ngauher;
int lstart,lend,id,p,num_elements;
int count;
FILE *ingau,*inc,*mdat,*outptr_A;
FILE *globalarrayptr=NULL;
FILE *globalarrayptr2=NULL;
FILE *localarrayptr=NULL;
char *globalarray;
char *localarray;
int malloc2d(double ***array, int n, int m);
double **dmatrix(long nrl, long nrh, long ncl, long nch);
double matrix_element(int m, int n, double **A);
double hermitef(double u, int m);
double calculate_c(int m,int n, double *x, double *w, int ngauher);
double *h;
int main(int argc, char **argv) {
    const int MAX_M=10;
    const int procMAX_M=2;
    const int ngauher=20;
    int i,j,k,id,p,disp;
    MPI_Init(&argc,&argv);
    MPI_Comm_rank(MPI_COMM_WORLD,&id);
    MPI_Comm_size(MPI_COMM_WORLD,&p);
    if((localarrayptr = fopen("a.dat","w")) == NULL) {
        fprintf(stderr,"Could not open file a.dat\n");
        exit(1);
    }
    if(id == 0) {
        if((ingau = fopen("gauspnts.dat","r")) == NULL) {
            fprintf(stderr,"Could not open file gauspnts.dat");
            exit(1);
        }
        if((globalarrayptr = fopen("c.dat","w")) == NULL) {
            fprintf(stderr,"Could not open file c.dat\n");
            exit(1);
        }
        printf(" opened files \n");
        h = (double *)calloc(MAX_M,sizeof(double));
        nrl=0;
        nrh=MAX_M;
        ncl=0;
        nch=MAX_M;
        malloc2d(&c,MAX_M,MAX_M);
        malloc2d(&A,MAX_M,MAX_M);
        for(i=0;i<nrh;i++) {
            for(j=0;j<nch;j++) {
                c[i][j]=0.0;
                fprintf(globalarrayptr," %g ",c[i][j]);
            }
            fprintf(globalarrayptr,"\n");
        }
        for(k=0;k<ngauher;k++) {
            fscanf(ingau," %lf %lf \n",&x[k],&w[k]);
            printf(" %g %g \n",x[k],w[k]);
        }
        // MPI_Bcast(x,ngauher,MPI_DOUBLE,0,MPI_COMM_WORLD); // doesn't work
        // MPI_Bcast(w,ngauher,MPI_DOUBLE,0,MPI_COMM_WORLD); // doesn't work
        /* The problematic array is created right here in rank 0 */
        for(m=0;m<MAX_M;m++) {
            for(n=0;n<MAX_M;n++) {
                A[m][n]=calculate_c(m,n,x,w,ngauher);
                printf(" rank=%d A[%d][%d] = %g \n",i,m,n,A[m][n]);
            }
        }
    }
    for(m=0;m<MAX_M;m++) {
        MPI_Bcast(A[m],MAX_M,MPI_DOUBLE,0,MPI_COMM_WORLD); // doesn't work and
                                                           // causes a seg fault
    }
    nrl=0;
    nrh=MAX_M/procMAX_M;
    ncl=0;
    nch=MAX_M/procMAX_M;
    malloc2d(&cc,MAX_M/procMAX_M,MAX_M/procMAX_M);
    int sizes[2] = {MAX_M,MAX_M};
    int subsizes[2] = {MAX_M/procMAX_M,MAX_M/procMAX_M};
    int starts[2] = {0,0};
    MPI_Datatype type, subarrtype;
    MPI_Type_create_subarray(2,sizes,subsizes,starts,MPI_ORDER_C,MPI_DOUBLE,&type);
    MPI_Type_create_resized(type,0,MAX_M/procMAX_M*sizeof(double),&subarrtype);
    MPI_Type_commit(&subarrtype);
    double *globalptr=NULL;
    if(id == 0) globalptr=&(c[0][0]);
    int sendcounts[procMAX_M*procMAX_M];
    int displs[procMAX_M*procMAX_M];
    if(id == 0) {
        for(i=0; i<procMAX_M*procMAX_M;i++) sendcounts[i] = 1;
        disp = 0;
        for(i=0;i<procMAX_M;i++) {
            for (j=0; j<procMAX_M; j++) {
                displs[i*procMAX_M+j] = disp;
                disp += 1;
            }
            disp += ((MAX_M/procMAX_M)-1)*procMAX_M;
        }
    }
    MPI_Scatterv(globalptr,sendcounts,displs,subarrtype,&(cc[0][0]),
                 MAX_M*MAX_M/(procMAX_M*procMAX_M),
                 MPI_DOUBLE,0,MPI_COMM_WORLD);
    /* all processors print their local data */
    for(i=0;i<p;i++) {
        if(id == i) {
            printf("Local process on rank %d is:\n", id);
            for(m=0;m<MAX_M/procMAX_M;m++) {
                putchar('|');
                for (n=0; n<MAX_M/procMAX_M;n++) {
                    // if(id == 0) { j=m; k=n; }
                    // if(id == 1) { j=m+procMAX_M-1; k=n; }
                    // if(id == 2) { j=m; k=n+procMAX_M-1; }
                    // if(id == 3) { j=m+procMAX_M-1; k=n+procMAX_M-1; }
                    /* ************************************************************************* */
                    cc[m][n]=matrix_element(m,n,A); // THE PROBLEM WITH "A" ARISES HERE!!
                    /* ************************************************************************* */
                    printf(" cc[%d][%d] = %g id = %d\n",m,n,cc[m][n],id);
                }
                printf("|\n");
            }
        }
        MPI_Barrier(MPI_COMM_WORLD);
    }
    /* it all goes back to process 0 */
    MPI_Gatherv(&(cc[0][0]),MAX_M*MAX_M/(procMAX_M*procMAX_M),MPI_DOUBLE,
                globalptr,sendcounts,displs,subarrtype,0,MPI_COMM_WORLD);
    /* don't need the local data anymore */
    /* or the MPI data type */
    MPI_Type_free(&subarrtype);
    if (id == 0) {
        if((globalarrayptr2 = fopen("B.dat","w")) == NULL) {
            fprintf(stderr,"Could not open file B.dat\n");
            exit(1);
        }
        for(i=0;i<MAX_M;i++) {
            for(j=0;j<MAX_M;j++) {
                fprintf(globalarrayptr2," %g ",c[i][j]);
            }
            fprintf(globalarrayptr2,"\n");
        }
    }
    MPI_Finalize();
    return 0;
}
int malloc2d(double ***array, int n, int m) {
    double *p;
    /* allocate the n*m contiguous items */
    p = (double *)malloc(n*m*sizeof(double));
    if (!p) return -1;
    /* allocate the row pointers into the memory */
    (*array) = (double **)malloc(n*sizeof(double *));
    if (!(*array)) {
        free(p);
        return -1;
    }
    /* set up the pointers into the contiguous memory */
    for (i=0; i<n; i++)
        (*array)[i] = &(p[i*m]);
    return 0;
}
double calculate_c(int m, int n, double *x, double *w, int ngauher) {
    double result=0;
    int i;
    for(i=0;i<ngauher;i++) {
        result+=w[i]*exp(-SQR(x[i]))*SQR(hermitef(x[i],m)*hermitef(x[i],n));
    }
    return(result);
}
double hermitef(double u, int m) {
    int j;
    double x,pi;
    pi=acos(-1.);
    x=u;
    h[0]=1./pow(pi,0.25);
    h[1]=sqrt(2.)*x/pow(pi,0.25);
    for(j=2;j<m+1;j++) {
        h[j] = sqrt(2./(double)j)*x*h[j-1]-sqrt((double)(j-1)/((double)j))*h[j-2];
    }
    return(h[m]);
}
double matrix_element(int m, int n, double **A) {
    result=0.;
    /* In this function, A is seen only by the root process ?? */
    for(mx=0;mx<=m;mx++) {
        for(my=0;my<=m;my++) {
            for(mz=0;mz<=m;mz++) {
                for(nx=0;nx<=n;nx++) {
                    for(ny=0;ny<=n;ny++) {
                        for(nz=0;nz<=n;nz++) {
                            if(((mx+my+mz == m) && (nx+ny+nz == n))) {
                                result+=A[mx][nx]*A[my][ny]*A[mz][nz];
                            }
                        }
                    }
                }
            }
        }
    }
    return(result);
}
Answer (score: 0)
If you want to MPI_Bcast() the array to every process, the array must be allocated size times, once per process; allocating it only on rank 0 is not enough. The problem is here:

if(id==0){
    ...
    malloc2d(&A,MAX_M,MAX_M);
    ...
}
Move malloc2d(&A,MAX_M,MAX_M); out of this test and the MPI_Bcast() will work correctly.
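For illustration, here is a minimal, self-contained sketch of that fix (the file name bcast2d.c, the placeholder fill values and the mpicc/mpirun commands in the comment are my own assumptions; the malloc2d() allocator is copied from the question so the example compiles on its own): every rank allocates A, only rank 0 fills it, and the row-by-row MPI_Bcast() loop from the question then delivers valid data on every rank.

/* bcast2d.c -- sketch only: allocate the 2D array on EVERY rank,
 * fill it on rank 0, then broadcast it row by row.
 * Build/run (assumed environment): mpicc bcast2d.c -o bcast2d && mpirun -np 4 ./bcast2d
 */
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define MAX_M 10

/* contiguous allocator, copied from the question */
static int malloc2d(double ***array, int n, int m) {
    double *p = (double *)malloc(n*m*sizeof(double));
    if (!p) return -1;
    *array = (double **)malloc(n*sizeof(double *));
    if (!*array) { free(p); return -1; }
    for (int i = 0; i < n; i++) (*array)[i] = &p[i*m];
    return 0;
}

int main(int argc, char **argv) {
    int id, nproc;
    double **A;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &id);
    MPI_Comm_size(MPI_COMM_WORLD, &nproc);

    /* every rank allocates A -- this call must NOT sit inside if(id == 0) */
    malloc2d(&A, MAX_M, MAX_M);

    if (id == 0) {                       /* only the root fills the matrix */
        for (int m = 0; m < MAX_M; m++)
            for (int n = 0; n < MAX_M; n++)
                A[m][n] = m*MAX_M + n;   /* placeholder values */
    }

    /* the row-by-row broadcast from the question is now valid everywhere,
       because A[m] points to allocated memory on every rank */
    for (int m = 0; m < MAX_M; m++)
        MPI_Bcast(A[m], MAX_M, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    printf("rank %d of %d sees A[3][4] = %g\n", id, nproc, A[3][4]);

    free(A[0]);     /* the contiguous block */
    free(A);        /* the row pointers */
    MPI_Finalize();
    return 0;
}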
Note that since you allocated the 2D array so that its values are contiguous in memory, you can also broadcast the whole matrix with a single call:

MPI_Bcast(A[0],MAX_M*MAX_M,MPI_DOUBLE,0,MPI_COMM_WORLD);
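And a compact sketch of that single-call variant (again repeating the same allocator so the snippet stands alone; the test data is an arbitrary placeholder): because malloc2d() places the MAX_M rows back to back in one malloc'd block, A[0] already points to MAX_M*MAX_M contiguous doubles, so one MPI_Bcast() replaces the whole loop.

/* bcast2d_single.c -- sketch only: same allocation pattern, single broadcast */
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>

#define MAX_M 10

static int malloc2d(double ***array, int n, int m) {   /* same contiguous allocator */
    double *p = (double *)malloc(n*m*sizeof(double));
    if (!p) return -1;
    *array = (double **)malloc(n*sizeof(double *));
    if (!*array) { free(p); return -1; }
    for (int i = 0; i < n; i++) (*array)[i] = &p[i*m];
    return 0;
}

int main(int argc, char **argv) {
    int id;
    double **A;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &id);

    malloc2d(&A, MAX_M, MAX_M);               /* on every rank */
    if (id == 0)
        for (int i = 0; i < MAX_M*MAX_M; i++)
            A[0][i] = (double)i;              /* placeholder test data */

    /* one call covers the whole MAX_M x MAX_M block */
    MPI_Bcast(A[0], MAX_M*MAX_M, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    printf("rank %d: A[MAX_M-1][MAX_M-1] = %g\n", id, A[MAX_M-1][MAX_M-1]);

    free(A[0]);
    free(A);
    MPI_Finalize();
    return 0;
}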
Bye,