I am a new user of MVAPICH2 and ran into trouble when I started using it.
First, I believe I installed it successfully with:
./configure --disable-fortran --enable-cuda
make -j 4
make install
There were no errors.
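As a sanity check (my own addition, not something from the build log), I can confirm which MVAPICH2 binaries end up on the PATH; if I am not mistaken, mpiname ships with MVAPICH2 and prints the version together with the configure flags it was built with:
which mpicc mpiexec mpirun_rsh   # confirm the freshly installed wrappers/launchers are the ones being found
mpiname -a                       # print MVAPICH2 version and build configuration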
But when I try to run the cpi example in the examples directory, I run into the following situation:
I can connect to the nodes gpu-cluster-1 and gpu-cluster-4 over ssh without a password.
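For instance, a plain ssh between the two nodes returns right away without asking for a password (an illustrative check, not copied verbatim from my terminal):
run@gpu-cluster-1:~$ ssh gpu-cluster-4 hostname
gpu-cluster-4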
Running the cpi example with mpirun_rsh on gpu-cluster-1 and on gpu-cluster-4 separately works fine, like this:
run@gpu-cluster-1:~/mvapich2-2.1rc1/examples$ mpirun_rsh -ssh -np 2 gpu-cluster-1 gpu-cluster-1 ./cpi
Process 0 of 2 is on gpu-cluster-1
Process 1 of 2 is on gpu-cluster-1
pi is approximately 3.1415926544231318, Error is 0.0000000008333387
wall clock time = 0.000089
run@gpu-cluster-4:~/mvapich2-2.1rc1/examples$ mpirun_rsh -ssh -np 2 gpu-cluster-4 gpu-cluster-4 ./cpi
Process 0 of 2 is on gpu-cluster-4
Process 1 of 2 is on gpu-cluster-4
pi is approximately 3.1415926544231318, Error is 0.0000000008333387
wall clock time = 0.000134
Running the cpi example across gpu-cluster-1 and gpu-cluster-4 with mpiexec also works fine, like this:
run@gpu-cluster-1:~/mvapich2-2.1rc1/examples$ mpiexec -np 2 -f hostfile ./cpi
Process 0 of 2 is on gpu-cluster-1
Process 1 of 2 is on gpu-cluster-4
pi is approximately 3.1415926544231318, Error is 0.0000000008333387
wall clock time = 0.000352
The content of the hostfile is "gpu-cluster-1\ngpu-cluster-4".
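Written out line by line, the hostfile is simply:
gpu-cluster-1
gpu-cluster-4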
However, when I run the cpi example across gpu-cluster-1 and gpu-cluster-4 with mpirun_rsh, the problem shows up:
run@gpu-cluster-1:~/mvapich2-2.1rc1/examples$ mpirun_rsh -ssh -np 2 -hostfile hostfile ./cpi
Process 1 of 2 is on gpu-cluster-4
----------------- It hangs here and does not go on ------------------------
After a long time, I pressed Ctrl+C, and it showed:
^C[gpu-cluster-1:mpirun_rsh][signal_processor] Caught signal 2, killing job
run@gpu-cluster-1:~/mvapich2-2.1rc1/examples$ [gpu-cluster-4:mpispawn_1][read_size] Unexpected End-Of-File on file descriptor 6. MPI process died?
[gpu-cluster-4:mpispawn_1][read_size] Unexpected End-Of-File on file descriptor 6. MPI process died?
[gpu-cluster-4:mpispawn_1][handle_mt_peer] Error while reading PMI socket. MPI process died?
[gpu-cluster-4:mpispawn_1][report_error] connect() failed: Connection refused (111)
I have been puzzled by this for quite a while. Could you help me figure out what is going wrong?
Here is the code of the cpi example:
#include "mpi.h"
#include <stdio.h>
#include <math.h>
double f(double);
double f(double a)
{
return (4.0 / (1.0 + a*a));
}
int main(int argc,char *argv[])
{
int n, myid, numprocs, i;
double PI25DT = 3.141592653589793238462643;
double mypi, pi, h, sum, x;
double startwtime = 0.0, endwtime;
int namelen;
char processor_name[MPI_MAX_PROCESSOR_NAME];
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
MPI_Get_processor_name(processor_name,&namelen);
fprintf(stdout,"Process %d of %d is on %s\n",
myid, numprocs, processor_name);
fflush(stdout);
n = 10000; /* default # of rectangles */
if (myid == 0)
startwtime = MPI_Wtime();
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
h = 1.0 / (double) n;
sum = 0.0;
/* A slightly better approach starts from large i and works back */
for (i = myid + 1; i <= n; i += numprocs)
{
x = h * ((double)i - 0.5);
sum += f(x);
}
mypi = h * sum;
MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (myid == 0) {
endwtime = MPI_Wtime();
printf("pi is approximately %.16f, Error is %.16f\n",
pi, fabs(pi - PI25DT));
printf("wall clock time = %f\n", endwtime-startwtime);
fflush(stdout);
}
MPI_Finalize();
return 0;
}
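For reference, a minimal sketch of how I build and launch the example with the MVAPICH2 wrappers (the source file name cpi.c is my assumption; the examples directory normally builds it via make):
mpicc -o cpi cpi.c                               # rebuild the example with the MVAPICH2 compiler wrapper
mpiexec -np 2 -f hostfile ./cpi                  # the two-node launch that works for me
mpirun_rsh -ssh -np 2 -hostfile hostfile ./cpi   # the two-node launch that hangs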
#include "mpi.h"
#include <stdio.h>
#include <math.h>
double f(double);
double f(double a)
{
return (4.0 / (1.0 + a*a));
}
int main(int argc,char *argv[])
{
int n, myid, numprocs, i;
double PI25DT = 3.141592653589793238462643;
double mypi, pi, h, sum, x;
double startwtime = 0.0, endwtime;
int namelen;
char processor_name[MPI_MAX_PROCESSOR_NAME];
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&numprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&myid);
MPI_Get_processor_name(processor_name,&namelen);
fprintf(stdout,"Process %d of %d is on %s\n",
myid, numprocs, processor_name);
fflush(stdout);
n = 10000; /* default # of rectangles */
if (myid == 0)
startwtime = MPI_Wtime();
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
h = 1.0 / (double) n;
sum = 0.0;
/* A slightly better approach starts from large i and works back */
for (i = myid + 1; i <= n; i += numprocs)
{
x = h * ((double)i - 0.5);
sum += f(x);
}
mypi = h * sum;
MPI_Reduce(&mypi, &pi, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (myid == 0) {
endwtime = MPI_Wtime();
printf("pi is approximately %.16f, Error is %.16f\n",
pi, fabs(pi - PI25DT));
printf("wall clock time = %f\n", endwtime-startwtime);
fflush(stdout);
}
MPI_Finalize();
return 0;
}