salloc -A arctest --gres=gpu:1 --partition=v100_dev_q -N 3 --tasks-per-node=2 --cpus-per-task=4 --time=2:00:00 srun -n1 -N1 --pty --preserve-env --cpu-bind=no --mpi=pmi2 --distribution=cyclic:cyclic $SHELL
module load intel/18.2 R/3.6.1 openmpi/4.0.1 R-parallel/3.6.1 cuda/10.1.168
我们正在使用SLURM 17.11(我认为),并且启用了cgroups,UCX 1.3.0,版本为4.2.1.2。
--with-cma \
--enable-dlopen \
--enable-shared \
--with-mxm=/opt/mellanox/mxm \
--with-pmi=/usr \
所有这些都在Mellanox ConnectX-5卡上。
#define _GNU_SOURCE /* must precede all includes: exposes sched_getcpu(), sched_getaffinity() and CPU_* in <sched.h> */
#include <omp.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>
/*
 * OpenMP placement probe.
 *
 * Starts a parallel region with 10 threads. Each thread prints the host name,
 * the CPU it is currently running on, its OpenMP thread number, the team size,
 * the value of omp_get_max_threads(), and the number of CPUs in its scheduler
 * affinity mask (or -1 if the mask could not be read).
 *
 * Returns 0.
 */
int main() {
    #pragma omp parallel num_threads(10)
    {
        char hostbuffer[256];
        cpu_set_t mask;
        int schedaff = -1; /* stays -1 when the affinity mask cannot be queried */

        /* Fall back to a placeholder instead of printing an uninitialised buffer. */
        if (gethostname(hostbuffer, sizeof(hostbuffer)) != 0) {
            snprintf(hostbuffer, sizeof(hostbuffer), "unknown");
        }
        /* gethostname() may leave a truncated name without a terminating NUL. */
        hostbuffer[sizeof(hostbuffer) - 1] = '\0';

        /* pid 0 selects the calling thread; report how many CPUs it may run on. */
        CPU_ZERO(&mask);
        if (sched_getaffinity(0, sizeof(mask), &mask) == 0) {
            schedaff = CPU_COUNT(&mask);
        }

        int coreid = sched_getcpu();
        int id = omp_get_thread_num();
        int total = omp_get_num_threads();
        int maxthread = omp_get_max_threads();

        printf("Host: %s : core: %d , I am running process %d out of %d (max %d ) with affinity %d \n", hostbuffer, coreid, id, total, maxthread, schedaff);
    }
    printf("parallel for ends.\n");
    return 0;
}
Host: ca223 : core: 0 , I am running process 8 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 8 , I am running process 7 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 2 , I am running process 9 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 6 , I am running process 3 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 8 , I am running process 2 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 10 , I am running process 1 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 4 , I am running process 4 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 12 , I am running process 6 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 14 , I am running process 5 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 18 , I am running process 0 out of 10 (max 12 ) with affinity -1
看起来不错,所以让我们在制作主机文件后添加一些R / Rmpi
ca223 slots=2
ca224 slots=2
mpirun -v -np 1 --bind-to none --hostfile hostfile --mca mpi_warn_on_fork 0 --mca btl_openib_allow_ib 1 Rscript omp_test_S2.R
# In case R exits unexpectedly, have it automatically clean up
# resources taken up by Rmpi (slaves, memory, etc...)
#
# Session-exit hook, called with no arguments. It does nothing unless the
# "mpi_initialize" symbol is loaded. If communicator 1 still has members, the
# slaves are closed here so they do not outlive the master; the user is then
# reminded to quit through mpi.quit().
.Last <- function() {
  if (is.loaded("mpi_initialize")) {
    if (mpi.comm.size(1) > 0) {
      print("Please use mpi.close.Rslaves() to close slaves.")
      mpi.close.Rslaves()
    }
    print("Please use mpi.quit() to quit R")
  }
}
# Driver section: set OpenMP environment variables on the master, print the MPI
# universe size, then have the slaves identify themselves, run the OpenMP test
# binary and dump their environment, and finally close the slaves.
# NOTE(review): no library(Rmpi) or mpi.spawn.Rslaves() call is visible in this
# excerpt -- confirm slaves are spawned before the broadcasts below.
#
# Set the OpenMP environment variables in this (master) R process.
Sys.setenv(OMP_NUM_THREADS = 12 )
Sys.setenv(OMP_PROC_BIND = "false")
cat("show quick core spread on master","\n",sep="")
# Print the universe size reported by Rmpi.
ns <- mpi.universe.size()
cat("mpi.universe.size = ",ns,"\n",sep="")
# Overwrites the value read above with a fixed 2.
# NOTE(review): presumably the intended number of slaves -- confirm.
ns <- 2
# Tell all slaves to return a message identifying themselves
# (the three broadcasts assign id, ns and host on the slave side).
mpi.bcast.cmd( id <- mpi.comm.rank() )
mpi.bcast.cmd( ns <- mpi.comm.size() )
mpi.bcast.cmd( host <- mpi.get.processor.name() )
# Broadcast the same OpenMP environment settings used on the master.
mpi.bcast.cmd( Sys.setenv(OMP_NUM_THREADS = 12 ))
mpi.bcast.cmd( Sys.setenv(OMP_PROC_BIND = "false") )
mpi.remote.exec(paste("I am",mpi.comm.rank(),"of",mpi.comm.size()))
# Broadcast shell commands: run the OpenMP test binary, appending its stdout to
# the file slave_S2, then append the output of `env` to slave_env_S2.
mpi.bcast.cmd(system('./openmp_example >>slave_S2'))
mpi.bcast.cmd(system('env >> slave_env_S2'))
# Tell all slaves to close down, and exit the program
# (dellog = FALSE is passed; presumably this keeps the slave log files -- confirm).
mpi.close.Rslaves(dellog = FALSE)
core spread from slaves
Host: ca223 : core: 4 , I am running process 0 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 4 , I am running process 2 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 4 , I am running process 3 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 4 , I am running process 4 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 4 , I am running process 5 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 4 , I am running process 6 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 4 , I am running process 7 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 4 , I am running process 8 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 4 , I am running process 9 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 4 , I am running process 1 out of 10 (max 12 ) with affinity -1
parallel for ends.
Host: ca224 : core: 0 , I am running process 0 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 0 , I am running process 2 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 0 , I am running process 3 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 0 , I am running process 4 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 0 , I am running process 5 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 0 , I am running process 6 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 0 , I am running process 7 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 0 , I am running process 8 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 0 , I am running process 9 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 0 , I am running process 1 out of 10 (max 12 ) with affinity -1
parallel for ends.
core spread from slaves
Host: ca223 : core: 2 , I am running process 9 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 8 , I am running process 7 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 18 , I am running process 0 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 0 , I am running process 8 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 6 , I am running process 3 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 10 , I am running process 1 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 12 , I am running process 6 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 4 , I am running process 4 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 8 , I am running process 2 out of 10 (max 12 ) with affinity -1
Host: ca223 : core: 14 , I am running process 5 out of 10 (max 12 ) with affinity -1
parallel for ends.
Host: ca224 : core: 2 , I am running process 8 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 16 , I am running process 5 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 10 , I am running process 1 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 0 , I am running process 9 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 6 , I am running process 2 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 4 , I am running process 3 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 12 , I am running process 4 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 10 , I am running process 7 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 14 , I am running process 6 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 18 , I am running process 0 out of 10 (max 12 ) with affinity -1
parallel for ends.
Host: ca224 : core: 8 , I am running process 0 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 16 , I am running process 7 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 2 , I am running process 3 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 6 , I am running process 2 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 14 , I am running process 8 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 8 , I am running process 1 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 16 , I am running process 5 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 16 , I am running process 9 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 16 , I am running process 6 out of 10 (max 12 ) with affinity -1
Host: ca224 : core: 16 , I am running process 4 out of 10 (max 12 ) with affinity -1
parallel for ends.
Run failed due to: pml_ucx.c:176 Error: Failed to receive UCX worker address
在提问的过程中,我发现 mpi.universe.size() 现在似乎与 SLURM 中的配置一致,即 nodes = 2、tasks = 3 时大小为 6。如果我只想让它等于节点数,应该怎么做?理想情况下,我希望通过 MPI 调用得到这个值,而不是靠解析环境变量这种取巧的办法。
以 -np 2 运行:
mpirun -np 2 --bind-to none --hostfile hostfile --map-by ppr:1:node --mca mpi_warn_on_fork 0 --mca btl_openib_allow_ib 1 ./openmp_example
Host: ca207 : core: 2 , I am running process 8 out of 10 (max 8 ) with affinity -1
Host: ca207 : core: 0 , I am running process 9 out of 10 (max 8 ) with affinity -1
Host: ca207 : core: 14 , I am running process 0 out of 10 (max 8 ) with affinity -1
Host: ca207 : core: 8 , I am running process 2 out of 10 (max 8 ) with affinity -1
Host: ca207 : core: 6 , I am running process 3 out of 10 (max 8 ) with affinity -1
Host: ca207 : core: 4 , I am running process 4 out of 10 (max 8 ) with affinity -1
Host: ca207 : core: 10 , I am running process 1 out of 10 (max 8 ) with affinity -1
Host: ca207 : core: 12 , I am running process 6 out of 10 (max 8 ) with affinity -1
Host: ca207 : core: 0 , I am running process 5 out of 10 (max 8 ) with affinity -1
Host: ca207 : core: 8 , I am running process 7 out of 10 (max 8 ) with affinity -1
parallel for ends.
Host: ca208 : core: 2 , I am running process 9 out of 10 (max 8 ) with affinity -1
Host: ca208 : core: 0 , I am running process 8 out of 10 (max 8 ) with affinity -1
Host: ca208 : core: 14 , I am running process 0 out of 10 (max 8 ) with affinity -1
Host: ca208 : core: 6 , I am running process 3 out of 10 (max 8 ) with affinity -1
Host: ca208 : core: 10 , I am running process 1 out of 10 (max 8 ) with affinity -1
Host: ca208 : core: 4 , I am running process 4 out of 10 (max 8 ) with affinity -1
Host: ca208 : core: 12 , I am running process 6 out of 10 (max 8 ) with affinity -1
Host: ca208 : core: 2 , I am running process 5 out of 10 (max 8 ) with affinity -1
Host: ca208 : core: 8 , I am running process 2 out of 10 (max 8 ) with affinity -1
Host: ca208 : core: 8 , I am running process 7 out of 10 (max 8 ) with affinity -1
parallel for ends.
mpirun -np 2 --hostfile hostfile --map-by ppr:1:node --mca mpi_warn_on_fork 0 --mca btl_openib_allow_ib 1 ./openmp_example
Host: ca207 : core: 0 , I am running process 0 out of 10 (max 1 ) with affinity -1
Host: ca207 : core: 0 , I am running process 1 out of 10 (max 1 ) with affinity -1
Host: ca208 : core: 0 , I am running process 0 out of 10 (max 1 ) with affinity -1
Host: ca208 : core: 0 , I am running process 2 out of 10 (max 1 ) with affinity -1
Host: ca207 : core: 0 , I am running process 4 out of 10 (max 1 ) with affinity -1
Host: ca207 : core: 0 , I am running process 8 out of 10 (max 1 ) with affinity -1
Host: ca207 : core: 0 , I am running process 2 out of 10 (max 1 ) with affinity -1
Host: ca207 : core: 0 , I am running process 3 out of 10 (max 1 ) with affinity -1
Host: ca207 : core: 0 , I am running process 9 out of 10 (max 1 ) with affinity -1
Host: ca207 : core: 0 , I am running process 5 out of 10 (max 1 ) with affinity -1
Host: ca207 : core: 0 , I am running process 7 out of 10 (max 1 ) with affinity -1
Host: ca207 : core: 0 , I am running process 6 out of 10 (max 1 ) with affinity -1
parallel for ends.
Host: ca208 : core: 0 , I am running process 3 out of 10 (max 1 ) with affinity -1
Host: ca208 : core: 0 , I am running process 4 out of 10 (max 1 ) with affinity -1
Host: ca208 : core: 0 , I am running process 5 out of 10 (max 1 ) with affinity -1
Host: ca208 : core: 0 , I am running process 6 out of 10 (max 1 ) with affinity -1
Host: ca208 : core: 0 , I am running process 7 out of 10 (max 1 ) with affinity -1
Host: ca208 : core: 0 , I am running process 8 out of 10 (max 1 ) with affinity -1
Host: ca208 : core: 0 , I am running process 9 out of 10 (max 1 ) with affinity -1
Host: ca208 : core: 0 , I am running process 1 out of 10 (max 1 ) with affinity -1