我正在尝试将OpenMP添加到这段代码中,因为它的执行需要花费大量时间。任何能够加速这个循环的建议我都将非常感激。我已经尝试过一种解决方案,但它从不会在编号大于0的线程上执行。
! Loop nest over every (COL,ROW,LVL) cell of PSO4. Where PSO4 exceeds 1.E-10,
! mode2sec is called and row 6 of aerombin is copied, section by section, into
! EMIS1_SEC(COL,ROW,LVL,V); each copied aerombin entry is then reset to 0.0.
!
! NOTE(review): the combined PARALLEL DO directive below is followed by WRITE
! statements, not by the DO loop it is meant to apply to. The OpenMP
! specification requires the loop to follow the directive directly, and the
! matching closing directive for this form is END PARALLEL DO, not END PARALLEL.
! NOTE(review): pmtot is passed to mode2sec but is not in the PRIVATE list;
! if mode2sec writes to it, all threads would share it -- confirm in mode2sec.
!$OMP PARALLEL do private(pm25_tmp,pm10_tmp,V,aerombin,LVL,ROW,COL,isec)
! Diagnostic output of the values returned by the OpenMP runtime queries.
write(*,*) 'num procs:',omp_get_num_procs()
write(*,*) 'num of thread:',omp_get_num_threads()
write(*,*) 'thread number:', OMP_GET_THREAD_NUM()
do LVL = 1, NLAYS3D
write(*,*) 'THIS IS THE LVL FOR PSO4', LVL
DO ROW = 1, NROWS
DO COL = 1, NCOLS
! pm25_tmp receives the full PSO4 value; pm10_tmp is always zero (0.* factor).
pm25_tmp = 1.*PSO4(COL,ROW,LVL)
pm10_tmp = 0.*PSO4(COL,ROW,LVL)
! Cells at or below the 1.E-10 threshold are skipped entirely.
if (PSO4(COL,ROW,LVL).gt.1.E-10) then
call mode2sec(pm25_tmp,pm10_tmp,aerombin,pmtot,
& JDATE,JTIME,COL,ROW,LVL)
DO isec=1,nsections
! V = ngasemis + isec + 40; the meaning of the 8*5 offset is not visible here.
V = ngasemis + isec+8*5
EMIS1_SEC(COL,ROW,LVL,V) = aerombin(6,isec)
! Clear the entry once it has been copied out.
aerombin(6,isec) = 0.0
END DO
end if
end do
end do
end do
!$OMP END PARALLEL
这是我正在使用的makefile:
# Build description for the senex_rwce3_8sec executable: compiles with ifort
# and links against the I/O API and netCDF libraries found under M3LIB.
FC = ifort
M3LIB = /scratch3/NAGAPE/arl/Barry.Baker/cmaq502/CMAQv5.0.2/lib
icldir = ${M3LIB}/ioapi_31/Linux2_x86_64ifort
# IOAPI LIBRARIES
IOAPI_INC = ${M3LIB}/ioapi_31/fixed_src
IOAPI_LIB = ${M3LIB}/ioapi_31/Linux2_x86_64ifort
NETCDF_LIB = ${M3LIB}/netCDF/Linux2_x86_64ifort
#LIB1 = /usr2/tmp/lib/ioapi/ioapi_22/Linux2_x86pg
#LIB2 = /usr2/tmp/lib/netCDF/Linux
#LIBS = -L$(LIB1) -lioapi -L$(LIB2) -lnetcdf
LIBS = -L$(IOAPI_LIB) -lioapi -L$(NETCDF_LIB) -lnetcdf
EXE = senex_rwce3_8sec
# Set compiler version dependent flags and LINKTOOL
# Fortran flags
# FLAGS2 carries the OpenMP switch and the I/O API include path; in the lines
# shown here it is used only on the link command at the bottom.
# NOTE(review): no rule visible here passes FLAGS2 to the compile step. GNU
# make's built-in Fortran rules use FFLAGS, so the .o files may be compiled
# without -openmp -- confirm how the objects are actually built.
FLAGS2 = -openmp -I$(icldir)
#
# Object files
#
OBJS = senex_rwce3_eightsec.o \
senex_mode2sec_eightsec.o \
../integrator/qk15.o ../integrator/qk41.o ../integrator/qpsrt.o \
../integrator/qag.o ../integrator/qk21.o ../integrator/qk51.o \
../integrator/r1mach.o ../integrator/qage.o ../integrator/qk31.o \
../integrator/qk61.o ../integrator/xerror.o
# Link step: combines all object files and libraries into $(EXE).
# NOTE(review): make reads `$EXE` as `$(E)` followed by the literal text `XE`,
# so this target is probably not the name stored in EXE; `$(EXE)` looks intended.
# NOTE(review): the recipe line must start with a TAB character; it appears to
# have been lost when this file was pasted -- verify in the real Makefile.
$EXE: $(OBJS)
$(FC) $(FLAGS2) -o $(EXE) $(OBJS) $(LIBS)
答案 0 :(得分:2)
您遗漏了一些重要信息,例如您使用的是哪个编译器,以及(正如casely在评论中指出的)您是如何编译应用程序的。此外,您的OpenMP构造似乎不合法,因为
!$OMP PARALLEL DO
应该在DO循环之前出现。
话虽如此,您可以尝试下面的测试,看看您的系统能否成功生成OpenMP应用程序。您需要在Makefile中把FC调整为您的Fortran编译器,并把FFLAGS调整为相应的Fortran编译选项。请注意,在我提供的示例中添加了-fopenmp,因为它告诉gfortran启用代码中的OpenMP(否则,OpenMP指令将被忽略!)。
下面的代码在我的旧款Lenovo T400上的输出如下:
# ./test
Serial: num procs: 2 num of threads = 1 thread num = 0
Parallel: num procs: 2 num of threads = 2 thread num = 0
Parallel: num procs: 2 num of threads = 2 thread num = 1
请注意,omp_get_num_procs()给出了OpenMP检测到的处理器数量。例程omp_get_num_threads()返回在并行区域内运行的线程数,在任何并行区域之外调用时返回1。最后,omp_get_thread_num()返回正在执行并行区域的线程的标识符(从0到omp_get_num_threads() - 1),在并行区域之外调用时返回0。
文件 test.f90
! Minimal OpenMP smoke test: prints the processor count, team size and thread
! id reported by the OpenMP runtime, first from serial code and then from each
! iteration of a parallel loop.
PROGRAM TEST
  ! omp_lib supplies explicit interfaces for the OpenMP runtime routines, so
  ! the compiler can check the calls instead of relying on hand-written
  ! INTEGER declarations of external functions.
  USE omp_lib
  IMPLICIT NONE
  INTEGER :: I

  ! Queries issued outside of any parallel region.
  write(*,*) 'Serial: num procs:',omp_get_num_procs(), &
             " num of threads = ", omp_get_num_threads(), &
             " thread num = ", omp_get_thread_num()

  ! One iteration per detected processor; the iterations are shared among the
  ! threads of the team, and each iteration reports the thread that ran it.
  !$OMP PARALLEL DO
  DO I = 1, omp_get_num_procs()
     write(*,*) 'Parallel: num procs:',omp_get_num_procs(), &
                " num of threads = ", omp_get_num_threads(), &
                " thread num = ", omp_get_thread_num()
  ENDDO
  !$OMP END PARALLEL DO
END PROGRAM TEST
文件 Makefile
# Builds the OpenMP smoke test executable `test` from test.f90.
# FC     - Fortran compiler to invoke; adjust for your system.
# FFLAGS - compiler flags; -fopenmp makes gfortran honour OpenMP directives.
FC = gfortran
FFLAGS = -fopenmp

# $< expands to the first prerequisite (test.f90) and $@ to the target (test).
# The recipe line below must begin with a TAB character, not spaces.
test: test.f90
	$(FC) $(FFLAGS) $< -o $@