当我将循环索引增加到某个阈值时,我使用OpenMP的F77脚本(下面给出)会出现seg故障问题。我意识到这是一个常见的问题,但我已经在mac上找到了解决方法,并试图弄清楚如何在我的linux机器上做同样的事情。
代码包含在下面,我为其长度道歉 - 大部分脚本专门用于设置通过ZGEMM倍增的矩阵。注意,当我增加方程的数量时,在我的代码中用neq表示,我得到了前面提到的seg故障问题。
program test2
implicit none
include 'omp_lib.h'
integer neq, niv, Mt
parameter (neq = 4*200)
parameter (niv = neq/4)
double precision y(neq), yp(neq), atol(neq), u(niv)
integer m, j, kk, l, n, l1, l2, l3, i, ll, indsum, indsum2
integer indsum3, i4, k, k1, k2,k3, k4, k5, k6, l2p, l2pp
integer k3a, k3b
double complex p(-niv:niv,-niv:niv), V(niv), resd2a,
& Vec(-niv:niv), p2(1:niv,-niv:niv)
double complex qmat(-niv:niv,-niv:niv,-niv:niv)
double complex qmat2(1:niv,-niv:niv)
double complex sqmat(-niv:niv,-niv:niv,-niv:niv)
double complex sqmat0(-niv:niv,-niv:niv,-niv:niv)
double complex sqmat1(-niv:niv,-niv:niv,-niv:niv)
double complex sqmat2(-niv:niv,-niv:niv,-niv:niv)
double complex sqmat3(-niv:niv,-niv:niv,-niv:niv),
& C(-niv:niv,-niv:niv)
double complex sqmat3b(-niv:niv,-niv:niv,-niv:niv),
& sqmat3c(-niv:niv,-niv:niv,-niv:niv)
double complex q(-niv:niv,-niv:niv), q2(-niv:niv,-niv:niv),
& smat(-niv:niv,-niv:niv,-niv:niv), alpha, beta
double complex sumdum, sum1, sum2(niv), sum3(niv), qsum,
& resd, resd2, sumdum2, out(-niv:niv,-niv:niv)
double complex ssum1, ssum2, forcing(niv)
double precision seconds
call omp_set_num_threads ( 16 )
seconds = omp_get_wtime ( )
write ( *, '(a)' ) ' '
write ( *, '(a,i8)' )
& ' Number of processors available = ', omp_get_num_procs ( )
write ( * ,'(a,i8)' )
& ' Number of threads = ', omp_get_max_threads ( )
y = 0
do i = 1, niv
y(i) = 0.1d0*i
enddo
do i = niv+1,2*niv
y(i) = 0.2d0
enddo
do j = -niv, niv
do m = -niv, niv
kk = m-j
l = m-j
k1 = m
k2 = j
k3a = m
k3b = j
k4 = m-j
k5 = j
k6 = m-j
if (abs(kk) .gt. niv) then
kk = 0
else
kk = 1
end if
if (k1 .eq. 0) then
k1 = 0
else
k1=1
end if
if (k2.eq.0) then
k2 = 0
else
k2 = 1
end if
if (k3a .eq. 0) then
k3a=1
else
k3a=0
end if
if (k3b .eq. 0) then
k3b=1
else
k3b=0
end if
if (k4 .eq. 0) then
k4 = 0
else
k4 = 1
end if
if (k5 .eq. 0) then
k5=2
else
k5=1
end if
if (k6 .eq. 0) then
k6=2
else
k6=1
end if
p(m,j)=(0.5*k5*k6*kk*abs(k4*sign(1,m-j)-k2*sign(1,j))*cmplx(k4
& *y(abs(m-j))+1-k4,k4*y(niv+abs(m-j))*sign(1,m-j))-0.5*cmplx(k1*
& y(abs(m))+(1-k1), k1*y(abs(m)+niv)*sign(1, m))*cmplx(k2*y(abs(j
& ))+(1-k2),-k2*y(abs(j)+niv)*sign(1,j)))/((abs(m)+k3a)**(0.5)*(
& k3b+abs(j)))
enddo
enddo
Mt= 2*niv+1
alpha = cmplx(0.25d0,0.0d0)
beta = cmplx(0.0d0,0.0d0)
call ZGEMM('T','N',Mt,Mt,niv,alpha, p(1:niv,-niv:niv),niv,
&p(-1:-niv:-1,-niv:niv),niv,beta, q ,Mt)
do m= -niv, niv
do j= -niv, niv
do n= -niv, niv
kk = m-j
k1 = m
k2 = j
k3a = m
k3b = j
k5 = m-j
k6 = j
l = m-j
if (abs(kk). gt. niv) then
kk = 0
else
kk = 1
end if
indsum = m-j-n
if (indsum .eq. 0) then
indsum = 1
else
indsum = 0
end if
indsum2 = m+j-n
if (indsum2 .eq. 0) then
indsum2 = 1
else
indsum2 = 0
end if
indsum3 = j+n
if (indsum3 .eq. 0) then
indsum3 = 1
else
indsum3 = 0
end if
i4 = m-n
if (i4 .eq. 0) then
i4 = 1
else
i4 = 0
end if
if (k3a .eq. 0) then
k3a=1
else
k3a=0
end if
if (k3b .eq. 0) then
k3b=1
else
k3b=0
end if
if (k1 .eq. 0) then
k1 = 0
else
k1 = 1
end if
if (k2 .eq. 0) then
k2 = 0
else
k2 = 1
end if
if (k5 .eq. 0) then
k5=2
else
k5=1
end if
if (k6 .eq. 0) then
k6=2
else
k6=1
end if
if (l.eq.0) then
l = 0
else
l = 1
end if
qmat(m,j,n) = (0.5*k5*k6*kk*abs(l*sign(1,m-j)-k2*sign(1,j))*
& cmplx(indsum,0)-0.5*indsum3*cmplx(y(abs(m))+1-k1,sign(1,
& m)*y(abs(m)+niv))-0.5*i4*cmplx(y(abs(j))+1-k2, -sign(1,
& j)*y(abs(j)+niv)))/((abs(m)+k3a)**(0.5)*(
& k3b+abs(j)))
enddo
enddo
enddo
c$omp parallel private(n) shared(sqmat3)
c$omp do
do n=-niv ,niv
call ZGEMM('T','N',Mt,Mt,niv,alpha, qmat(1:niv,-niv:niv,n),niv,
&p(-1:-niv:-1,-niv:niv),niv,beta, sqmat0(:,:,n),Mt)
call ZGEMM('T','N',Mt,Mt,niv,alpha,p(1:niv,-niv:niv)
& ,niv,qmat(-1:-niv:-1,-niv:niv,n),niv,beta, sqmat2(:,:,n),Mt)
sqmat3(:,:,n) = sqmat0(:,:,n)+sqmat2(:,:,n)
enddo
c$omp end do
c$omp end parallel
seconds = omp_get_wtime ( ) - seconds;
print*, seconds
return
end
seg故障是由于最后一个循环,涉及到矩阵乘法的循环。
现在,我使用以下方法在我的mac上编译:
$gfortran -fopenmp -O3 -Wl,-stack_size,0x320000000 test.f -lblas
我发现程序执行并给出了预期的结果。注意,如果我将stacksize设置为,例如
-stack_size,0x80000000
我遇到了一个段错误。
在我的linux机器上(Ubunut 12.04 _x86_64)我已经设置了
ulimit -s unlimited
并使用
进行编译 $gfortran -fopenmp -O3 test2.f /home/user/OpenBLAS/libopenblas.a
我得到了分段错误。检查ulimit -a,我看到stacksize设置为无限制。另外,我已经使用了例如
来设置从属线程的堆栈大小 $export OMP_STACKSIZE=16M
但这似乎没有帮助。在我看来,基于我的mac上的类似问题,我必须将stacksize增加到超过8M的东西,但似乎不能强迫linux机器执行此操作。
所以,我的问题是,克服这样一个问题最简单的方法是什么?我当然是初学者,所以任何建议都表示赞赏。
尼克