我有一个 cuBLAS Fortran 绑定的示例程序,取自之前的一个问题(见 here)。我的环境是 Ubuntu 13.10、IFORT 14.0.1 和 CUDA 5.5。代码如下:
cublasf.f
program cublas_fortran_example
implicit none
integer i, j
c helper functions
integer cublas_init
integer cublas_shutdown
integer cublas_alloc
integer cublas_free
integer cublas_set_vector
integer cublas_get_vector
c selected blas functions
double precision cublas_ddot
external cublas_daxpy
external cublas_dscal
external cublas_dcopy
double precision cublas_dnrm2
c cublas variables
integer cublas_status
real*8 x(30), y(30)
double precision alpha, beta
double precision nrm
integer*8 d_x, d_y, d_alpha, d_beta, d_nrm
integer*8 dsize1, dlength1, dlength2
double precision dresult
write(*,*) "testing cublas fortran example"
c initialize cublas library
c CUBLAS_STATUS_SUCCESS=0
cublas_status = cublas_init()
if (cublas_status /= 0) then
write(*,*) "CUBLAS Library initialization failed"
write(*,*) "cublas_status=",cublas_status
stop
endif
c initialize data
do j=1,30
x(j) = 1.0
y(j) = 2.0
enddo
dsize1 = 8
dlength1 = 30
dlength2 = 1
alpha = 2.0
beta = 3.0
c allocate device storage
cublas_status = cublas_alloc(dlength1, dsize1, d_x)
if (cublas_status /= 0) then
write(*,*) "CUBLAS device malloc failed"
stop
endif
cublas_status = cublas_alloc(dlength1, dsize1, d_y)
if (cublas_status /= 0) then
write(*,*) "CUBLAS device malloc failed"
stop
endif
cublas_status = cublas_alloc(dlength2, dsize1, d_alpha)
if (cublas_status /= 0) then
write(*,*) "CUBLAS device malloc failed"
stop
endif
cublas_status = cublas_alloc(dlength2, dsize1, d_beta)
if (cublas_status /= 0) then
write(*,*) "CUBLAS device malloc failed"
stop
endif
cublas_status = cublas_alloc(dlength2, dsize1, d_nrm)
if (cublas_status /= 0) then
write(*,*) "CUBLAS device malloc failed"
stop
endif
c copy data from host to device
cublas_status = cublas_set_vector(dlength1, dsize1, x, dlength2,
> d_x, dlength2)
if (cublas_status /= 0) then
write(*,*) "CUBLAS copy to device failed"
write(*,*) "cublas_status=",cublas_status
stop
endif
cublas_status = cublas_set_vector(dlength1, dsize1, y, dlength2,
> d_y, dlength2)
if (cublas_status /= 0) then
write(*,*) "CUBLAS copy to device failed"
write(*,*) "cublas_status=",cublas_status
stop
endif
dresult = cublas_ddot(dlength1, d_x, dlength2, d_y, dlength2)
write(*,*) "dot product result=",dresult
dresult = cublas_dnrm2(dlength1, d_x, dlength2)
write(*,*) "nrm2 of x result=",dresult
dresult = cublas_dnrm2(dlength1, d_y, dlength2)
write(*,*) "nrm2 of y result=",dresult
call cublas_daxpy(dlength1, alpha, d_x, dlength2, d_y, dlength2)
cublas_status = cublas_get_vector(dlength1, dsize1, d_y, dlength2,
> y, dlength2)
if (cublas_status /= 0) then
write(*,*) "CUBLAS copy to host failed"
write(*,*) "cublas_status=",cublas_status
stop
endif
write(*,*) "daxpy y(1) =", y(1)
write(*,*) "daxpy y(30) =", y(30)
call cublas_dscal(dlength1, beta, d_x, dlength2)
cublas_status = cublas_get_vector(dlength1, dsize1, d_x, dlength2,
> x, dlength2)
if (cublas_status /= 0) then
write(*,*) "CUBLAS copy to host failed"
write(*,*) "cublas_status=",cublas_status
stop
endif
write(*,*) "dscal x(1) =", x(1)
write(*,*) "dscal x(30) =", x(30)
call cublas_dcopy(dlength1, d_x, dlength2, d_y, dlength2)
cublas_status = cublas_get_vector(dlength1, dsize1, d_y, dlength2,
> y, dlength2)
if (cublas_status /= 0) then
write(*,*) "CUBLAS copy to host failed"
write(*,*) "cublas_status=",cublas_status
stop
endif
write(*,*) "dcopy y(1) =", y(1)
write(*,*) "dcopy y(30) =", y(30)
c deallocate GPU memory and exit
cublas_status = cublas_free(d_x)
cublas_status = cublas_free(d_y)
cublas_status = cublas_free(d_alpha)
cublas_status = cublas_free(d_beta)
cublas_status = cublas_free(d_nrm)
cublas_status = cublas_shutdown()
stop
end
当我使用gfortran编译时,我得到了正确的输出,如下所示:
编译并运行
$gfortran -c -o cublasf.o cublasf.f
$nvcc -c -DCUBLAS_GFORTRAN -I/usr/local/cuda/include -I/usr/local/cuda/src -o fortran.o /usr/local/cuda/src/fortran.c
$gfortran -o cublasf cublasf.o fortran.o -L/usr/local/cuda/lib64 -lcublas
$ ./cublasf
testing cublas fortran example
dot product result= 60.000000000000000
nrm2 of x result= 5.4772255750516612
nrm2 of y result= 10.954451150103322
daxpy y(1) = 4.0000000000000000
daxpy y(30) = 4.0000000000000000
dscal x(1) = 3.0000000000000000
dscal x(30) = 3.0000000000000000
dcopy y(1) = 3.0000000000000000
dcopy y(30) = 3.0000000000000000
但是,我需要将英特尔 Fortran 编译器与 cuBLAS 库一起使用。当我从 gfortran 切换到 ifort 时,链接阶段出现了一系列未定义的引用,如下所示:
$ifort -c -o cublasf.o cublasf.f
$nvcc -c -DCUBLAS_INTEL_FORTRAN -I/usr/local/cuda/include -I/usr/local/cuda/src -o fortran.o /usr/local/cuda/src/fortran.c
$ifort -o cublasf cublasf.o fortran.o -L/usr/local/cuda/lib64 -lcublas
cublasf.o: In function `MAIN__':
cublasf.f:(.text+0x8a): undefined reference to `cublas_init_'
cublasf.f:(.text+0x1f5): undefined reference to `cublas_alloc_'
cublasf.f:(.text+0x21c): undefined reference to `cublas_alloc_'
cublasf.f:(.text+0x243): undefined reference to `cublas_alloc_'
cublasf.f:(.text+0x26a): undefined reference to `cublas_alloc_'
cublasf.f:(.text+0x291): undefined reference to `cublas_alloc_'
cublasf.f:(.text+0x2c8): undefined reference to `cublas_set_vector_'
cublasf.f:(.text+0x3d1): undefined reference to `cublas_set_vector_'
cublasf.f:(.text+0x4d5): undefined reference to `cublas_ddot_'
cublasf.f:(.text+0x56f): undefined reference to `cublas_dnrm2_'
cublasf.f:(.text+0x609): undefined reference to `cublas_dnrm2_'
cublasf.f:(.text+0x6b6): undefined reference to `cublas_daxpy_'
cublasf.f:(.text+0x6e6): undefined reference to `cublas_get_vector_'
cublasf.f:(.text+0x8c5): undefined reference to `cublas_dscal_'
cublasf.f:(.text+0x8f5): undefined reference to `cublas_get_vector_'
cublasf.f:(.text+0xad7): undefined reference to `cublas_dcopy_'
cublasf.f:(.text+0xb07): undefined reference to `cublas_get_vector_'
cublasf.f:(.text+0xcce): undefined reference to `cublas_free_'
cublasf.f:(.text+0xcdd): undefined reference to `cublas_free_'
cublasf.f:(.text+0xcec): undefined reference to `cublas_free_'
cublasf.f:(.text+0xcfb): undefined reference to `cublas_free_'
cublasf.f:(.text+0xd0a): undefined reference to `cublas_free_'
cublasf.f:(.text+0xd11): undefined reference to `cublas_shutdown_'
我已经看过其他关于同一问题的帖子,但都没有给出真正的解决方案或解释。如果有人能解释为什么这些引用在使用 ifort 时未定义、而使用 gfortran 时却没有问题,并给出解决办法,我将非常感激。非常感谢!
答案 0 :(得分:1)
英特尔 Fortran 文档指出:
在 Linux 和 OS X 系统上,编译器会在外部用户定义的名称后附加一个下划线字符。
你需要通过以下选项禁用它:
ifort -assume nounderscore ...
另请注意 -names 选项(外部名称的大小写),其默认值为 lowercase:
这是 Linux 和 OS X 系统的默认设置。编译器忽略标识符中的大小写差异,并将外部名称转换为小写。
所以你可能也需要
-names uppercase