Question

以下是我尝试使用CUDA Fortran处理数组的示例代码。

    !> Device kernels for simple vector operations (CUDA Fortran).
    module mathOps
      implicit none
    contains
      !> SAXPY kernel: each thread updates at most one element, y(i) = y(i) + a*x(i).
      !!
      !! x : input vector (read only)
      !! y : vector updated in place
      !! a : scalar multiplier, passed by value
      !!
      !! NOTE(review): the bounds check uses size(x) only, so y is assumed to be
      !! at least as long as x -- confirm at the call site.
      attributes(global) subroutine saxpy(x, y, a)
        real, intent(in)    :: x(:)
        real, intent(inout) :: y(:)
        real, value :: a
        integer :: i, n
        n = size(x)
        ! 1-based global element index built from the block and thread indices.
        i = blockDim%x * (blockIdx%x - 1) + threadIdx%x
        ! Threads whose index falls past the end of x do nothing.
        if (i <= n) y(i) = y(i) + a*x(i)
      end subroutine saxpy
    end module mathOps

    ! Host-side driver from the question: assigns x and y to device arrays,
    ! launches the saxpy kernel, assigns the result back to y and prints the
    ! largest deviation of y from 4.0.
    ! NOTE(review): as posted this block does not compile; see the notes below.
    SUBROUTINE testSaxpy(x,y)
      use mathOps
      use cudafor
      implicit none

      ! x and y are assumed-size dummies, so their extent is not known here.
      ! NOTE(review): a is declared but is neither a dummy argument nor assigned
      ! before it is passed to the kernel.
      real :: x(*), y(*), a
      real, device :: x_d(*), y_d(*) ! This line shows error and I want a statement that is something like this.
      type(dim3) :: grid, tBlock

      ! 256 threads per block; number of blocks is N/256 rounded up.
      ! NOTE(review): N is not declared in this subroutine.
      tBlock = dim3(256,1,1)
      grid = dim3(ceiling(real(N)/tBlock%x),1,1)


      ! Assignments between host arrays and arrays with the device attribute.
      ! NOTE(review): whole-array references to assumed-size arrays (here, in
      ! "y = y_d" and in maxval below) are not permitted by the Fortran standard.
      x_d = x
      y_d = y
      call saxpy<<<grid, tBlock>>>(x_d, y_d, a)
      y = y_d
      write(*,*) 'Max error: ', maxval(abs(y-4.0))
    END SUBROUTINE testSaxpy


    ! Minimal driver: fills x and y with constants and runs testSaxpy on them.
    program test
      implicit none
      ! NOTE(review): N is local to this program and is not visible inside
      ! testSaxpy; the arrays below hold 100 elements, not N.
      integer, parameter :: N = 40000
      real :: x(100), y(100), a
      ! NOTE(review): a is set here but never passed on (testSaxpy takes only x and y).
      x = 1.0; y = 2.0; a = 2.0
      ! A subroutine has to be invoked with CALL.
      call testSaxpy(x, y)
    end program test

有什么方法可以声明假定大小的设备数组变量或类似的东西吗？我可以声明固定大小的数组，但问题是我手上没有包含主程序的文件。我只提供子程序文件，并且需要从子程序中调用CUDA内核。那么，有什么方法可以使用假定大小的设备数组或类似的东西吗？

Answer 1

我认为你想要的是

real, device :: x_d(size(x)), y_d(size(y))

这将分配与x和y大小相同的数组x_d和y_d（注意：size(x)要求x是假定形状数组x(:)，而不是假定大小数组x(*)）。您还可以将数组声明为allocatable，然后在子程序体中分配它们：

real, device, allocatable :: x_d(:), y_d(:)
allocate(x_d(size(x)), y_d(size(y)))

但是从我对CUDA Fortran功能的粗略了解来看，似乎只要声明你希望x和y位于设备上，就可以让Fortran透明地完成所有这些工作。例如（未经测试！）：

! Variant from the answer (marked untested there): x and y are dummies with
! the device attribute and are passed straight to the saxpy kernel.
! NOTE(review): assumed-shape dummies need an explicit interface at the call
! site (e.g. place this subroutine in a module).
subroutine testSaxpy(x,y)
  use mathOps
  use cudafor
  implicit none
  ! NOTE(review): a is a local that is never assigned before the launch;
  ! its value has to come from somewhere -- confirm the intended source.
  real, device :: x(:), y(:), a
  type(dim3) :: grid, tBlock

  tBlock = dim3(256,1,1)
  ! Size the grid from x itself (rounded up to whole blocks); the original
  ! used N, which is not declared in this scope.
  grid = dim3(ceiling(real(size(x))/tBlock%x),1,1)
  call saxpy<<<grid, tBlock>>>(x, y, a)
  write(*,*) 'Max error: ', maxval(abs(y-4.0))
end subroutine testSaxpy

如何在Cuda Fortran中声明假定大小的设备数组？

1 个答案: