我有一个关于将数据从主机复制到设备的问题。问题如下。我有如下定义的数组:
! Host array covering only the interior index range.
real, allocatable :: cpuArray(:,:,:)
! Device array with lower bounds of -1 and upper bounds one past the host
! array's, so it is one element wider than the host array at both ends.
real, device, allocatable :: gpuArray(:,:,:)
allocate(cpuArray(0:imax-1,0:jmax-1,0:kmax-1))
allocate(gpuArray(-1:imax,-1:jmax,-1:kmax))
! array initialization
cpuArray = randomValue ! non-zero value
gpuArray = 0.0 ! first zero every element of the device array
! Host-to-device copy into the section of gpuArray that has the host array's bounds.
gpuArray(0:imax-1,0:jmax-1,0:kmax-1) = cpuArray
我的期望是,只有gpuArray中的指定索引才能从主机接收数据,但它不起作用。
你能帮我找到这个有什么问题吗?
PS:我的方法基于PGI主页tutorial
- 当我设置cpuArray和gpuArray相同的维度时, 我得到了正确的结果。
但在当前情况下,gpuArray中的所有元素都为0。我把默认值改为非零值(即 gpuArray = 10.0 !先把gpu数组的所有元素置为10),但结果仍为0。
祝你好运, Adjeiinfo
答案 0 :(得分:1)
我向整个社区道歉。我已经解决了自己的问题。这是我在测试程序中引入的一个愚蠢的错误:在结果检查程序中,我本应写 cpuArray = gpuArray(0:imax-1,0:jmax-1,0:kmax-1),却写成了 cpuArray = gpuArray。所以程序本身运行正常,出错的是结果检查程序。
感谢您的跟进。
供参考,这是程序的一部分(可以构建和运行)
!> Shared state and device kernel for a host/device array-section copy test.
!!
!! The host program sets the extents, allocates the arrays declared here,
!! uploads data into d_b and d_c, launches testdata, and reads the results
!! back from d_b_copy and d_c_copy.
module mytest
  use cudafor
  implicit none

  ! Extents of the interior index space; assigned by the host program.
  integer :: imax , jmax, kmax
  ! Loop indices used by the host program when filling h_a.
  integer :: i,j,k

  !host arrays
  real,allocatable:: h_a(:,:,:)   ! reference data filled on the host
  real,allocatable:: h_b(:,:,:)   ! receives the contents of d_b_copy
  real,allocatable:: h_c(:,:,:)   ! receives a section of d_c_copy

  !device arrays
  real,device,allocatable:: d_b(:,:,:)        ! kernel input
  real,device,allocatable:: d_c(:,:,:)        ! kernel input
  real,device,allocatable:: d_b_copy(:,:,:)   ! kernel output (copy of d_b)
  real,device,allocatable:: d_c_copy(:,:,:)   ! kernel output (copy of d_c)

contains

  !> Copy planes k = 0 and k = 1 of d_b into d_b_copy and of d_c into d_c_copy.
  !!
  !! Each thread handles one (i, j) column. Indices are zero-based: the first
  !! thread of the first block maps to (0, 0).
  attributes(global) subroutine testdata()
    integer :: d_i, d_j, d_k
    integer :: i_last, j_last

    ! Convert the 1-based CUDA Fortran block/thread indices to 0-based array indices.
    d_i = (blockIdx%x-1) * blockDim%x + threadIdx%x-1
    d_j = (blockIdx%y-1) * blockDim%y + threadIdx%y-1

    ! Largest index valid in all four arrays. Threads beyond it do nothing, so
    ! a launch that over-covers the index space cannot touch memory out of
    ! bounds. d_i and d_j are never negative; this assumes every array's lower
    ! bound in dimensions 1 and 2 is <= 0, as in the accompanying host program.
    i_last = min(ubound(d_b, 1), ubound(d_c, 1), &
                 ubound(d_b_copy, 1), ubound(d_c_copy, 1))
    j_last = min(ubound(d_b, 2), ubound(d_c, 2), &
                 ubound(d_b_copy, 2), ubound(d_c_copy, 2))
    if (d_i > i_last .or. d_j > j_last) return

    ! Only planes 0 and 1 are copied; the range is fixed and does not follow kmax.
    do d_k = 0, 1
      d_b_copy(d_i, d_j, d_k) = d_b(d_i, d_j, d_k)
      d_c_copy(d_i, d_j, d_k) = d_c(d_i, d_j, d_k)
    end do
  end subroutine testdata

end module mytest
!> Host driver for the array-section copy test.
!!
!! Fills h_a on the host, uploads it to d_b (same bounds) and to the interior
!! section of d_c (which has wider bounds), runs the testdata kernel to copy
!! both into d_b_copy / d_c_copy, downloads the results, and writes them to
!! text files so they can be compared with h_a.
program Test
  use mytest
  implicit none

  ! Threads per block along x and y. 16 x 16 = 256 threads per block, and
  ! imax and jmax below are multiples of it, so the launch covers the
  ! (imax, jmax) index space exactly, with no thread beyond the array bounds.
  integer, parameter :: threads_per_dim = 16
  ! Unit number used for the output files.
  integer, parameter :: dump_unit = 24

  type(dim3) :: dimGrid, dimBlock
  integer :: cuda_stat

  imax = 32
  jmax = 32
  kmax = 2   ! the kernel copies planes k = 0 and k = 1 only

  dimBlock = dim3(threads_per_dim, threads_per_dim, 1)
  ! Round up so the grid always covers the whole (imax, jmax) index space.
  dimGrid = dim3((imax + threads_per_dim - 1) / threads_per_dim, &
                 (jmax + threads_per_dim - 1) / threads_per_dim, 1)

  allocate(h_a(0:imax-1,0:jmax-1,0:kmax-1))
  allocate(h_b(0:imax-1,0:jmax-1,0:kmax-1))
  allocate(h_c(0:imax-1,0:jmax-1,0:kmax-1))

  ! d_b / d_b_copy share the host bounds; d_c / d_c_copy start at -1 and extend
  ! past the host bounds, so only a section of them is transferred. The third
  ! extent of d_c is larger than the section that is actually used.
  allocate(d_b(0:imax-1,0:jmax-1,0:kmax-1))
  allocate(d_c(-1:imax,-1:jmax,-1:16))
  allocate(d_b_copy(0:imax-1,0:jmax-1,0:kmax-1))
  allocate(d_c_copy(-1:imax,-1:jmax,-1:kmax-1))

  !array initialization (first index innermost for contiguous access)
  do k = 0, kmax-1
    do j = 0, jmax-1
      do i = 0, imax-1
        h_a(i,j,k) = i*0.1
      end do
    end do
  end do

  !data transfer (cpu to gpu): whole array, then into a section of a wider array
  d_b = h_a
  d_c(0:imax-1,0:jmax-1,0:kmax-1) = h_a

  call testdata<<<dimGrid,dimBlock>>>()

  ! A kernel launch does not report failures by itself: check the launch
  ! status, then wait for completion and check the execution status.
  cuda_stat = cudaGetLastError()
  if (cuda_stat == cudaSuccess) cuda_stat = cudaDeviceSynchronize()
  if (cuda_stat /= cudaSuccess) then
    write(*,*) 'testdata kernel failed: ', trim(cudaGetErrorString(cuda_stat))
    stop 1
  end if

  !copy back to cpu
  h_b = d_b_copy(0:imax-1,0:jmax-1,0:kmax-1)
  h_c = d_c_copy(0:imax-1,0:jmax-1,0:kmax-1)

  !just for visual test
  write(*,*) h_b
  call dump_array('h_a.dat', h_a)
  call dump_array('d_b_copy.dat', h_b)
  call dump_array('d_c_copy.dat', h_c)

  deallocate(d_b, d_c, d_b_copy, d_c_copy)
  deallocate(h_a, h_b, h_c)

contains

  !> Write every element of values to the file path using list-directed output.
  subroutine dump_array(path, values)
    character(len=*), intent(in) :: path
    real, intent(in) :: values(:,:,:)

    open(dump_unit, file=path)
    write(dump_unit,*) values
    close(dump_unit)
  end subroutine dump_array

end program Test