Question

为了更方便地处理主机和设备上的内存，我创建了下面这个类。理论上，它应该负责管理主机到设备以及设备到主机的数据复制。

// Owns one host buffer and one device buffer of `size` ints each and copies data
// between them. Every object owns its buffers exclusively: copying a CudaArray
// creates new buffers instead of sharing the pointers, so each destructor frees
// only memory that belongs to its own object.
//
// CUDA calls that fail are not reported by this struct itself; the caller can
// query cudaGetLastError() after constructing the object or after Upload() /
// Download() to find out whether the runtime call succeeded.
struct CudaArray
{

int* memoryHost;   // host buffer allocated with new[], zero-filled by CudaArray(int)
int* memoryDevice; // device buffer allocated with cudaMalloc, or 0 if that allocation failed

int size;          // number of int elements in each of the two buffers

CudaArray(int datasize) // creates array on host and allocates memory on device with cudaMalloc
{
    size = datasize;
    memoryHost = new int[size](); // "()" value-initializes, i.e. zeroes, every element

    // Give the pointer a defined value before and after the allocation attempt:
    // if cudaMalloc fails the pointer must not stay indeterminate, otherwise the
    // destructor would pass garbage to cudaFree and a kernel launch would
    // receive a random address.
    memoryDevice = 0;
    if (cudaMalloc((void**)&memoryDevice, sizeof(int) * size) != cudaSuccess)
    {
        memoryDevice = 0;
    }
}

CudaArray(const CudaArray& other) // deep copy: allocates new host and device buffers and copies both contents
{
    size = other.size;
    memoryHost = new int[size];
    for (int i = 0; i < size; i++)
    {
        memoryHost[i] = other.memoryHost[i];
    }

    memoryDevice = 0;
    if (cudaMalloc((void**)&memoryDevice, sizeof(int) * size) != cudaSuccess)
    {
        memoryDevice = 0;
    }
    else if (other.memoryDevice != 0)
    {
        cudaMemcpy(memoryDevice, other.memoryDevice, sizeof(int) * size, cudaMemcpyDeviceToDevice);
    }
}

CudaArray& operator=(const CudaArray& other) // deep copy assignment (copy, then swap the buffers)
{
    if (this != &other)
    {
        CudaArray copy(other);

        // Exchange buffers with the temporary; its destructor then releases
        // the buffers this object owned before the assignment.
        int* oldHost = memoryHost;
        memoryHost = copy.memoryHost;
        copy.memoryHost = oldHost;

        int* oldDevice = memoryDevice;
        memoryDevice = copy.memoryDevice;
        copy.memoryDevice = oldDevice;

        int oldSize = size;
        size = copy.size;
        copy.size = oldSize;
    }
    return *this;
}

~CudaArray() // frees memory on device and host
{
    delete[] memoryHost;
    cudaFree(memoryDevice); // memoryDevice is either a valid allocation or 0; cudaFree(0) does nothing
}

void Upload() // upload data from host to device
{
    cudaMemcpy(memoryDevice, memoryHost, sizeof(int) * size, cudaMemcpyHostToDevice);
}
void Download() // download data from device to host
{
    cudaMemcpy(memoryHost, memoryDevice, sizeof(int) * size, cudaMemcpyDeviceToHost);
}

void Insert(int* src); // copy from src to memoryHost
void Retrieve(int* dest); // copy from memoryHost to dest
};

在类的内部，一切都工作正常。但是当我使用 CudaArray 的对象时，指针出现了问题：

// Build the wrapper: a zero-filled host array of 1000 ints plus a device buffer
// of the same element count obtained with cudaMalloc.
CudaArray cuda_ar(1000);
// Launch the kernel, handing it the wrapper's raw device pointer and element count.
kernel <<<blocks, threads_per_block>>> (cuda_ar.memoryDevice, cuda_ar.size);

通过调试器，我读取了指针 memoryDevice 的值。在结构体内部（例如单步执行 Upload() 时），它是 0x01000000；但是在启动内核的地方，memoryDevice 却指向 0x00000400（这些数字仅为示例）。

我知道memoryDevice是一个指向设备内存的指针。有没有办法解释这种行为并解决我的问题？

Answer 1

当我运行以下程序时

#include <cstdio>
// Owns one host buffer and one device buffer of `size` ints each and copies data
// between them. Every object owns its buffers exclusively: copying a CudaArray
// creates new buffers instead of sharing the pointers, so each destructor frees
// only memory that belongs to its own object. A CUDA runtime call that fails in
// the constructors, Upload() or Download() is reported on stderr.
struct CudaArray
{

  int* memoryHost;   // host buffer allocated with new[], zero-filled by CudaArray(int)
  int* memoryDevice; // device buffer allocated with cudaMalloc, or 0 if that allocation failed

  int size;          // number of int elements in each of the two buffers

  CudaArray(int datasize) // creates array on host and allocates memory on device with cudaMalloc
  {
    size = datasize;
    memoryHost = new int[size](); // "()" value-initializes, i.e. zeroes, every element

    AllocateDevice();
  }

  CudaArray(const CudaArray& other) // deep copy: allocates new host and device buffers and copies both contents
  {
    size = other.size;
    memoryHost = new int[size];
    for (int i = 0; i < size; i++)
    {
      memoryHost[i] = other.memoryHost[i];
    }

    AllocateDevice();
    if (memoryDevice != 0 && other.memoryDevice != 0)
    {
      Report(cudaMemcpy(memoryDevice, other.memoryDevice, sizeof(int) * size, cudaMemcpyDeviceToDevice),
             "cudaMemcpy (device to device)");
    }
  }

  CudaArray& operator=(const CudaArray& other) // deep copy assignment (copy, then swap the buffers)
  {
    if (this != &other)
    {
      CudaArray copy(other);

      // Exchange buffers with the temporary; its destructor then releases
      // the buffers this object owned before the assignment.
      int* oldHost = memoryHost;
      memoryHost = copy.memoryHost;
      copy.memoryHost = oldHost;

      int* oldDevice = memoryDevice;
      memoryDevice = copy.memoryDevice;
      copy.memoryDevice = oldDevice;

      int oldSize = size;
      size = copy.size;
      copy.size = oldSize;
    }
    return *this;
  }

  ~CudaArray() // frees memory on device and host
  {
    delete[] memoryHost;
    cudaFree(memoryDevice); // memoryDevice is either a valid allocation or 0; cudaFree(0) does nothing
  }

  void Upload() // upload data from host to device
  {
    Report(cudaMemcpy(memoryDevice, memoryHost, sizeof(int) * size, cudaMemcpyHostToDevice),
           "cudaMemcpy (host to device)");
  }
  void Download() // download data from device to host
  {
    Report(cudaMemcpy(memoryHost, memoryDevice, sizeof(int) * size, cudaMemcpyDeviceToHost),
           "cudaMemcpy (device to host)");
  }

private:
  // Allocates the device buffer for `size` ints. The pointer gets a defined
  // value before and after the attempt: on failure it is 0 rather than
  // indeterminate, so the destructor never passes garbage to cudaFree and a
  // kernel launch never receives a random address.
  void AllocateDevice()
  {
    memoryDevice = 0;
    const cudaError_t status = cudaMalloc((void**)&memoryDevice, sizeof(int) * size);
    if (status != cudaSuccess)
    {
      memoryDevice = 0;
    }
    Report(status, "cudaMalloc");
  }

  // Prints a diagnostic on stderr when `status` is not cudaSuccess; `what`
  // names the runtime call that produced the status.
  static void Report(cudaError_t status, const char* what)
  {
    if (status != cudaSuccess)
    {
      fprintf(stderr, "CudaArray: %s failed: %s\n", what, cudaGetErrorString(status));
    }
  }

};

// Debug kernel: every launched thread prints the pointer value and the element
// count it received as arguments, so they can be compared with the values that
// the host printed before the launch.
__global__ void kernel(int *ptr, int n)
{
  const void* receivedAddress = ptr; // %p takes a void pointer
  const int receivedCount = n;
  printf("On Device : %p %d\n", receivedAddress, receivedCount);
}

// Prints the device pointer and size stored in a CudaArray as seen by the host,
// then launches a single-thread kernel that prints the values it received.
// Returns 0 on success and 1 if the kernel launch or its execution failed.
int main(void)
{
  CudaArray cuda_ar(1000);
  // %p expects a void pointer, hence the cast.
  printf("On Host   : %p %d\n", (void*)cuda_ar.memoryDevice, cuda_ar.size);
  kernel<<<1, 1>>>(cuda_ar.memoryDevice, cuda_ar.size);

  // A kernel launch returns no status itself: launch errors are read with
  // cudaGetLastError(), errors during execution show up at the next
  // synchronizing call. Synchronizing here also flushes the device-side
  // printf buffer explicitly instead of relying on a later implicit sync.
  cudaError_t status = cudaGetLastError();
  if (status == cudaSuccess)
  {
    status = cudaDeviceSynchronize();
  }
  if (status != cudaSuccess)
  {
    fprintf(stderr, "kernel failed: %s\n", cudaGetErrorString(status));
    return 1;
  }
  return 0;
}

我得到了

On Host   : 0x200400000 1000
On Device : 0x200400000 1000

您应该确保 cudaMalloc、cudaMemcpy 等 CUDA 运行时调用以及内核启动都已成功返回。您可以在每个 CUDA 运行时调用之后加入以下代码来进行验证：

{
  // Fetch the error exactly once: cudaGetLastError() also resets the stored
  // error, so the code has to be kept in a variable to report what went wrong.
  // The braces keep the variable local, so this check can be pasted after
  // several calls within the same function.
  const cudaError_t lastError = cudaGetLastError();
  if (cudaSuccess != lastError)
    printf( "Error: %s\n", cudaGetErrorString(lastError) );
}

在类中包装cuda内存处理会导致内存地址损坏

1 个答案: