cudaMemcpy没有复制任何数据

时间:2015-09-27 12:43:03

标签: cuda

我使用cuda-gdb检查dcost的值,发现cudaMemcpy没有复制任何数据,即使它返回了cudaSuccess。

这是我的代码:

#include<stdint.h>
#include<stdio.h>
#include<stdlib.h>

// Fills a host buffer with the all-ones 64-bit pattern and copies it into a
// newly allocated device buffer of the same size.
//
// Every allocation and CUDA runtime call is checked; a failure is reported on
// stderr and turns into a non-zero exit status instead of being discarded.
// Both buffers are released before returning.
//
// Returns 0 when every step succeeded, 1 otherwise.
int main(){
    const int64_t nvtx_scale = ((int64_t)1)<<14;
    const size_t bytes = sizeof(uint64_t)*nvtx_scale;

    uint64_t* cost = (uint64_t*)malloc(bytes);
    if(cost == NULL){
        fprintf(stderr, "malloc of %llu bytes failed\n", (unsigned long long)bytes);
        return 1;
    }

    // uint64_t(-1) wraps to the maximum value, i.e. every bit set.
    for(int64_t i=0; i < nvtx_scale; i++)
        cost[i] = uint64_t(-1);

    uint64_t* dcost = NULL;
    cudaError_t err = cudaMalloc(&dcost, bytes);
    if(err != cudaSuccess){
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        free(cost);
        return 1;
    }

    int status = 0;

    err = cudaMemcpy(dcost, cost, bytes, cudaMemcpyHostToDevice);
    if(err != cudaSuccess){
        fprintf(stderr, "cudaMemcpy (host to device) failed: %s\n", cudaGetErrorString(err));
        status = 1;
    }

    // Release the device buffer even when the copy failed; report a failing
    // free as well so that no error code goes unnoticed.
    err = cudaFree(dcost);
    if(err != cudaSuccess){
        fprintf(stderr, "cudaFree failed: %s\n", cudaGetErrorString(err));
        status = 1;
    }

    free(cost);
    return status;
}

这与我设置了 `CUDA_DEBUGGER_SOFTWARE_PREEMPTION=1` 这一事实有关吗?我是按照cuda-gdb文档中的建议设置的,以便在不停止显示管理器的情况下使用同一块GPU进行调试。

1 个答案:

答案 0 :(得分:1)

以下稍微修改过的代码版本:

#include<stdint.h>
#include<iostream>

// Round-trips a host buffer through device memory and prints the first ten
// elements that come back.
//
// The host buffer is filled with 123456789, copied to the device, zeroed on
// the host, and then overwritten with the device contents. Every allocation
// and CUDA runtime call is checked; a failure is reported on std::cerr and
// yields a non-zero exit status, and nothing is printed to std::cout in that
// case. Both buffers are released before returning.
//
// Returns 0 when every step succeeded, 1 otherwise.
int main(){
    const int64_t nvtx_scale = ((int64_t)1)<<14;
    const size_t bytes = sizeof(uint64_t)*nvtx_scale;

    uint64_t* cost = (uint64_t*)malloc(bytes);
    if(!cost) {
        std::cerr << "malloc of " << bytes << " bytes failed" << std::endl;
        return 1;
    }

    for(int64_t i=0; i < nvtx_scale; i++)
        cost[i] = uint64_t(123456789);

    uint64_t* dcost = 0;
    cudaError_t err = cudaMalloc(&dcost, bytes);
    if(err != cudaSuccess) {
        std::cerr << "cudaMalloc failed: " << cudaGetErrorString(err) << std::endl;
        free(cost);
        return 1;
    }

    err = cudaMemcpy(dcost, cost, bytes, cudaMemcpyHostToDevice);
    if(err == cudaSuccess) {
        // Wipe the host copy so the values printed below can only have come
        // back from the device buffer.
        memset(cost, 0, bytes);
        err = cudaMemcpy(cost, dcost, bytes, cudaMemcpyDeviceToHost);
    }

    int status = 0;
    if(err == cudaSuccess) {
        for(int i=0; i<10; i++) {
            std::cout << i << " " << cost[i] << std::endl;
        }
    } else {
        std::cerr << "cudaMemcpy failed: " << cudaGetErrorString(err) << std::endl;
        status = 1;
    }

    // Release the device buffer on both the success and the failure path.
    err = cudaFree(dcost);
    if(err != cudaSuccess) {
        std::cerr << "cudaFree failed: " << cudaGetErrorString(err) << std::endl;
        status = 1;
    }

    free(cost);
    return status;
}

在我这里可以正常编译并运行,如下所示:

C:\Users\talonmies>nvcc cudacopy.cu
cudacopy.cu
   Creating library a.lib and object a.exp

C:\Users\talonmies>cuda-memcheck a.exe
========= CUDA-MEMCHECK
0 123456789
1 123456789
2 123456789
3 123456789
4 123456789
5 123456789
6 123456789
7 123456789
8 123456789
9 123456789
========= ERROR SUMMARY: 0 errors

如果您无法重现上述结果,则说明您的CUDA安装存在问题。