Question

我在设备上分配了一个2D数组，并希望将一个2D浮点数组复制到设备上。ImgSrc是OpenCV中的Mat类型，我先将其元素复制到名为ImgSrc_f的2D浮点数组中，然后使用cudaMemcpy2DToArray()将主机端的2D数组（ImgSrc_f）复制到设备端的2D数组Src。2D数组的大小为512×512。

// Channel descriptor for elements of type float.
cudaChannelFormatDesc floattex = cudaCreateChannelDesc<float>();
// Handle to the device-side CUDA array that will receive the image.
cudaArray *Src;
// Allocate the CUDA array with width 512 and height 512 using the float format above.
// NOTE(review): the return code is not checked here.
cudaMallocArray(&Src, &floattex, 512, 512);

// Host staging buffer: an array of 512 row pointers, each row allocated separately below.
float *ImgSrc_f[512];
for (int i=0; i<512; i++)
         ImgSrc_f[i] = (float *)malloc(512 * sizeof(float)); // one independent 512-float allocation per row
// Copy element (i, j) of ImgSrc into row i, column j of the staging buffer.
for(int i=0;i<512;i++)
    for(int j=0;j<512;j++)
    {
        ImgSrc_f[i][j]=ImgSrc.at<float>(i,j);
    }
//copy from host memory to device
// Arguments: destination array, x/y offsets (0, 0), source pointer, source pitch in bytes,
// width of each row in bytes, number of rows, copy direction.
// NOTE(review): ImgSrc_f passed here is the table of row pointers, not a single
// buffer of 512*512 floats — confirm this is what the copy is meant to read from.
cudaMemcpy2DToArray(Src, 0, 0,ImgSrc_f,512 * sizeof(float),512 *sizeof(float), 512,cudaMemcpyHostToDevice);

但我得到了这个例外：

Access violation reading location 0x0000000000281000

Answer 1

ImgSrc_f没有指向连续的512x512内存块。尝试更改

// Original layout: 512 separately malloc'ed rows reached through an array of pointers,
// so the rows are not guaranteed to be adjacent in memory.
float *ImgSrc_f[512];
for (int i=0; i<512; i++)
         ImgSrc_f[i] = (float *)malloc(512 * sizeof(float));
// Fill row i, column j from the corresponding element of ImgSrc.
for(int i=0;i<512;i++)
    for(int j=0;j<512;j++)
    {
        ImgSrc_f[i][j]=ImgSrc.at<float>(i,j);
    }

为类似下面的写法：

// Replacement layout: one allocation holding all 512 * 512 floats back to back.
float *ImgSrc_f;
ImgSrc_f = (float *)malloc(512 * 512 * sizeof(float));
// Flatten the image: element (i, j) is stored at linear index i * 512 + j.
for(int i=0;i<512;i++)
    for(int j=0;j<512;j++)
    {
        ImgSrc_f[i * 512 + j]=ImgSrc.at<float>(i,j);
    }

cudaMemcpy2DToArray 需要一个指向单个连续内存块的源指针。

调用cudaMemcpy2DToArray时出现“访问冲突：读取位置”错误

1 个答案: