我是CUDA和C的新手,希望能在以下问题上得到一些帮助:我想把一个位于CPU(主机)端的GpuMat数组传递给CUDA内核:
这是我的内核代码:
/**
 * Per-pixel kernel over an array of difference maps.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; x is derived
 * from the .x launch dimensions and y from the .y launch dimensions.
 * Threads that fall outside the image return immediately.
 *
 * @param differenceMapsArray  Array of PtrStepSzi headers. It is dereferenced
 *                             in device code, so the array itself must live in
 *                             device-accessible memory (not on the host stack
 *                             or heap).
 * @param arraySize            Number of entries in differenceMapsArray.
 * @param disparityMap         Output image header.
 *                             NOTE(review): presumably the same size as the
 *                             difference maps -- confirm against the caller.
 */
__global__
void disparityFromDiffMapsKernel(cuda::PtrStepSzi* differenceMapsArray,
                                 int arraySize,
                                 cuda::PtrStepSzi disparityMap){
    // Element 0 supplies the image size below; with no maps there is
    // nothing valid to read.
    if(arraySize <= 0){
        return;
    }

    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    //check if thread is inside the image
    // Valid pixels are [0, cols) x [0, rows), so x == cols or y == rows is
    // already out of range; the comparison must be >=, not >.
    if(x >= differenceMapsArray[0].cols || y >= differenceMapsArray[0].rows){
        return;
    }
    //do stuff
}
这是我初始化数组并调用内核的代码:
// Builds an array of PtrStepSzi headers, one per difference map, and launches
// the kernel on it. This is the snippet that triggers the reported error.
// NOTE(review): diffMaps is an ordinary host array; see the launch below.
cuda::PtrStepSzi diffMaps[diffMapsSize];
// NOTE(review): the loop bound is `i <= offset` while the array is sized by
// diffMapsSize -- presumably diffMapsSize == offset + 1; otherwise this either
// overruns the array or leaves entries unset. Confirm.
for(int i = 0; i <= offset; i++){
// NOTE(review): diffMap is local to one iteration, but the header copied into
// diffMaps[i] outlives it. GpuMat is documented as reference-counted, so the
// device buffer is presumably released when diffMap goes out of scope, leaving
// diffMaps[i] pointing at freed memory -- confirm.
cuda::GpuMat diffMap(leftImageGPU.size(),CV_32SC1);
cuda::PtrStepSzi diffMapPtr = diffMap;
diffMaps[i] = diffMapPtr;
}
// The first argument is the address of the host array above. The kernel
// dereferences it (differenceMapsArray[0]) in device code, i.e. it reads a
// host pointer on the GPU.
disparityFromDiffMapsKernel<<<numBlocks,threadsPerBlock>>>(diffMaps,diffMapsSize,disparityImageGPU); //gpu mat is initialized before
运行此代码时,出现以下opencv错误:
OpenCV(3.4.1) Error: Gpu API call (an illegal memory access was encountered)
我将非常感谢您的帮助!
答案 0 :(得分:1)
我找到了解决办法:通过cudaMalloc和cudaMemcpy把该数组复制到GPU内存中(感谢@sgarizvi的提示)。
这是最终代码,以防有人遇到类似问题:
// Pass an array of GpuMat headers to the kernel:
//   1. keep the GpuMat objects alive on the host for the whole launch,
//   2. collect their PtrStepSzi headers in a host array,
//   3. copy that header array to device memory and hand the device pointer
//      to the kernel.

// Owners of the device image buffers. A PtrStepSzi is only a non-owning
// header, so the GpuMats must outlive the kernel; creating them as loop-local
// temporaries would release their buffers at the end of each iteration and
// leave the headers dangling.
cuda::GpuMat* diffMapStorage = new cuda::GpuMat[diffMapsSize];

// reserve memory for the diffmap ptrs arrays
cuda::PtrStepSzi* cpuDiffMapPtrs =
    (cuda::PtrStepSzi*) malloc(diffMapsSize * sizeof(cuda::PtrStepSzi));
cuda::PtrStepSzi* gpuDiffMapPtrs = NULL;

// First failure wins; every later step is skipped but cleanup still runs.
cudaError_t status = (cpuDiffMapPtrs != NULL)
    ? cudaMalloc(&gpuDiffMapPtrs, diffMapsSize * sizeof(cuda::PtrStepSzi))
    : cudaErrorMemoryAllocation;

if(status == cudaSuccess){
    //fill cpu array with ptrs to gpu mats
    for(int i = 0; i < diffMapsSize; i++){
        diffMapStorage[i].create(leftImageGPU.size(), CV_32SC1);
        //do stuff with the gpu mats
        cpuDiffMapPtrs[i] = diffMapStorage[i];
    }

    //copy cpu array to gpu
    status = cudaMemcpy(gpuDiffMapPtrs, cpuDiffMapPtrs,
                        diffMapsSize * sizeof(cuda::PtrStepSzi),
                        cudaMemcpyHostToDevice);
}

if(status == cudaSuccess){
    // NOTE(review): this call passes halfKernelSize, which the kernel
    // signature shown in the question does not take -- presumably the final
    // kernel gained that parameter; confirm the two match.
    disparityFromDiffMapsKernel<<<numBlocks,threadsPerBlock>>>(gpuDiffMapPtrs,diffMapsSize,halfKernelSize,disparityImageGPU);
    // Launch-configuration errors are only visible through cudaGetLastError().
    status = cudaGetLastError();
}

if(status == cudaSuccess){
    // Wait for the kernel so that in-kernel faults are reported here and the
    // buffers below are not released while still in use.
    status = cudaDeviceSynchronize();
}

// free the allocated memory
cudaFree(gpuDiffMapPtrs);   // cudaFree(NULL) is a no-op
free(cpuDiffMapPtrs);
delete[] diffMapStorage;    // releases the device image buffers

if(status != cudaSuccess){
    CV_Error(cv::Error::GpuApiCallError, cudaGetErrorString(status));
}