如何将cudaArray(Cuda)复制到GpuMat(OpenCV)?

时间:2013-11-01 01:40:37

标签: opengl opencv cuda

我尝试了cudaMemcpy2DFromArray和cudaMemcpy2D,但两者都无法正常工作。所谓“无法正常工作”,是指GpuMat确实从cudaArray复制到了一些数据,但图像在水平方向上的缩放比例是错误的。

代码段如下:

// Question snippet: map the registered graphics resource, obtain the cudaArray
// behind it, and try to copy its contents into a CV_32FC3 GpuMat.
cudaArray *colorArr;
checkCudaErrors( cudaGraphicsMapResources( 1, &cudaResourceColor, 0 ) );
checkCudaErrors( cudaGraphicsSubResourceGetMappedArray( &colorArr, cudaResourceColor, 0, 0 ) );

// Destination image: w x h pixels, three 32-bit float channels per pixel.
cv::gpu::GpuMat gpuColorMat(Size(w,h), CV_32FC3);   

// Tried method 1: the following didn't work correctly
// Copies gpuColorMat.rows rows of gpuColorMat.cols*sizeof(float3) bytes straight
// from the array, using gpuColorMat.step as the destination pitch.
// NOTE(review): this assumes every element of colorArr is a 12-byte float3. If
// the mapped array really holds 4-channel texels, the row width in bytes is
// wrong, which would match the horizontal distortion described - confirm the
// array's channel format (e.g. with cudaGetChannelDesc).
checkCudaErrors( cudaMemcpy2DFromArray( gpuColorMat.data, gpuColorMat.step, colorArr, 
    0, 0, gpuColorMat.cols*sizeof(float3), gpuColorMat.rows, cudaMemcpyDeviceToDevice ) );

// Tried method 2: also didn't work correctly. Same error as the first method.
// Stages the array in a linear buffer of w*h float3 elements, then copies that
// buffer into the GpuMat.
// NOTE(review): the first copy passes w*h as the destination pitch, while the
// second copy reads the same buffer with a source pitch of w*sizeof(float3);
// the two pitches disagree. cuarr is also never freed in this snippet.
float3 *cuarr; 
checkCudaErrors( cudaMalloc( (void**)&cuarr, w*h*sizeof(float3) ) );
checkCudaErrors( cudaMemcpy2DFromArray( cuarr, w*h, colorArr, 0, 0, w*sizeof(float3), h, cudaMemcpyDeviceToDevice ) );
checkCudaErrors( cudaMemcpy2D( (float*)gpuColorMat.data, gpuColorMat.step, cuarr, w*sizeof(float3), w*sizeof(float3), h, cudaMemcpyDeviceToDevice ) );

// unmap buffer objects
checkCudaErrors( cudaGraphicsUnmapResources( 1, &cudaResourceColor, 0 ) );

有人可以帮我吗?

2 个答案:

答案 0 :(得分:2)

我终于成功了。我在下面分享我的代码:

.cu文件:执行设备数组复制。渲染得到的图像被绑定到纹理inTex,然后由内核把它复制到目标缓冲区float3* dst。

// 2D texture reference that the source image is bound to (see BindToTexture).
// Texels are fetched as float4 with no type conversion (cudaReadModeElementType).
texture<float4, 2, cudaReadModeElementType> inTex;

// Copies the x/y/z components of each texel of inTex into a pitched float3 image.
//
//   dst     - base pointer of the destination image
//   dstStep - distance between the starts of consecutive destination rows, in bytes
//   width   - number of pixels to write per row
//   height  - number of rows to write
//
// One thread handles one pixel; its (x, y) comes from the 2D block/thread index.
// The launch grid must cover at least width x height threads. Threads that fall
// outside the image return without touching memory.
__global__ void CuDeviceArrayCopyFromTexture( float3* dst, int dstStep, int width, int height )
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Valid pixels are 0..width-1 and 0..height-1. The guard has to be '>=':
    // with '>' the thread at x == width (or y == height) would write one
    // element past the end of the row (or one row past the end of the image).
    if ( x >= width || y >= height ) return;

    float4 res = tex2D(inTex, x, y);

    // dstStep is a byte stride, so rows are addressed through a char pointer.
    // The offset is computed in 64 bits so y * dstStep cannot overflow int on
    // very large images.
    float3* row_y = (float3*)((char*)dst + (long long)y * dstStep);

    // The fourth component of the texel is dropped.
    row_y[x] = make_float3(res.x, res.y, res.z);
}

// Ceiling division: for non-negative n and positive m, returns the smallest
// integer that is >= n / m.
inline int iDivUp(int n, int m)
{
    // Bias the numerator by (m - 1) so that truncating division rounds up.
    const int biased = n + m - 1;
    return biased / m;
}

// Host-side launcher for CuDeviceArrayCopyFromTexture.
//
//   dst     - device pointer to the destination image
//   dstStep - destination row stride in bytes (forwarded unchanged to the kernel)
//   width   - image width in pixels
//   height  - image height in pixels
//
// Launches one thread per pixel in blocks of 64 x 1 threads. The launch is
// asynchronous; only launch errors are checked here.
void DeviceArrayCopyFromTexture( float3* dst, int dstStep, int width, int height )
{
    // An empty image would give a zero-sized grid, which is an invalid launch
    // configuration. There is nothing to copy, so return early.
    if ( width <= 0 || height <= 0 ) return;

    dim3 threads( 64, 1 );

    // Round up in BOTH dimensions so the grid always covers the whole image,
    // whatever block shape is chosen above. Truncating division for the grid
    // height is only correct while threads.y divides height.
    dim3 grid( iDivUp( width, threads.x ), iDivUp( height, threads.y ) );

    CuDeviceArrayCopyFromTexture <<< grid, threads >>> ( dst, dstStep, width, height );

    // A kernel launch returns no status of its own; launch failures have to be
    // collected explicitly.
    checkCudaErrors( cudaGetLastError() );
}

// Binds the given CUDA array to the file-scope texture reference inTex, so that
// subsequent tex2D(inTex, ...) fetches read from that array.
//
//   cuArr - array to bind; in this file's usage it is the array obtained from
//           the mapped graphics resource
//
// The result of the bind call is passed through checkCudaErrors.
// NOTE(review): texture references are a legacy API - confirm that the CUDA
// toolkit version you build with still provides cudaBindTextureToArray.
void BindToTexture( cudaArray *cuArr )
{
     checkCudaErrors( cudaBindTextureToArray( inTex, cuArr ) );
}

.cpp文件:设置gl渲染纹理,绑定到cuda纹理并调用设备数组复制方法。

// Create the colour texture (presumably the FBO colour attachment, judging by
// its name) on texture unit 0 and register it with CUDA for read-only access.
glActiveTexture(GL_TEXTURE0);
glGenTextures(1, &fboColorTex);
glBindTexture(GL_TEXTURE_2D, fboColorTex);
// I used RGB16F and RGB32F, both not working. So I changed to GL_RGBA16F and it could be mapped to cudaArray as float4 element.
// The data pointer is NULL: no pixel data is supplied by this call.
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
// Nearest-neighbour filtering for both minification and magnification.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Registration takes a cudaGraphicsRegisterFlags value. cudaGraphicsMapFlagsReadOnly
// is a map-flags enumerator (for cudaGraphicsResourceSetMapFlags) and is not the
// right enum for this parameter, so the register-flags enumerator is used here.
checkCudaErrors( cudaGraphicsGLRegisterImage( &cudaResourceColor, fboColorTex, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsReadOnly ) );

// Entry points implemented in the .cu file.
extern void BindToTexture( cudaArray *cuArr );
extern void DeviceArrayCopyFromTexture( float3* dst, int dstStep, int width, int height );

// Destination image on the GPU: w x h pixels, three 32-bit float channels.
// It has static storage duration, so the same GpuMat is reused.
static GpuMat gpuMat( Size(w,h), CV_32FC3 );
// Receives the CUDA array that backs the mapped graphics resource.
cudaArray *cuArr;

// Copy color buffer
// Step 1: map the registered resource and fetch the array for index 0, level 0.
checkCudaErrors( cudaGraphicsMapResources( 1, &cudaResourceColor, 0 ) );
checkCudaErrors( cudaGraphicsSubResourceGetMappedArray( &cuArr, cudaResourceColor, 0, 0 ) );

// Step 2: bind the array to the texture, then launch the copy into gpuMat.
// gpuMat.step is passed as the destination row stride, which the kernel applies
// as a byte offset between rows.
BindToTexture( cuArr );
DeviceArrayCopyFromTexture( (float3*)gpuMat.data, gpuMat.step, gpuMat.cols, gpuMat.rows  );

// Step 3: unmap the resource once the copy has been issued.
checkCudaErrors( cudaGraphicsUnmapResources( 1, &cudaResourceColor, 0 ) );

参考文献:

  1. http://answers.opencv.org/question/12958/read-rendered-images-using-gpumat-and-cuda/

  2. CUDA Samples \ v5.5 \ 3_Imaging \ postProcessGL

答案 1 :(得分:1)

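GpuMat::step是以元素为单位的,而pitch是以字节为单位的,所以请尝试把下面的第一条调用改为第二条调用(即把目标pitch乘以sizeof(float3))。(审阅注:OpenCV文档把GpuMat::step描述为以字节为单位的行跨度,因此这一前提需要先核实。)

将: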
// Before: the destination pitch is gpuColorMat.step, passed unchanged.
cudaMemcpy2DFromArray( gpuColorMat.data, gpuColorMat.step,                  colorArr, 0, 0, gpuColorMat.cols*sizeof(float3), gpuColorMat.rows, cudaMemcpyDeviceToDevice );

// After: the destination pitch is gpuColorMat.step multiplied by sizeof(float3).
// NOTE(review): this is only right if GpuMat::step counts elements. OpenCV
// documents step as a byte stride; if that holds, this overstates the pitch by
// a factor of sizeof(float3) - verify before using.
cudaMemcpy2DFromArray( gpuColorMat.data, gpuColorMat.step * sizeof(float3), colorArr, 0, 0, gpuColorMat.cols*sizeof(float3), gpuColorMat.rows, cudaMemcpyDeviceToDevice );