我尝试了 cudaMemcpy2DFromArray 和 cudaMemcpy2D,但两者都没有正常工作。所谓"不正常",是指 GpuMat 确实从 cudaArray 复制到了一些内容,但图像在水平方向上的比例是错误的。
代码段如下:
// Map the registered graphics resource and fetch the cudaArray behind it.
cudaArray *colorArr;
checkCudaErrors( cudaGraphicsMapResources( 1, &cudaResourceColor, 0 ) );
checkCudaErrors( cudaGraphicsSubResourceGetMappedArray( &colorArr, cudaResourceColor, 0, 0 ) );
// Destination image of size w x h with element type CV_32FC3.
cv::gpu::GpuMat gpuColorMat(Size(w,h), CV_32FC3);
// Tried method 1: copy straight from the array into the GpuMat, passing
// gpuColorMat.step as the destination pitch and cols*sizeof(float3) as the row
// width in bytes. The result was wrong.
// NOTE(review): this assumes colorArr stores float3-sized texels; the array's
// element layout is not visible in this snippet - confirm against the GL format.
checkCudaErrors( cudaMemcpy2DFromArray( gpuColorMat.data, gpuColorMat.step, colorArr,
0, 0, gpuColorMat.cols*sizeof(float3), gpuColorMat.rows, cudaMemcpyDeviceToDevice ) );
// Tried method 2: stage the array in a linear device buffer, then cudaMemcpy2D
// from that buffer into the GpuMat. Same wrong result as method 1.
float3 *cuarr;
checkCudaErrors( cudaMalloc( (void**)&cuarr, w*h*sizeof(float3) ) );
// NOTE(review): the destination pitch passed here is w*h, while the row width is
// w*sizeof(float3); the cudaMemcpy2D below then reads cuarr with a source pitch of
// w*sizeof(float3), so the two calls disagree about cuarr's row layout.
checkCudaErrors( cudaMemcpy2DFromArray( cuarr, w*h, colorArr, 0, 0, w*sizeof(float3), h, cudaMemcpyDeviceToDevice ) );
checkCudaErrors( cudaMemcpy2D( (float*)gpuColorMat.data, gpuColorMat.step, cuarr, w*sizeof(float3), w*sizeof(float3), h, cudaMemcpyDeviceToDevice ) );
// NOTE(review): cuarr is not freed anywhere in this snippet.
// Unmap the graphics resource again.
checkCudaErrors( cudaGraphicsUnmapResources( 1, &cudaResourceColor, 0 ) );
有人能帮帮我吗?
答案 0 :(得分:2)
我终于成功了。我在下面分享我的代码:
.cu 文件:执行设备端数组复制。渲染得到的图像被绑定到纹理 inTex,再由内核复制到目标缓冲区 float3* dst。
// File-scope 2D texture reference with float4 texels, read with
// cudaReadModeElementType. BindToTexture() attaches a cudaArray to it and
// CuDeviceArrayCopyFromTexture samples it through tex2D.
// NOTE(review): texture references are a legacy CUDA API - confirm the toolkit
// version in use still supports them.
texture<float4, 2, cudaReadModeElementType> inTex;
// Copies the x/y/z channels of the texture bound to inTex into a pitched float3
// image, dropping the fourth channel.
//
//   dst      destination buffer, one float3 per pixel
//   dstStep  distance in bytes between the starts of consecutive rows of dst
//   width    number of pixels per row to copy
//   height   number of rows to copy
//
// Expects a 2D launch in which thread (x, y) handles pixel (x, y). The grid may
// overshoot the image; threads outside width x height return without touching
// memory.
__global__ void CuDeviceArrayCopyFromTexture( float3* dst, int dstStep, int width, int height )
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Valid pixels are [0, width) x [0, height). The guard must use >=, otherwise
    // the threads at x == width or y == height write one element past the image.
    if ( x >= width || y >= height ) return;

    float4 res = tex2D(inTex, x, y);

    // Rows are dstStep bytes apart, so step through the buffer as bytes. The
    // offset is computed in size_t so large images do not overflow int.
    float3* row_y = (float3*)((char*)dst + static_cast<size_t>(y) * static_cast<size_t>(dstStep));
    row_y[x] = make_float3(res.x, res.y, res.z);
}
// Integer division of n by m, rounded up (intended for positive n and m).
inline int iDivUp(int n, int m)
{
    const int padded = n + m - 1;
    return padded / m;
}
// Host-side launcher for CuDeviceArrayCopyFromTexture.
//
//   dst      device destination buffer, one float3 per pixel
//   dstStep  distance in bytes between consecutive rows of dst
//   width    image width in pixels
//   height   image height in pixels
//
// Launches on the default stream with 64x1 thread blocks and enough blocks to
// cover the whole image. The source must already be bound to inTex (see
// BindToTexture). Launch failures are reported through checkCudaErrors.
void DeviceArrayCopyFromTexture( float3* dst, int dstStep, int width, int height )
{
    // Nothing to copy; a grid with a zero dimension is an invalid launch
    // configuration, so bail out before launching.
    if ( width <= 0 || height <= 0 ) return;

    dim3 threads( 64, 1 );
    // Round up in both dimensions so the grid still covers the image if the
    // block shape is changed later; the kernel guards the overshoot.
    dim3 grid( iDivUp( width, threads.x ), iDivUp( height, threads.y ) );

    CuDeviceArrayCopyFromTexture <<< grid, threads >>> ( dst, dstStep, width, height );

    // A kernel launch returns no status itself; pick up configuration errors here.
    checkCudaErrors( cudaGetLastError() );
}
// Binds the given cudaArray to the file-scope texture reference inTex, which
// CuDeviceArrayCopyFromTexture samples. The bind call's result is passed to
// checkCudaErrors.
//
//   cuArr  array to bind; the .cpp snippet passes the array returned by
//          cudaGraphicsSubResourceGetMappedArray
void BindToTexture( cudaArray *cuArr )
{
checkCudaErrors( cudaBindTextureToArray( inTex, cuArr ) );
}
.cpp 文件:创建 GL 渲染纹理,将其映射并绑定到 CUDA 纹理,然后调用设备数组复制函数。
// Create the GL colour texture and register it with CUDA.
glActiveTexture(GL_TEXTURE0);
glGenTextures(1, &fboColorTex);
glBindTexture(GL_TEXTURE_2D, fboColorTex);
// GL_RGB16F and GL_RGB32F both failed for me, so I switched to GL_RGBA16F, which
// can be mapped to a cudaArray and read as float4 elements.
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
// Register the texture as read-only for CUDA. cudaGraphicsGLRegisterImage takes
// cudaGraphicsRegisterFlags* values; the cudaGraphicsMapFlags* constants belong
// to cudaGraphicsResourceSetMapFlags.
checkCudaErrors( cudaGraphicsGLRegisterImage( &cudaResourceColor, fboColorTex, GL_TEXTURE_2D, cudaGraphicsRegisterFlagsReadOnly ) );
// Host wrappers implemented in the .cu file.
extern void BindToTexture( cudaArray *cuArr );
extern void DeviceArrayCopyFromTexture( float3* dst, int dstStep, int width, int height );
// Destination image of size w x h with element type CV_32FC3.
// NOTE(review): presumably static so the buffer is reused across frames - confirm
// against the enclosing scope, which is not shown here.
static GpuMat gpuMat( Size(w,h), CV_32FC3 );
cudaArray *cuArr;
// Copy color buffer: map the registered resource, get the cudaArray behind it,
// bind that array to the texture and run the copy into gpuMat.
checkCudaErrors( cudaGraphicsMapResources( 1, &cudaResourceColor, 0 ) );
checkCudaErrors( cudaGraphicsSubResourceGetMappedArray( &cuArr, cudaResourceColor, 0, 0 ) );
BindToTexture( cuArr );
// gpuMat.step is handed over as the destination row step; the kernel applies it
// as a byte offset between rows.
DeviceArrayCopyFromTexture( (float3*)gpuMat.data, gpuMat.step, gpuMat.cols, gpuMat.rows );
// NOTE(review): nothing in this snippet unbinds the texture or synchronizes with
// the kernel before unmapping - confirm that is acceptable for the caller.
checkCudaErrors( cudaGraphicsUnmapResources( 1, &cudaResourceColor, 0 ) );
参考文献:
http://answers.opencv.org/question/12958/read-rendered-images-using-gpumat-and-cuda/
CUDA Samples\v5.5\3_Imaging\postProcessGL
答案 1 :(得分:1)
GpuMat::step 是元素数,而 pitch 是以字节为单位的(注:OpenCV 文档将 GpuMat::step 描述为相邻两行之间以字节计的距离,请先核对这一点),所以请尝试修改该调用,从
// Call as written in the question: the destination pitch is gpuColorMat.step.
cudaMemcpy2DFromArray( gpuColorMat.data, gpuColorMat.step, colorArr, 0, 0, gpuColorMat.cols*sizeof(float3), gpuColorMat.rows, cudaMemcpyDeviceToDevice );
到
// Suggested change: the destination pitch is gpuColorMat.step scaled by sizeof(float3).
// NOTE(review): verify the unit of GpuMat::step before scaling it - OpenCV
// documents it as a byte distance between rows.
cudaMemcpy2DFromArray( gpuColorMat.data, gpuColorMat.step * sizeof(float3), colorArr, 0, 0, gpuColorMat.cols*sizeof(float3), gpuColorMat.rows, cudaMemcpyDeviceToDevice );