OpenGL + CUDA互操作 - 图像不在窗口中显示

时间:2014-08-26 18:05:01

标签: opencv opengl cuda interop

背景:我使用OpenCV从磁盘读取图像,使用CUDA将其传递给GPU,现在,我正在尝试让OpenGL渲染图像。

我这里没有使用GLUT,而是直接用32位Windows(Win32)API创建一个新窗口,并在其中渲染图像。目前,当我先翻转OpenCV图像,再把flipped.data直接传给glTexImage2D()函数时,OpenGL可以正常渲染图像。但是,当我改用CUDA + OpenGL互操作时,同一幅图像却无法渲染。

我的实际图像比当前图像大。我正在使用OpenGL像素缓冲对象和OpenGL纹理来渲染图像。利用纹理允许我指定要显示的图像部分。我的灰度图像的尺寸为w1024 x h256,其深度为8位(unsigned char / GL_UNSIGNED_BYTE)。

问题:我无法弄清楚代码中出了什么问题。我尝试仔细遵循CUDA C编程指南,并使用PBO、纹理以及实际输入数据来注册/映射CUDA资源。由于我的输入图像数据来自OpenCV,我只是将flipped的数据复制到设备指针dev_inp中。我还使用cudaGraphicsResourceGetMappedPointer()将dev_inp映射到CUDA资源(这样做正确吗?)。然而,窗口没有任何显示,始终保持黑色。视口没有改变,我在glBegin()..glEnd()之间指定的坐标也是正确的,因为在普通纹理方式下它们能把flipped的数据正确映射到纹理上。

我在这里错过了其他什么吗?我是否错误地将CUDA资源映射到PBO或设备指针?

OpenGL + CUDA互操作部分:这里只列出我代码中与CUDA + OpenGL互操作相关的部分。函数DrawOpenGLScene()由窗口过程WndProc()调用。

// Loads a grayscale image with OpenCV, copies its vertically flipped pixels
// into an OpenGL pixel buffer object (PBO) through CUDA graphics interop,
// uploads the PBO contents into a 2D texture and draws one textured quad.
// The caller makes an OpenGL rendering context current before calling this.
// Writes the file-scope variables `pbo`, `tex` and `cuda_resource`.
// NOTE(review): every call generates a new PBO, a new texture and a new CUDA
// registration; none of them is released inside this function.
void DrawOpenGLScene()
{

    initCUDADevice(); 

    Mat image, flipped;
    image = imread("K:/Ultrasound experiment images/PA_160.png", CV_LOAD_IMAGE_GRAYSCALE);   // Read the file from disk

    if(!image.data)                              // Check for invalid input
    {
        cout <<  "Could not open or find the image" << std::endl ;
        // NOTE(review): execution continues after this message; nothing
        // below guards against an empty image.

    }

    // flip code 0 = flip around the x-axis (vertical flip)
    cv::flip(image, flipped, 0);

    imshow("flip", image);  // shows the original (unflipped) image in a window titled "flip"

    //cout << "depth: " << flipped.depth() << endl;

    // ===================================================================================
    // opengl setup

    // first, the context was created 
    // now, clear the window with the rendering context
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glLoadIdentity();

    // ====================================================================================
    // generate the pixel buffer object (PBO)

    // Generate a buffer ID called a PBO (Pixel Buffer Object)
    glGenBuffers(1, &pbo);
    // Make this the current UNPACK buffer (OpenGL is state-based)
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
    // Allocate rows * cols bytes for the buffer (one unsigned char per pixel),
    // with no initial data
    glBufferData(GL_PIXEL_UNPACK_BUFFER, sizeof(unsigned char) * flipped.rows * flipped.cols, NULL, GL_DYNAMIC_COPY);
    //gpuErrchk(cudaGLRegisterBufferObject( pbo ));
    // Register the PBO with CUDA; the handle is stored in cuda_resource
    gpuErrchk(cudaGraphicsGLRegisterBuffer(&cuda_resource, pbo, cudaGraphicsMapFlagsNone)); 

    // ====================================================================================
    // create the texture object 

    // enable 2D texturing
    glEnable(GL_TEXTURE_2D);

    // bind the texture     
    glGenTextures(1, &tex);
    glBindTexture(GL_TEXTURE_2D, tex);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

    //glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
    // put flipped.data at the end, and it'll work for normal texturing
    // NOTE(review): pbo is still bound to GL_PIXEL_UNPACK_BUFFER at this point,
    // so the NULL data argument is presumably read as offset 0 into that
    // buffer rather than "no data" -- confirm this is intended.
    glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE,  image.cols, image.rows,  0, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);

    // put tex at the end, and it'll work for normal texturing
    glBindTexture(GL_TEXTURE_2D, 0);

    // ====================================================================================
    // copy data from openCV 

    unsigned char *dev_inp; 

    // NOTE(review): the pointer stored here is overwritten by
    // cudaGraphicsResourceGetMappedPointer() below and is never passed to
    // cudaFree() in this function.
    gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );
    //cudaGLMapBufferObject((void**)dev_inp, pbo);

    // Map the registered PBO for CUDA access (stream 0)
    gpuErrchk( cudaGraphicsMapResources(1, &cuda_resource, 0) );

    size_t size; // = sizeof(unsigned char)*flipped.rows*flipped.cols; 
    // Fetch the device pointer and byte size of the mapped PBO
    gpuErrchk( cudaGraphicsResourceGetMappedPointer((void **)&dev_inp, &size, cuda_resource) );

    // Copy rows * cols bytes of the flipped image from host memory into the
    // mapped PBO (assumes flipped.data is stored contiguously -- TODO confirm)
    gpuErrchk( cudaMemcpy(dev_inp, flipped.data, sizeof(unsigned char)*flipped.rows*flipped.cols, cudaMemcpyHostToDevice) );

    //cudaGLUnmapBufferObject(pbo);
    // Unmap before OpenGL uses the buffer again
    gpuErrchk( cudaGraphicsUnmapResources(1, &cuda_resource, 0) ); 

    // ====================================================================================
    // bind pbo and texture to render data now 

    glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);

    glBindTexture(GL_TEXTURE_2D, tex);

    // With pbo bound as the unpack buffer, the NULL argument is an offset
    // into the PBO, so the texture is filled from the buffer contents
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, flipped.cols, flipped.rows, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);

    // One quad from (-1,-1) to (1,1) showing the full texture
    glBegin(GL_QUADS);
        glTexCoord2f(0.0f, 0.0f); glVertex3f(-1.0f, -1.0f,  1.0f);  // Bottom Left Of The Texture and Quad
        glTexCoord2f(1.0f, 0.0f); glVertex3f( 1.0f, -1.0f,  1.0f);  // Bottom Right Of The Texture and Quad
        glTexCoord2f(1.0f, 1.0f); glVertex3f( 1.0f,  1.0f,  1.0f);  // Top Right Of The Texture and Quad
        glTexCoord2f(0.0f, 1.0f); glVertex3f(-1.0f,  1.0f,  1.0f);  // Top Left Of The Texture and Quad
    glEnd();


    glFlush();  // force rendering to happen 

    //glBindTexture(GL_TEXTURE_2D, 0);

}

整个代码

// Forward declarations: OpenGL context setup, the per-paint renderer and the
// window procedure.
HGLRC SetUpOpenGLContext(HWND hWnd);
void DrawOpenGLScene();
LRESULT CALLBACK WndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam);

// Interop state shared between the renderer and the window procedure.
GLuint pbo = 0;                                  // pixel buffer object name
GLuint tex = 0;                                  // texture object name
struct cudaGraphicsResource *cuda_resource = 0;  // CUDA handle for the registered PBO


// Application entry point: registers the window class, creates the top-level
// window used for OpenGL rendering and pumps messages until WM_QUIT.
// Returns the WM_QUIT exit code, or 0 if the class/window could not be
// created or the message loop reports an error.
int WINAPI WinMain (HINSTANCE hInstance, HINSTANCE hPrevInstance,
                    LPSTR lpszCmdLine, int nCmdShow)

{

    static char szClassName[] = "Myclass";
    static char szTitle[]="A Simple Win32 API OpenGL Program";
    WNDCLASS wc;
    MSG      msg;
    HWND     hWnd;
    BOOL     bRet;

    wc.style = CS_HREDRAW | CS_VREDRAW;   // repaint the whole window on resize
    wc.lpfnWndProc = (WNDPROC)WndProc;
    wc.cbClsExtra = 0;
    wc.cbWndExtra = 0;
    wc.hInstance = hInstance;
    wc.hIcon = NULL;
    wc.hCursor = LoadCursor(NULL, IDC_ARROW);
    wc.hbrBackground = (HBRUSH)GetStockObject (BLACK_BRUSH);
    wc.lpszMenuName = NULL;
    wc.lpszClassName = szClassName;
    if (!RegisterClass (&wc))
        return 0;

    hWnd = CreateWindow(szClassName, szTitle,
                    WS_OVERLAPPEDWINDOW |
                        // NEED THESE for OpenGL calls to work!
            WS_CLIPCHILDREN | WS_CLIPSIBLINGS,
                            0, 0, 1024, 256,   // x, y, width, height of the window
            NULL, NULL, hInstance, NULL);

    // Without a window no WM_QUIT would ever be posted, so the message loop
    // below would block forever; bail out instead.
    if (hWnd == NULL)
        return 0;

    ShowWindow(hWnd, nCmdShow);
    UpdateWindow( hWnd );

    // GetMessage returns 0 for WM_QUIT and -1 on error. Testing the result
    // only for "non-zero" would treat the error as a message and keep looping.
    while ((bRet = GetMessage(&msg, NULL, 0, 0)) != 0)
    {
        if (bRet == -1)
            return 0;

        TranslateMessage( &msg );
        DispatchMessage( &msg );
    }

    // wParam of WM_QUIT carries the exit code given to PostQuitMessage
    return (int)msg.wParam;

}



// Window procedure.
//   WM_CREATE  - creates the OpenGL rendering context for the window.
//   WM_PAINT   - makes the context current, draws the scene, releases it.
//   WM_DESTROY - releases the CUDA registration and the rendering context,
//                then posts WM_QUIT.
// All other messages go to DefWindowProc. Returns 0 for handled messages.
LRESULT CALLBACK WndProc( HWND hWnd, UINT msg,
                     WPARAM wParam, LPARAM lParam )
{
    HDC hDC;
    static HGLRC hRC; // static: the context must persist between messages
    PAINTSTRUCT ps;

    switch (msg)
    {
    case WM_CREATE:
        // Select a pixel format and create a rendering context
        hRC = SetUpOpenGLContext(hWnd);
        break;

    case WM_PAINT:
        // BeginPaint/EndPaint always run so the update region is validated,
        // even when nothing can be drawn.
        hDC = BeginPaint(hWnd, &ps);
        // Only issue GL/CUDA calls when a context exists and could actually
        // be made current on this DC.
        if (hRC != NULL && wglMakeCurrent(hDC, hRC))
        {
            DrawOpenGLScene();  // Draw
            // We're done with the RC, so deselect it
            wglMakeCurrent(NULL, NULL);
        }
        EndPaint(hWnd, &ps);
        break;

    case WM_DESTROY:
        //cudaGLUnregisterBufferObject(pbo);
        // cuda_resource stays null if the window is destroyed before the
        // first paint; skip the unregister call in that case.
        if (cuda_resource != NULL)
        {
            // Best effort during shutdown: the result is intentionally ignored.
            (void)cudaGraphicsUnregisterResource(cuda_resource);
            cuda_resource = NULL;
        }

        // Clean up and terminate
        if (hRC != NULL)
        {
            wglDeleteContext(hRC);
            hRC = NULL;
        }

        PostQuitMessage(0);
        break;

    default:
        return DefWindowProc(hWnd, msg, wParam, lParam);
    }

    return (0);
}

//*******************************************************
//  SetUpOpenGLContext sets the pixel format of the window
//  and creates a rendering context (RC) on it.
//  Returns the RC, or NULL if the device context, the
//  pixel format or the RC could not be obtained.
//*******************************************************

HGLRC SetUpOpenGLContext(HWND hWnd)
{
    static PIXELFORMATDESCRIPTOR pfd = {
        sizeof (PIXELFORMATDESCRIPTOR), // struct size
        1,                              // Version number
        PFD_DRAW_TO_WINDOW |    // Flags, draw to a window,
            PFD_SUPPORT_OPENGL, // use OpenGL
        PFD_TYPE_RGBA,          // RGBA pixel values
        24,                     // 24-bit color
        0, 0, 0,                // RGB bits & shift sizes.
        0, 0, 0,                // Don't care about them
        0, 0,                   // No alpha buffer info
        0, 0, 0, 0, 0,          // No accumulation buffer
        32,                     // 32-bit depth buffer
        0,                      // No stencil buffer
        0,                      // No auxiliary buffers
        PFD_MAIN_PLANE,         // Layer type
        0,                      // Reserved (must be 0)
        0,                      // No layer mask
        0,                      // No visible mask
        0                       // No damage mask
    };

    HGLRC hRC = NULL;

    HDC hDC = GetDC(hWnd);
    if (hDC == NULL)
        return NULL;

    // ChoosePixelFormat returns 0 and SetPixelFormat returns FALSE on failure;
    // only create the context once the window really has a pixel format.
    int nMyPixelFormatID = ChoosePixelFormat(hDC, &pfd);
    if (nMyPixelFormatID != 0 && SetPixelFormat(hDC, nMyPixelFormatID, &pfd))
        hRC = wglCreateContext(hDC);

    // The DC is released on every path; the caller gets a fresh DC per paint.
    ReleaseDC(hWnd, hDC);
    return hRC;
}

//***********************************************************
//  initCUDADevice uses CUDA commands to initiate the CUDA
//  enabled graphics card. This is prior to resource mapping,
//  and rendering.
//***********************************************************

void initCUDADevice() { 

    gpuErrchk(cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ));    

}

//******************************************************** 
//  DrawOpenGLScene uses OpenGL commands to draw the scene.
//  It loads a grayscale image with OpenCV, copies its
//  vertically flipped pixels into a pixel buffer object
//  (PBO) through CUDA graphics interop, uploads the PBO
//  into a 2D texture and draws one textured quad.
//  The caller makes a rendering context current first.
//  Writes the file-scope pbo, tex and cuda_resource.
//  NOTE(review): every call generates a new PBO, texture
//  and CUDA registration; none is released in here.
//********************************************************

void DrawOpenGLScene()
{

    initCUDADevice(); 

    Mat image, flipped;
    image = imread("K:/Ultrasound experiment images/PA_160.png", CV_LOAD_IMAGE_GRAYSCALE);   // Read the file from disk

    if(!image.data)                              // Check for invalid input
    {
        cout <<  "Could not open or find the image" << std::endl ;
        // NOTE(review): execution continues after this message; nothing
        // below guards against an empty image.

    }

    // flip code 0 = flip around the x-axis (vertical flip)
    cv::flip(image, flipped, 0);

    imshow("flip", image);  // shows the original (unflipped) image in a window titled "flip"

    //cout << "depth: " << flipped.depth() << endl;

    // ===================================================================================
    // opengl setup

    // first, the context was created 
    // now, clear the window with the rendering context
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glLoadIdentity();

    // ====================================================================================
    // generate the pixel buffer object (PBO)

    // Generate a buffer ID called a PBO (Pixel Buffer Object)
    glGenBuffers(1, &pbo);
    // Make this the current UNPACK buffer (OpenGL is state-based)
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);
    // Allocate rows * cols bytes for the buffer (one unsigned char per pixel),
    // with no initial data
    glBufferData(GL_PIXEL_UNPACK_BUFFER, sizeof(unsigned char) * flipped.rows * flipped.cols, NULL, GL_DYNAMIC_COPY);
    //gpuErrchk(cudaGLRegisterBufferObject( pbo ));
    // Register the PBO with CUDA; the handle is stored in cuda_resource
    gpuErrchk(cudaGraphicsGLRegisterBuffer(&cuda_resource, pbo, cudaGraphicsMapFlagsNone)); 

    // ====================================================================================
    // create the texture object 

    // enable 2D texturing
    glEnable(GL_TEXTURE_2D);

    // bind the texture     
    glGenTextures(1, &tex);
    glBindTexture(GL_TEXTURE_2D, tex);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

    //glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
    // put flipped.data at the end, and it'll work for normal texturing
    // NOTE(review): pbo is still bound to GL_PIXEL_UNPACK_BUFFER at this point,
    // so the NULL data argument is presumably read as offset 0 into that
    // buffer rather than "no data" -- confirm this is intended.
    glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE,  image.cols, image.rows,  0, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);

    // put tex at the end, and it'll work for normal texturing
    glBindTexture(GL_TEXTURE_2D, 0);

    // ====================================================================================
    // copy data from openCV 

    unsigned char *dev_inp; 

    // NOTE(review): the pointer stored here is overwritten by
    // cudaGraphicsResourceGetMappedPointer() below and is never passed to
    // cudaFree() in this function.
    gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );
    //cudaGLMapBufferObject((void**)dev_inp, pbo);

    // Map the registered PBO for CUDA access (stream 0)
    gpuErrchk( cudaGraphicsMapResources(1, &cuda_resource, 0) );

    size_t size; // = sizeof(unsigned char)*flipped.rows*flipped.cols; 
    // Fetch the device pointer and byte size of the mapped PBO
    gpuErrchk( cudaGraphicsResourceGetMappedPointer((void **)&dev_inp, &size, cuda_resource) );

    // Copy rows * cols bytes of the flipped image from host memory into the
    // mapped PBO (assumes flipped.data is stored contiguously -- TODO confirm)
    gpuErrchk( cudaMemcpy(dev_inp, flipped.data, sizeof(unsigned char)*flipped.rows*flipped.cols, cudaMemcpyHostToDevice) );

    //cudaGLUnmapBufferObject(pbo);
    // Unmap before OpenGL uses the buffer again
    gpuErrchk( cudaGraphicsUnmapResources(1, &cuda_resource, 0) ); 

    // ====================================================================================
    // bind pbo and texture to render data now 

    glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);

    glBindTexture(GL_TEXTURE_2D, tex);

    // With pbo bound as the unpack buffer, the NULL argument is an offset
    // into the PBO, so the texture is filled from the buffer contents
    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, flipped.cols, flipped.rows, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);

    // One quad from (-1,-1) to (1,1) showing the full texture
    glBegin(GL_QUADS);
        glTexCoord2f(0.0f, 0.0f); glVertex3f(-1.0f, -1.0f,  1.0f);  // Bottom Left Of The Texture and Quad
        glTexCoord2f(1.0f, 0.0f); glVertex3f( 1.0f, -1.0f,  1.0f);  // Bottom Right Of The Texture and Quad
        glTexCoord2f(1.0f, 1.0f); glVertex3f( 1.0f,  1.0f,  1.0f);  // Top Right Of The Texture and Quad
        glTexCoord2f(0.0f, 1.0f); glVertex3f(-1.0f,  1.0f,  1.0f);  // Top Left Of The Texture and Quad
    glEnd();


    glFlush();  // force rendering to happen 

    //glBindTexture(GL_TEXTURE_2D, 0);

}

1 个答案:

答案 0 :(得分:0)

如果其他人遇到同样的问题,这个帖子也许能帮到他们。我通过修改DrawOpenGLScene()中的几处调用解决了我的问题。

事实证明,cudaGraphicsResourceGetMappedPointer()返回的是一个指向OpenGL PBO(由该PBO派生)的指针,并把这个指针存入dev_inp。dev_inp所指内存的大小为size = sizeof(unsigned char) * flipped.rows * flipped.cols,它由之前的glBufferData()调用分配,并通过cudaGraphicsGLRegisterBuffer()注册给CUDA。

完成此操作后,我之前用cudaMalloc()分配的内存就无法再访问了,因为cudaGraphicsResourceGetMappedPointer()会用新的指针覆盖dev_inp。删除cudaMalloc()和cudaFree()这两个调用之后,程序就能按预期运行。

为了释放内存,应该解除分配PBO,因为OpenGL是内存的“所有者”,而CUDA只是共享对OpenGL拥有的内存的访问权限。

修改后的DrawOpenGLScene()例程粘贴在下面:

// Looks up an OpenGL extension entry point by name through WGL.
#define GET_PROC_ADDRESS( str ) wglGetProcAddress( str )

// Buffer-object entry points, listed in the order a buffer is used
// (generate, bind, allocate, delete). They start out null and must be
// assigned via GET_PROC_ADDRESS before being called.
PFNGLGENBUFFERSARBPROC    glGenBuffers     = 0;
PFNGLBINDBUFFERARBPROC    glBindBuffer     = 0;
PFNGLBUFFERDATAARBPROC    glBufferData     = 0;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers  = 0;

void initCUDADevice() { 

    gpuErrchk(cudaGLSetGLDevice( cutGetMaxGflopsDeviceId() ));    

}    

//******************************************************** 
//  DrawOpenGLScene uses OpenGL commands to draw the scene
//  This is where we put the OpenGL drawing commands
//********************************************************

void DrawOpenGLScene()
{

    // Clear Color and Depth Buffers
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    // Reset transformations
    glLoadIdentity();

    // ====================================================================================
    // initiate GPU by setting it correctly 
    initCUDADevice(); 

    // ====================================================================================
    // read the image that needs to be textured 

    Mat image, flipped;
    image = imread("K:/OCT experiment images/PA_175.png", CV_LOAD_IMAGE_GRAYSCALE);   // Read the file from disk

    if(!image.data)                              // Check for invalid input
    {
        cout <<  "Could not open or find the image" << std::endl ;


    }

    cv::flip(image, flipped, 0);

    imshow("OpenCV - image", image);    // displays output

    // ====================================================================================
    // allocate the PBO, texture, and CUDA resource

    glBindBuffer    = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
    glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
    glGenBuffers    = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
    glBufferData    = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");

    // ====================================================================================
    // generate the pixel buffer object (PBO)

    // Generate a buffer ID called a PBO (Pixel Buffer Object)
    glGenBuffers(1, &pbo);

    // Make this the current UNPACK buffer (OpenGL is state-based)
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo);

    // Allocate data for the buffer. 4-channel 8-bit image
    glBufferData(GL_PIXEL_UNPACK_BUFFER, sizeof(unsigned char) * flipped.rows * flipped.cols, NULL, GL_STREAM_DRAW);
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

    gpuErrchk(cudaGraphicsGLRegisterBuffer(&cuda_resource, pbo, cudaGraphicsMapFlagsNone)); 

    // ====================================================================================
    // create the texture object 

    // enable 2D texturing
    glEnable(GL_TEXTURE_2D);

    // generate and bind the texture    
    glGenTextures(1, &tex);
    glBindTexture(GL_TEXTURE_2D, tex);

    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

    // put flipped.data at the end for cpu rendering 
    glTexImage2D(GL_TEXTURE_2D, 0, GL_LUMINANCE,  image.cols, image.rows,  0, GL_LUMINANCE, GL_UNSIGNED_BYTE, 0 );

    // put tex at the end for cpu rendering 
    glBindTexture(GL_TEXTURE_2D, 0);

    // ====================================================================================
    // copy OpenCV flipped image data into the device pointer

    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    unsigned char *dev_inp; 

    //gpuErrchk( cudaMalloc((void**)&dev_inp, sizeof(unsigned char)*flipped.rows*flipped.cols) );

    gpuErrchk( cudaGraphicsMapResources(1, &cuda_resource, 0) );

    size_t size; 
    gpuErrchk( cudaGraphicsResourceGetMappedPointer((void **)&dev_inp, &size, cuda_resource) );

    gpuErrchk( cudaMemcpy(dev_inp, flipped.data, sizeof(unsigned char)*flipped.rows*flipped.cols, cudaMemcpyHostToDevice) );

    gpuErrchk( cudaGraphicsUnmapResources(1, &cuda_resource, 0) ); 

    // ====================================================================================
    // bind pbo and texture to render data now 

    glBindBuffer( GL_PIXEL_UNPACK_BUFFER, pbo);
    //
    glBindTexture(GL_TEXTURE_2D, tex);

    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, flipped.cols, flipped.rows, GL_LUMINANCE, GL_UNSIGNED_BYTE, NULL);

    gpuErrchk( cudaGraphicsUnregisterResource(cuda_resource));
    gpuErrchk( cudaThreadSynchronize());

    //gpuErrchk(cudaFree(dev_inp));

    // ====================================================================================
    // map the texture coords to the vertex coords 

    glBegin(GL_QUADS);
    // Front Face
    glTexCoord2f(0.0f, 0.0f); glVertex3f(-1.0f, -1.0f,  1.0f);  // Bottom Left Of The Texture and Quad
    glTexCoord2f(1.0f, 0.0f); glVertex3f( 1.0f, -1.0f,  1.0f);  // Bottom Right Of The Texture and Quad
    glTexCoord2f(1.0f, 1.0f); glVertex3f( 1.0f,  1.0f,  1.0f);  // Top Right Of The Texture and Quad
    glTexCoord2f(0.0f, 1.0f); glVertex3f(-1.0f,  1.0f,  1.0f);  // Top Left Of The Texture and Quad

    glEnd();

    glFlush();  // force rendering

    glDisable(GL_TEXTURE_2D);

    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    glDeleteBuffers(1, &pbo);