如何使用GPU绘制OpenGL像素

时间:2018-08-07 00:36:14

标签: c++ opengl cuda

简介:
我正在制作一个粉末玩具(powder toy)游戏,它利用并行处理来计算游戏物理,模拟区域是500 x 500的粉末网格。游戏几乎所有针对粒子的操作都在GPU上执行,但渲染粒子时用的是CPU(这使速度大大降低)。如何改为在GPU上渲染粒子?我把粒子数据主要保留在GPU上,因为大多数操作都在GPU上进行,而cudaMemcpy相当慢:如果把粒子数据放在主机内存中,项目会卡顿到无法接受的程度。

代码:
这是我的显示函数:

/**
 * Display callback: copies the particle grid from the device to the host and
 * draws every live particle as a small quad, then draws the cursor outline
 * and swaps the buffers.
 *
 * Cell positions on the 500 x 500 grid are mapped to coordinates with
 * (p / 500.0 - 0.5) * 2, and each quad extends 0.002 * 2 = 0.004 units.
 *
 * All quads of a loop are submitted inside a single glBegin/glEnd pair
 * (glColor3f and glVertex2d are both legal inside the pair) instead of one
 * pair per particle, which removes up to 250000 begin/end calls per frame
 * without changing any vertex that is emitted.
 */
void display()
{
    // Start timing this frame
    mainloopMeasurePerformanceStart(1);

    // Clear the screen and reset the projection matrix
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();

    // Blocking device-to-host copy of the whole particle grid.
    // NOTE(review): sizeof(particles) is only the full grid size if
    // `particles` is a host array rather than a pointer - confirm.
    cudaMemcpy(&particles, d_particles, sizeof(particles), cudaMemcpyDeviceToHost);

    // Draw one quad per live particle, all in a single primitive batch
    glBegin(GL_QUADS);
    for(int i=0;i<250000;i++)
    {
        // Dead cells are not drawn
        if(!particles[i].alive) continue;

        // Grid position of this cell
        int pos[2];
        id_to_pos(i,pos);

        // Corners of the quad (same expressions as the per-vertex originals)
        const double x0 = (pos[0]/500.0-0.5)*2;
        const double x1 = (pos[0]/500.0-0.5+0.002)*2;
        const double y0 = (pos[1]/500.0-0.5)*2;
        const double y1 = (pos[1]/500.0-0.5+0.002)*2;

        glColor3f(particles[i].color[0],particles[i].color[1],particles[i].color[2]);
        glVertex2d(x0,y0);
        glVertex2d(x1,y0);
        glVertex2d(x1,y1);
        glVertex2d(x0,y1);
    }
    glEnd();

    // Get the mouse position
    int m_posX, m_posY;
    mousePos(&m_posX, &m_posY);

    // Draw the cursor as 360 white quads placed on a circle of
    // radius cursor_radius around the mouse position
    glColor3f(1.0f, 1.0f, 1.0f);
    glBegin(GL_QUADS);
    for(int i=0;i<360;i++)
    {
        // Point on the circle for step i
        double pos[2];
        pos[0] = sin(2*PI/360*i)*cursor_radius+m_posX;
        pos[1] = cos(2*PI/360*i)*cursor_radius+m_posY;

        const double x0 = (pos[0]/500.0-0.5)*2;
        const double x1 = (pos[0]/500.0-0.5+0.002)*2;
        const double y0 = (pos[1]/500.0-0.5)*2;
        const double y1 = (pos[1]/500.0-0.5+0.002)*2;

        glVertex2d(x0,y0);
        glVertex2d(x1,y0);
        glVertex2d(x1,y1);
        glVertex2d(x0,y1);
    }
    glEnd();

    // Swap the front and back frame buffers
    glutSwapBuffers();

    // Stop timing this frame
    mainloopMeasurePerformanceEnd();
}

处理沙粒的内核:

// Returns true when cell (x, y) holds a live particle. Cells outside the
// 500 x 500 grid are reported as empty, so callers never index out of range.
static __device__ bool sand_alive_at(const Sand *particles, int x, int y)
{
    if(x < 0 || x >= 500 || y < 0 || y >= 500) return false;
    return particles[pos_to_id(x, y)].alive;
}

// Tries to move particles[id] one cell down and dx cells sideways.
// The move happens only if the target cell and the cell beside the particle
// are free, and the two cells one step further out (same row and row below)
// are not both occupied. Returns true when the particle was moved.
static __device__ bool sand_try_slide(Sand *particles, int id, const int *pos, int dx)
{
    const int nx = pos[0] + dx;
    const int ny = pos[1] - 1;

    // Target must be inside the grid
    if(nx < 0 || nx >= 500 || ny < 0) return false;

    // Target cell and the cell directly beside the particle must be free
    if(sand_alive_at(particles, nx, ny)) return false;
    if(sand_alive_at(particles, nx, pos[1])) return false;

    // Second-neighbour check; out-of-grid cells count as empty here, which
    // avoids the out-of-range reads at the left and right edges
    if(sand_alive_at(particles, nx + dx, pos[1]) &&
       sand_alive_at(particles, nx + dx, ny)) return false;

    // Move the particle
    particles[pos_to_id(nx, ny)] = particles[id];
    particles[id].alive = false;
    return true;
}

/**
 * Advances the sand simulation by one step; one thread handles one cell.
 *
 * Launch layout: 1D grid of 1D blocks with at least 500*500 threads in
 * total; surplus threads exit immediately.
 *
 * @param particles     device array of 500*500 cells, indexed by cell id
 * @param mouseStates   device array indexed by GLUT button constants
 * @param seed          value mixed with the cell id to pick a slide direction
 * @param m_pos_x       mouse x position in grid cells
 * @param m_pos_y       mouse y position in grid cells
 * @param cursor_radius radius around the mouse in which cells are
 *                      erased (right button) or created (left button)
 *
 * NOTE(review): threads write to neighbouring cells without atomics or
 * barriers, so two threads may move particles into the same cell in one step.
 */
__global__ void do_sand(
    Sand *particles, bool *mouseStates, unsigned long seed,
    int m_pos_x, int m_pos_y, double cursor_radius
){
    // Global cell id; blockDim.x keeps this correct for any block size
    int id = blockIdx.x*blockDim.x+threadIdx.x;

    // Threads beyond the grid have nothing to do
    if(id >= 500*500) return;

    // Convert the ID to a position
    int pos[2];
    id_to_pos(id,pos);

    // Convert the mouse position to an array
    int m_pos[2];
    m_pos[0] = m_pos_x;
    m_pos[1] = m_pos_y;

    // Is the sand particle alive
    if(particles[id].alive)
    {
        // Is there sand being cleared and is this particle in range
        if(mouseStates[GLUT_RIGHT_BUTTON] && distance_between(pos, m_pos) < cursor_radius)
        {
            // Delete this particle; an erased particle must not take part
            // in physics (it would otherwise overwrite a neighbouring cell)
            particles[id].alive = false;
            return;
        }

        // Do physics
        switch(particles[id].model)
        {
            // Powder
            case 'P':
            {
                // Fall straight down when the cell below is inside the grid and free
                if(pos[1]-1 >= 0 && !sand_alive_at(particles, pos[0], pos[1]-1))
                {
                    particles[pos_to_id(pos[0], pos[1]-1)] = particles[id];
                    particles[id].alive = false;
                    break;
                }

                // Pick which diagonal is tried first. The product is odd only
                // when both seed and id are odd, so choice is 1 in all other cases.
                int choice;
                if((seed * id * 5423) % 2 == 0) choice=1;
                else choice=-1;

                // Try one diagonal, then the other
                if(!sand_try_slide(particles, id, pos, -choice))
                    sand_try_slide(particles, id, pos, choice);
                break;
            }

            // Fluid (not implemented)
            case 'F':
            {
                break;
            }

            default:
                break;
        }
    }

    // Is there sand being added and is this particle in range
    else if(mouseStates[GLUT_LEFT_BUTTON] && distance_between(pos, m_pos) < cursor_radius)
    {
        // Make this particle
        particles[id].alive = true;
        particles[id].color[0] = 0.0f;
        particles[id].color[1] = 0.0f;
        particles[id].color[2] = 0.6f;
        particles[id].model = 'P';
    }
}

2 个答案:

答案 0 :(得分:2)

自从首次发布以来,CUDA就已经支持OpenGL互操作性(也支持Direct3D)。这一功能有很完善的文档(documented);如果您已经安装了CUDA示例,其中还有几份完整的示例代码(sample codes)可供研究。

简而言之,您可以将现有的OpenGL缓冲区对象(buffer object)映射到CUDA地址空间中,这样计算内核就可以读写这块OpenGL内存;随后在CUDA端解除映射,把这块内存交还给OpenGL,再照常用这个被CUDA修改过的缓冲区进行渲染。这样做有不小的开销,但是性能可能仍然比将数据复制到主机进行渲染要好。

根据建议,您可以在this Nvidia提供的演示文稿中阅读完整的介绍。

答案 1 :(得分:0)

我想出了如何创建纹理并使用CUDA渲染它们以提高速度的方法。如果我创建一个字节数组(也可以是int或其他类型),让每个像素的3个值(RGBA则为4个值)彼此相邻排列以形成图像,我就可以将其加载到OpenGL中。

// Illustrative layout only: the image is a flat byte array holding one
// R, G, B triple per pixel, width*height triples in total. R, G and B
// stand for the channel values of each pixel.
GLubyte data[width*height*3] = {
    R, G, B,    // first pixel
    R, G, B,    // second pixel
    R, G, B     // third pixel, and so on for every remaining pixel
};

正如前面提到的,我本可以使用OpenGL缓冲区对象,但是图像似乎可以在屏幕上显示每个像素,并且我在网上查找有关缓冲区对象的信息时遇到了麻烦。

以下是我的显示代码段:

// Renders the particle grid into an RGB byte image (on the CPU or with the
// render_pixels kernel), uploads it as a temporary texture and draws it on a
// quad spanning (-1,-1) to (1,1).

// Host staging buffer: 3 bytes (R, G, B) per pixel, zero-initialised
GLubyte *pixels = new GLubyte[sxy[0]*sxy[1]*3]();

// Get the mouse pos
int m_x, m_y;
mousePos(&m_x,&m_y);

// Render on CPU
if(cpu_only) render_pixels_cpu(
    particles,pixels,sxy,
    m_x,m_y,cursor_radius
);

else
{
    // One thread per pixel: N threads per block and enough blocks to cover
    // the whole image (ceiling division) instead of a fixed block count
    const int N = 512;
    const int blocks = (sxy[0]*sxy[1] + N - 1) / N;

    // A zero-sized image would make the launch configuration invalid
    if(blocks > 0)
    {
        render_pixels<<<blocks,N>>>(
            N,d_particles,d_pixels,
            d_sxy,m_x,m_y,cursor_radius
        );
    }

    // Copy the rendered image back to the host
    cudaMemcpy(pixels, d_pixels, sizeof(GLubyte)*sxy[0]*sxy[1]*3, cudaMemcpyDeviceToHost);
}

// Generate and bind the texture
GLuint tex;
glGenTextures(1, &tex);
glBindTexture(GL_TEXTURE_2D, tex);

// Rows are tightly packed RGB bytes. The default unpack alignment of 4 would
// skew the image whenever sxy[0]*3 is not a multiple of 4, so use 1 for this
// upload and restore the previous pixel-store state afterwards.
glPushClientAttrib(GL_CLIENT_PIXEL_STORE_BIT);
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glTexImage2D( GL_TEXTURE_2D, 0, GL_RGB, sxy[0], sxy[1], 0, GL_RGB, GL_UNSIGNED_BYTE, pixels );
glPopClientAttrib();

glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );

// Free the pixels: the buffer came from new[], so it needs delete[]
delete[] pixels;

// Draw a textured quad over the whole (-1,-1)..(1,1) square
glBegin(GL_QUADS);
    glTexCoord2d( 0.0, 0.0);    glVertex2d(-1.0,-1.0);
    glTexCoord2d( 1.0, 0.0);    glVertex2d( 1.0,-1.0);
    glTexCoord2d( 1.0, 1.0);    glVertex2d( 1.0, 1.0);
    glTexCoord2d( 0.0, 1.0);    glVertex2d(-1.0, 1.0);
glEnd();

// Unbind the texture (texture name 0, not a NULL pointer)
glBindTexture(GL_TEXTURE_2D, 0);

// Delete the texture
glDeleteTextures(1, &tex);

CUDA代码:

/**
 * Writes one RGB pixel per grid cell into `pixels`: white where the cell lies
 * on the cursor circle, otherwise the cell's particle colour scaled to 0..255.
 *
 * Launch layout: 1D grid of 1D blocks; N must equal the number of threads
 * per block used for the launch. Threads beyond the image exit immediately.
 *
 * @param N         threads per block used for the launch
 * @param particles device array of cells, indexed by cell id
 * @param pixels    device output buffer, 3 bytes (R, G, B) per pixel
 * @param sxy       device array holding the image width [0] and height [1]
 * @param m_x       mouse x position; no cursor is drawn unless m_x > -1
 * @param m_y       mouse y position; no cursor is drawn unless m_y > -1
 * @param m_radius  cursor radius
 *
 * NOTE(review): the colour is written regardless of particles[i].alive -
 * confirm that dead cells carry the intended background colour.
 */
__global__ void render_pixels(
    int N, Sand* particles, GLubyte* pixels, int* sxy,
    int m_x, int m_y, double m_radius
){
    // Global cell index of this thread
    int i = blockIdx.x*N+threadIdx.x;

    // Return if out of range; valid indices are 0 .. width*height-1
    if(i>=sxy[0]*sxy[1])return;

    // Get the position
    int pos[2];
    id_to_pos(i,pos,sxy);

    // Index of this pixel in the row-major image
    int id = (pos[1]*sxy[0])+pos[0];

    // Convert the mouse pos to a position
    int mpos[2] = {m_x, m_y};

    // Calculate the distance
    double distance = distance_between(pos, mpos);

    // The cursor outline covers cells whose truncated distance to the mouse
    // equals the truncated radius
    if((int)distance==(int)m_radius&&m_x>-1&&m_y>-1)
    {
        // Create a circle here
        pixels[(id*3)+0] = (GLubyte)255;
        pixels[(id*3)+1] = (GLubyte)255;
        pixels[(id*3)+2] = (GLubyte)255;
    }

    else
    {
        // Set the colours
        pixels[(id*3)+0] = (GLubyte)(particles[i].color[0]*255);
        pixels[(id*3)+1] = (GLubyte)(particles[i].color[1]*255);
        pixels[(id*3)+2] = (GLubyte)(particles[i].color[2]*255);
    }
}