简介:
我正在制作一个粉末玩具(powder toy),它利用并行处理来完成游戏的物理计算,处理的是500 x 500的粉末区域。游戏几乎所有针对粒子的操作都在GPU上执行,但粒子的渲染却使用CPU(这使速度大大降低)。如何在GPU而不是CPU上渲染粒子?我把粒子数据主要保留在GPU上,因为大多数操作都在GPU上进行,而cudaMemcpy相当慢;如果把数据放在主机内存中,项目会卡顿得无法控制。
代码:
这是我的显示函数:
/**
 * Display callback: copies the particle grid from the device to the host and
 * draws every live particle as a small quad, then draws the cursor outline
 * and swaps the frame buffers.
 *
 * All particle quads are submitted inside a single glBegin/glEnd pair (and
 * the cursor quads inside a second one) instead of one pair per quad, which
 * removes up to 250,000 begin/end calls per frame. glColor3f and glVertex2d
 * are both legal between glBegin and glEnd, so the output is unchanged.
 */
void display()
{
    // Measure performance
    mainloopMeasurePerformanceStart(1);

    // Clear the screen
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();

    // Copy particle data from the device so it can be rendered on the host.
    // If the copy fails the host array cannot be trusted, so the particle
    // pass is skipped for this frame instead of drawing unknown data.
    const bool particles_copied =
        cudaMemcpy(&particles, d_particles, sizeof(particles), cudaMemcpyDeviceToHost) == cudaSuccess;

    if(particles_copied)
    {
        glBegin(GL_QUADS);
        // Loop over the sand particles
        for(int i=0;i<250000;i++)
        {
            // Skip cells that hold no live particle
            if(!particles[i].alive) continue;

            // Get the grid position of this cell
            int pos[2];
            id_to_pos(i,pos);

            // Corners of the cell in normalised device coordinates
            const double x0 = (pos[0]/500.0-0.5)*2;
            const double x1 = (pos[0]/500.0-0.5+0.002)*2;
            const double y0 = (pos[1]/500.0-0.5)*2;
            const double y1 = (pos[1]/500.0-0.5+0.002)*2;

            // Draw the pixel
            glColor3f(particles[i].color[0],particles[i].color[1],particles[i].color[2]);
            glVertex2d(x0,y0);
            glVertex2d(x1,y0);
            glVertex2d(x1,y1);
            glVertex2d(x0,y1);
        }
        glEnd();
    }

    // Get the mouse position
    int m_posX, m_posY;
    mousePos(&m_posX, &m_posY);

    // Draw the cursor as 360 small quads placed on a circle around the mouse
    glColor3f(1.0f, 1.0f, 1.0f);
    glBegin(GL_QUADS);
    for(int i=0;i<360;i++)
    {
        // Calculate the position of this point on the circle
        double pos[2];
        pos[0] = sin(2*PI/360*i)*cursor_radius+m_posX;
        pos[1] = cos(2*PI/360*i)*cursor_radius+m_posY;

        const double x0 = (pos[0]/500.0-0.5)*2;
        const double x1 = (pos[0]/500.0-0.5+0.002)*2;
        const double y0 = (pos[1]/500.0-0.5)*2;
        const double y1 = (pos[1]/500.0-0.5+0.002)*2;

        glVertex2d(x0,y0);
        glVertex2d(x1,y0);
        glVertex2d(x1,y1);
        glVertex2d(x0,y1);
    }
    glEnd();

    // Swap the front and back frame buffers
    glutSwapBuffers();

    // Measure performance
    mainloopMeasurePerformanceEnd();
}
处理砂的地方:
// Reports whether grid cell (x, y) holds a live particle. Cells outside the
// grid_w x grid_h grid are reported as empty, so callers never turn an
// out-of-range coordinate into an array index.
static __device__ bool do_sand_cell_alive(
    const Sand *particles, int x, int y, int grid_w, int grid_h
){
    if(x < 0 || x >= grid_w || y < 0 || y >= grid_h) return false;
    return particles[pos_to_id(x, y)].alive;
}

/**
 * Advances the sand simulation by one step, one thread per grid cell.
 *
 * Launch layout: 1D grid of 1D blocks; the cell index is
 * blockIdx.x*blockDim.x+threadIdx.x, and threads whose index falls outside
 * the 500 x 500 grid exit immediately.
 *
 * @param particles     device array with one Sand entry per grid cell
 * @param mouseStates   device array indexed by GLUT button constants
 * @param seed          value mixed into the left/right choice for powder
 * @param m_pos_x       mouse x position in grid cells
 * @param m_pos_y       mouse y position in grid cells
 * @param cursor_radius radius around the mouse in which particles are
 *                      created (left button) or erased (right button)
 *
 * NOTE(review): threads read and write neighbouring cells without any
 * synchronisation in this kernel, so two particles can target the same free
 * cell in the same step. That behaviour is inherited from the original
 * design and is not changed here.
 */
__global__ void do_sand(
    Sand *particles, bool *mouseStates, unsigned long seed,
    int m_pos_x, int m_pos_y, double cursor_radius
){
    // Grid dimensions (the bounds checks below were already written for 500)
    const int grid_w = 500;
    const int grid_h = 500;

    // Get the overall ID. blockDim.x replaces a hard-coded block size of 100
    // so the index stays correct for any launch configuration.
    int id = blockIdx.x*blockDim.x+threadIdx.x;
    // Threads past the end of the grid have no cell to process
    if(id >= grid_w*grid_h) return;

    // Convert the ID to a position
    int pos[2];
    id_to_pos(id,pos);
    // Convert the mouse position to an array
    int m_pos[2];
    m_pos[0] = m_pos_x;
    m_pos[1] = m_pos_y;

    // Empty cell: the only thing that can happen is sand being added
    if(!particles[id].alive)
    {
        if(mouseStates[GLUT_LEFT_BUTTON] && distance_between(pos, m_pos) < cursor_radius)
        {
            // Make this particle
            particles[id].alive = true;
            particles[id].color[0] = 0.0f;
            particles[id].color[1] = 0.0f;
            particles[id].color[2] = 0.6f;
            particles[id].model = 'P';
        }
        return;
    }

    // Is there sand being cleared and is this particle in range
    if(mouseStates[GLUT_RIGHT_BUTTON] && distance_between(pos, m_pos) < cursor_radius)
    {
        // Delete this particle; an erased particle must not also move
        particles[id].alive = false;
        return;
    }

    // Do physics
    switch(particles[id].model)
    {
        // Powder
        case 'P':
        {
            // Every powder move goes one row down; on the bottom row nothing moves
            const int below = pos[1]-1;
            if(below < 0) break;

            // Straight down, if that cell is free
            if(!do_sand_cell_alive(particles, pos[0], below, grid_w, grid_h))
            {
                const int target = pos_to_id(pos[0], below);
                particles[target] = particles[id];
                particles[id].alive = false;
                break;
            }

            // Pick which diagonal to try first.
            // NOTE(review): the product is odd only when both seed and id are
            // odd, so this choice is heavily biased towards 1.
            int choice;
            if((seed * id * 5423) % 2 == 0) choice=1;
            else choice=-1;

            // Try the column at pos[0]-choice first, then the one at pos[0]+choice
            for(int attempt=0; attempt<2; attempt++)
            {
                const int dir = (attempt == 0) ? -choice : choice;
                const int side = pos[0]+dir;
                // The diagonal target must be inside the grid
                if(side < 0 || side >= grid_w) continue;

                // The move needs the diagonal cell and the cell beside us to
                // be free, and is refused when both cells one column further
                // out are occupied. Cells beyond the grid edge count as
                // empty, which avoids the out-of-range index the unguarded
                // lookup produced for particles next to the left/right walls.
                const bool diagonal_free = !do_sand_cell_alive(particles, side, below, grid_w, grid_h);
                const bool beside_free = !do_sand_cell_alive(particles, side, pos[1], grid_w, grid_h);
                const bool outer_blocked =
                    do_sand_cell_alive(particles, side+dir, pos[1], grid_w, grid_h) &&
                    do_sand_cell_alive(particles, side+dir, below, grid_w, grid_h);

                if(diagonal_free && beside_free && !outer_blocked)
                {
                    // Move the particle
                    const int target = pos_to_id(side, below);
                    particles[target] = particles[id];
                    particles[id].alive = false;
                    break;
                }
            }
            break;
        }
        // Fluid (no behaviour implemented)
        case 'F':
        {
            break;
        }
    }
}
答案 0 :(得分:2)
CUDA自首次发布以来就支持与OpenGL的互操作(也支持Direct3D)。相关文档很完善;如果您已经安装了CUDA示例,还可以研究其中几个完整的示例代码。
简而言之,您可以将现有的OpenGL缓冲区对象映射到CUDA地址空间,使计算内核能够读写OpenGL内存;之后在CUDA中解除映射,再照常使用被CUDA修改过的缓冲区进行渲染。这样做有不少开销,但性能很可能仍然好于将数据复制到主机再进行渲染。
根据建议,您可以在this Nvidia提供的演示文稿中阅读完整的介绍。
答案 1 :(得分:0)
我找到了用CUDA生成像素数据、再创建纹理进行渲染以提高速度的方法:创建一个字节数组(也可以是int或其他类型),每个像素依次存放3个值(RGB;若使用RGBA则为4个值),这些值连续排列构成一幅图像,然后就可以把它加载到OpenGL中。
// Illustrative layout of the pixel array: three bytes per pixel in R, G, B
// order, pixels stored one after another (R, G and B stand for the channel
// values of each pixel).
GLubyte data[width*height*3] = {
    R, G, B,
    R, G, B,
    R, G, B
};
正如前面提到的,我本可以使用OpenGL缓冲区对象,但用图像(纹理)的方式似乎已经足以在屏幕上显示每个像素,而且我在网上很难找到有关缓冲区对象的资料。
以下是我的显示代码段:
// Renders the particle grid into an RGB byte image (on the CPU or the GPU),
// uploads it as a texture and draws it on a full-screen quad.
// NOTE(review): the pixel buffer and the texture are created and destroyed
// on every call; keeping both alive across frames would avoid that cost.

// Set up the pixel buffer: zero-initialised, 3 bytes (R, G, B) per pixel
GLubyte *pixels = new GLubyte[sxy[0]*sxy[1]*3]();
// Get the mouse pos
int m_x, m_y;
mousePos(&m_x,&m_y);
// Whether `pixels` holds a complete frame that is safe to upload
bool frame_ready = true;
// Render on CPU
if(cpu_only) render_pixels_cpu(
    particles,pixels,sxy,
    m_x,m_y,cursor_radius
);
else
{
    // Render the pixels on the GPU, one thread per pixel. The block count
    // is derived from the image size (rounded up) instead of being fixed,
    // so images of any size are fully covered.
    int N = 512;
    int blocks = (sxy[0]*sxy[1] + N - 1) / N;
    render_pixels<<<blocks,N>>>(
        N,d_particles,d_pixels,
        d_sxy,m_x,m_y,cursor_radius
    );
    // A kernel launch reports configuration errors only through
    // cudaGetLastError(); execution errors surface from the blocking copy.
    cudaError_t status = cudaGetLastError();
    // Copy the pixel data over
    if(status == cudaSuccess)
        status = cudaMemcpy(pixels, d_pixels, sizeof(GLubyte)*sxy[0]*sxy[1]*3, cudaMemcpyDeviceToHost);
    // Do not upload a buffer whose contents are unknown
    frame_ready = (status == cudaSuccess);
}
if(frame_ready)
{
    // Generate and bind the texture
    GLuint tex;
    glGenTextures(1, &tex);
    glBindTexture(GL_TEXTURE_2D, tex);
    // Rows are tightly packed RGB bytes, so their length is not necessarily
    // a multiple of the default 4-byte unpack alignment. Use alignment 1 for
    // the upload and restore the previous value afterwards.
    GLint previous_alignment = 4;
    glGetIntegerv(GL_UNPACK_ALIGNMENT, &previous_alignment);
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
    glTexImage2D( GL_TEXTURE_2D, 0, GL_RGB, sxy[0], sxy[1], 0, GL_RGB, GL_UNSIGNED_BYTE, pixels );
    glPixelStorei(GL_UNPACK_ALIGNMENT, previous_alignment);
    glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
    glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
    // Draw a quad covering the whole viewport with the texture mapped onto it
    glBegin(GL_QUADS);
    glTexCoord2d( 0.0, 0.0); glVertex2d(-1.0,-1.0);
    glTexCoord2d( 1.0, 0.0); glVertex2d( 1.0,-1.0);
    glTexCoord2d( 1.0, 1.0); glVertex2d( 1.0, 1.0);
    glTexCoord2d( 0.0, 1.0); glVertex2d(-1.0, 1.0);
    glEnd();
    // Unbind the texture (0 is the "no texture" name; NULL is a pointer constant)
    glBindTexture(GL_TEXTURE_2D, 0);
    // Delete the texture
    glDeleteTextures(1, &tex);
}
// Free the pixels; memory from new[] must be released with delete[]
delete[] pixels;
CUDA代码:
/**
 * Fills an RGB byte image from the particle grid, one thread per cell, and
 * draws the cursor outline as white pixels.
 *
 * Launch layout: 1D grid of 1D blocks. The cell index is computed as
 * blockIdx.x*N+threadIdx.x, so N must equal the number of threads per block
 * used for the launch. Threads whose index is outside the grid exit early.
 *
 * @param N         threads per block used for the launch
 * @param particles device array with one Sand entry per grid cell
 * @param pixels    device output buffer, 3 bytes (R, G, B) per pixel
 * @param sxy       device array holding the grid width (sxy[0]) and height (sxy[1])
 * @param m_x       mouse x position in grid cells; the cursor is drawn only
 *                  when both m_x and m_y are greater than -1
 * @param m_y       mouse y position in grid cells
 * @param m_radius  cursor radius in grid cells
 *
 * NOTE(review): colours are written for every cell without checking
 * particles[i].alive — presumably empty cells carry a background colour;
 * confirm against the code that clears particles.
 */
__global__ void render_pixels(
    int N, Sand* particles, GLubyte* pixels, int* sxy,
    int m_x, int m_y, double m_radius
){
    // Get the overall cell index
    int i = blockIdx.x*N+threadIdx.x;
    // Return if out of range; the last valid index is sxy[0]*sxy[1]-1
    if(i>=sxy[0]*sxy[1])return;
    // Get the position
    int pos[2];
    id_to_pos(i,pos,sxy);
    // Calculate the image id (row-major pixel index)
    int id = (pos[1]*sxy[0])+pos[0];
    // Convert the mouse pos to a position
    int mpos[2] = {m_x, m_y};
    // Calculate the distance
    double distance = distance_between(pos, mpos);
    // Is the position on the cursor circle (distance truncated to whole cells)
    if((int)distance==(int)m_radius&&m_x>-1&&m_y>-1)
    {
        // Create a circle here
        pixels[(id*3)+0] = (GLubyte)255;
        pixels[(id*3)+1] = (GLubyte)255;
        pixels[(id*3)+2] = (GLubyte)255;
    }
    else
    {
        // Set the colours, scaling each channel by 255 to fit a byte
        pixels[(id*3)+0] = (GLubyte)(particles[i].color[0]*255);
        pixels[(id*3)+1] = (GLubyte)(particles[i].color[1]*255);
        pixels[(id*3)+2] = (GLubyte)(particles[i].color[2]*255);
    }
}