我想用一个点网格填充屏幕。我想要的性能与将许多像素绘制为连续四边形(或用glViewport剪切的等效三角形)的速度大致相同。使用GL_POINT
原语(通过gl_VertexID定位,而不是attribs)或glPolygonStipple
是可能的,但仍然有点慢。这是我想要的一个例子(虽然绘制的黑点可能更稀疏):
有没有其他方法来绘制这个网格?
(与相同像素数的较小四边形相似的时间)
如果光栅化器是可编程的,那不是很好吗?
这一点的主要目的是能够从片段着色器中写入此网格图案中的模板和颜色缓冲区。
修改的
一些渲染时间:
全屏我是1680x1050,GTX670。计算时间为每帧10,000次,无深度测试。我使用glViewport绘制了一个带有大三角形和剪辑的四边形。
coord%4>0
调用丢弃:0.112ms glPolygonStipple
创建%4
模式:0.009ms 对于更稀疏的网格,差异越大,例如%16
。
修改的
好的,我把一个小例子拼凑在了一起。需要glut
和glew
个库:
#include <GL/glew.h>
#include <GL/gl.h>
#include <GL/glut.h>
#include <memory.h>
#include <assert.h>
#include <stdio.h>
#define RESOLUTION_X 1680
#define RESOLUTION_Y 1050
#define USE_32_BIT 0
#define TEST_LOOP 1000 //number of quads to draw per frame
#define WARMUP_MS 1000 //time between switching methods
#define TEST_MS 4000 //time to benchmark for
#define TESTS 6
#define DRAW_GRAPH 1
#define SCALE_MS 0.2f //for drawing the graph
GLuint fbo, colourTex, vbo, shader, shaderPoints, shaderDiscard;
int viewport[2];
int test = 0;
int results_time[TESTS];
int results_frames[TESTS];
float colours[TESTS][3] = {
{1,0,0},
{1,1,0},
{1,0,1},
{0,1,0},
{0,1,1},
{0,0,1},
};
const char* names[TESTS] = {
"full",
"full discard",
"full stipple",
"draw points",
"quarter",
"one"
};
float triangleVerts[9] = {-1,-1,0,-1,4,0,4,-1,0};
const char* vertexShaderSrc = "#version 150\nin vec4 v;\nvoid main() {gl_Position = v;}\n";
const char* vertexShaderPointsSrc = "#version 150\nuniform ivec2 s;\nvoid main() {ivec2 p = ivec2(gl_VertexID%(s.x/4),gl_VertexID/(s.x/4)); gl_Position = vec4(2.0*(p*4+0.5)/s-1.0, 0, 1);}\n";
const char* fragmentShaderSrc = "#version 150\nout vec4 c;\nvoid main() {c = vec4(1,0,0,1);}\n";
const char* fragmentShaderDiscardSrc = "#version 150\nout vec4 c;\nvoid main() {if (int(gl_FragCoord.x)%4>0||int(gl_FragCoord.y)%4>0) discard; c = vec4(1,0,0,1);}\n";
void setupDraw(GLuint program, int x, int y)
{
glUseProgram(program);
glViewport(0, 0, x, y);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
GLuint loc = glGetAttribLocation(program, "v");
glEnableVertexAttribArray(loc);
glVertexAttribPointer(loc, 3, GL_FLOAT, GL_FALSE, 0, 0);
}
void polygonStippleGrid(int x, int y)
{
unsigned char tilePattern[32*32];
memset(tilePattern, 0, sizeof(tilePattern));
for (int j = 0; j < 32; j += y)
{
for (int i = 0; i < 32; i += x)
{
int index = (j * 32 + i);
tilePattern[index / 8] |= 1 << (index % 8);
}
}
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
glPolygonStipple(tilePattern);
}
void display()
{
static int lastTime = -1;
int elapsed = glutGet(GLUT_ELAPSED_TIME);
if (lastTime == -1) lastTime = elapsed;
int dt = elapsed - lastTime;
lastTime = elapsed;
static int warmup = WARMUP_MS + 2000;
static int running = TEST_MS;
warmup -= dt;
if (warmup <= 0 && test < TESTS)
{
running -= dt;
results_time[test] += dt;
results_frames[test] += 1;
if (running <= 0)
{
printf("%s %s %.6fms\n", names[test], USE_32_BIT?"rgba32":"rgba8", results_time[test]/(float)(results_frames[test] * TEST_LOOP));
test += 1;
warmup = WARMUP_MS;
running = TEST_MS;
}
}
#if DRAW_GRAPH
glBindFramebuffer(GL_FRAMEBUFFER, 0);
glViewport(0, 0, viewport[0], viewport[1]);
glClear(GL_COLOR_BUFFER_BIT);
float s = 2.0f / TESTS;
glBegin(GL_QUADS);
for (int i = 0; i < TESTS; ++i)
{
if (!results_frames[i]) continue;
glColor3fv(colours[i]);
float x = -1.0f + 2.0f * i / (float)TESTS;
float y = -1.0f + 2.0f * (results_time[i]/(float)(results_frames[i] * TEST_LOOP)) / SCALE_MS;
glVertex2f(x, -1.0f); glVertex2f(x, y); glVertex2f(x + s, y); glVertex2f(x + s, -1.0f);
}
glEnd();
#endif
glBindFramebuffer(GL_FRAMEBUFFER, fbo);
switch (test)
{
case 0: //straight full screen quad
setupDraw(shader, RESOLUTION_X, RESOLUTION_Y);
for (int i = 0; i < TEST_LOOP; ++i)
glDrawArrays(GL_TRIANGLES, 0, 3);
break;
case 1: //full screen quad, discarding pixels in the frag shader
setupDraw(shaderDiscard, RESOLUTION_X, RESOLUTION_Y);
for (int i = 0; i < TEST_LOOP; ++i)
glDrawArrays(GL_TRIANGLES, 0, 3);
break;
case 2: //using polygon stipple to mask out fragments
polygonStippleGrid(4, 4);
glEnable(GL_POLYGON_STIPPLE);
setupDraw(shader, RESOLUTION_X, RESOLUTION_Y);
for (int i = 0; i < TEST_LOOP; ++i)
glDrawArrays(GL_TRIANGLES, 0, 3);
glDisable(GL_POLYGON_STIPPLE);
break;
case 3: //drawing points, but computing the position in the vertex shader
glUseProgram(shaderPoints);
glUniform2i(glGetUniformLocation(shaderPoints, "s"), RESOLUTION_X, RESOLUTION_Y);
for (int i = 0; i < TEST_LOOP; ++i)
glDrawArrays(GL_POINTS, 0, (RESOLUTION_X/4)*(RESOLUTION_Y/4));
break;
case 4: //a quad one quarter of the screen (as a speed comparison)
setupDraw(shader, RESOLUTION_X / 4, RESOLUTION_Y / 4);
for (int i = 0; i < TEST_LOOP; ++i)
glDrawArrays(GL_TRIANGLES, 0, 3);
break;
case 5: //a 1x1 quad (as a speed comparison)
setupDraw(shader,1, 1);
for (int i = 0; i < TEST_LOOP; ++i)
glDrawArrays(GL_TRIANGLES, 0, 3);
break;
default: break;
}
glUseProgram(0);
glDisableVertexAttribArray(0); //HACK: assumes location is always zero
//printf("%i %i %i\n", test, warmup, running);
glFinish();
glutSwapBuffers();
glutPostRedisplay();
assert(glGetError() == GL_NO_ERROR);
}
void reshape(int x, int y)
{
viewport[0] = x;
viewport[1] = y;
}
int main(int argc, char **argv)
{
memset(results_time, 0, sizeof(results_time));
memset(results_frames, 0, sizeof(results_frames));
//init glut
glutInit(&argc, argv);
glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA);
glutCreateWindow("quadtest");
glutReshapeFunc(reshape);
glutDisplayFunc(display);
glewInit();
//init gl stuff
glGenTextures(1, &colourTex);
glBindTexture(GL_TEXTURE_2D, colourTex);
#if USE_32_BIT
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, RESOLUTION_X, RESOLUTION_Y, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
#else
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, RESOLUTION_X, RESOLUTION_Y, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
#endif
/*
GLuint stencilRB;
glGenRenderbuffers(1, &stencilRB);
glBindRenderbuffer(GL_RENDERBUFFER, stencilRB);
glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_STENCIL, RESOLUTION_X, RESOLUTION_Y);
*/
glGenFramebuffers(1, &fbo);
glBindFramebuffer(GL_FRAMEBUFFER, fbo);
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colourTex, 0);
//glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, stencilRB);
assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
glGenBuffers(1, &vbo);
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, sizeof(triangleVerts), triangleVerts, GL_STATIC_DRAW);
GLuint v = glCreateShader(GL_VERTEX_SHADER);
GLuint vp = glCreateShader(GL_VERTEX_SHADER);
GLuint f = glCreateShader(GL_FRAGMENT_SHADER);
GLuint fd = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(v, 1, &vertexShaderSrc, NULL);
glShaderSource(vp, 1, &vertexShaderPointsSrc, NULL);
glShaderSource(f, 1, &fragmentShaderSrc, NULL);
glShaderSource(fd, 1, &fragmentShaderDiscardSrc, NULL);
GLint ok = GL_TRUE;
shader = glCreateProgram();
glAttachShader(shader, v);
glAttachShader(shader, f);
glLinkProgram(shader);
glGetProgramiv(shader, GL_LINK_STATUS, &ok);
assert(ok == GL_TRUE);
/*
char log[512];
int n;
glGetShaderInfoLog(v, 512, &n, log);
printf("%s\n", log);
glGetProgramInfoLog(shader, 512, &n, log);
printf("%s\n", log);
*/
shaderPoints = glCreateProgram();
glAttachShader(shaderPoints, vp);
glAttachShader(shaderPoints, f);
glLinkProgram(shaderPoints);
glGetProgramiv(shaderPoints, GL_LINK_STATUS, &ok);
assert(ok == GL_TRUE);
shaderDiscard = glCreateProgram();
glAttachShader(shaderDiscard, v);
glAttachShader(shaderDiscard, fd);
glLinkProgram(shaderDiscard);
glGetProgramiv(shaderDiscard, GL_LINK_STATUS, &ok);
assert(ok == GL_TRUE);
glDisable(GL_DEPTH_TEST);
assert(glGetError() == GL_NO_ERROR);
glutMainLoop();
return 0;
}
有趣的是,使用GL_RGBA32F
32位颜色会影响性能,同时也会将丢弃方法的开销恢复到与全屏四边形大致相同的效果。 glPolygonStipple
方法在这种情况下给出了显着的改进,比8位更有效。与之前的glPolygonStipple
结果存在差异,我可以重现两者并且还没有缩小差异。
GL_RGBA
的输出:
full rgba8 0.059ms
full discard rgba8 0.112ms
full stipple rgba8 0.050ms
draw points rgba8 0.079ms
quarter rgba8 0.004ms
one rgba8 <0.001ms
GL_RGBA32F
的输出:
full rgba32 0.240ms
full discard rgba32 0.241ms
full stipple rgba32 0.101ms
draw points rgba32 0.091ms
quarter rgba32 0.015ms
one rgba32 <0.001ms
gl_VertexID
的绘图点和定位将超过glPolygonStipple
的{{1}}。我认为这种趋势会继续用于更昂贵的着色器(或者至少是内存密集型)。
答案 0 :(得分:3)
是否有其他方法可以绘制此网格?
究竟是这个网格?那么在这种情况下,你的网格的周期为4,x方向偏移为-1,y方向偏移为-1。因此,生成它的片段着色器(丢弃“黑色”像素)将是
void main()
{
if( ((gl_FragPosition.x-1) % 4) == 0 && ((gl_FragPosition.y-2) % 4) == 0 )
discard;
gl_FragColor = vec4(1,1,1,1);
}
将模板操作设置为始终替换模板值,将模板缓冲区设置为您的ref
值,无丢弃像素。
如果你不能通过某种公式来表达你的网格,那么,改为使用纹理。
答案 1 :(得分:2)
稀疏网格的分散内存写入可能仅仅意味着无法避免的更多开销。
GL_POINT
s glPolygonStipple
你做了什么如果片段着色器很昂贵,不要使用discard
方法 [1] 。这真的很愚蠢,因为你用很多没有做任何事情的线程来阻塞管道。
<子> [1]执行或使用大量寄存器或本地存储器需要很长时间 子>