我正在使用OpenGL实例化方法进行测试,并且测试了glDrawArraysInstanced和glDrawElementsInstanced函数,其余代码几乎相同。测试是渲染100万个彩色正方形(缩放为像素并适合屏幕大小),它们也可以旋转和平移,并观察FPS差异。 我得到的结果是:
根据我的研究,我没有得出哪个函数应该更快的结论,但是我更喜欢使用glDrawElementsInstanced。
所以我的问题是在这种情况下(或一般而言)是否有一种改进glDrawElementsInstanced的方法,以使使用它的绘图等效于或快于glDrawArraysInstanced。 这是我用于glDrawArraysInstanced的代码:
int main(int argc, char** argv) {
int width = 1000, height = 600;
SDLWindow window(width, height, "window");
const char* glsl_version = "#version 330";
srand((unsigned int)time(NULL));
Shader shader("shaders/shader.vert", "shaders/shader.frag");
float quadVertices[] = {
// positions
-1.0f, 1.0f,
1.0f, -1.0f,
-1.0f, -1.0f,
-1.0f, 1.0f,
1.0f, -1.0f,
1.0f, 1.0f
};
float colors[] = {
1.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 1.0f,
1.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f,
0.0f, 1.0f, 1.0f
};
// Gera VAO e buffers
GLuint VBO, VAO, TRANSFORM;
unsigned int amount = 1000000;
glm::mat4 *modelMatrices;
modelMatrices = new glm::mat4[amount];
GLsizei vec4Size = sizeof(glm::vec4);
glGenVertexArrays(1, &VAO);
glGenBuffers(1, &VBO);
glGenBuffers(1, &TRANSFORM);
glBindVertexArray(VAO);
// configuração de VBO
glBindBuffer(GL_ARRAY_BUFFER, VBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(quadVertices) + sizeof(colors), nullptr, GL_STREAM_DRAW);
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(quadVertices), quadVertices);
glBufferSubData(GL_ARRAY_BUFFER, sizeof(quadVertices), sizeof(colors), colors);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(float), (void*)0);
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), (void*)(sizeof(quadVertices)));
glEnableVertexAttribArray(0);
glEnableVertexAttribArray(1);
// configuração de TRANSFORM
glBindBuffer(GL_ARRAY_BUFFER, TRANSFORM);
glBufferData(GL_ARRAY_BUFFER, amount * sizeof(glm::vec2) + amount * sizeof(glm::vec2) + amount * sizeof(glm::vec4), nullptr, GL_STREAM_DRAW);
glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, sizeof(glm::vec2), (void*)0);
glVertexAttribPointer(3, 2, GL_FLOAT, GL_FALSE, sizeof(glm::vec2), (void*)(sizeof(glm::vec2) * amount));
glVertexAttribPointer(4, 4, GL_FLOAT, GL_FALSE, sizeof(glm::vec4), (void*)(sizeof(glm::vec2) * amount + sizeof(glm::vec2) * amount));
glEnableVertexAttribArray(2);
glEnableVertexAttribArray(3);
glEnableVertexAttribArray(4);
glVertexAttribDivisor(2, 1);
glVertexAttribDivisor(3, 1);
glVertexAttribDivisor(4, 1);
glBindVertexArray(0);
MathUtils mu;
vector <float> randNumbers;
float amountSqrt = glm::sqrt(amount);
for (int i = 0; i < amount; i++)
{
randNumbers.push_back(mu.GenRandFloat(-1.0f, 1.0f) * amountSqrt);
}
glm::vec2 *scale = new glm::vec2[amount];
glm::vec2 *translate = new glm::vec2[amount];
glm::vec4 *rotate = new glm::vec4[amount];
void *transformBfrPtr = nullptr;
bool quit = 0;
while (!quit)
{
glClearColor(1.0, 1.0, 1.0, 1.0);
glClear(GL_COLOR_BUFFER_BIT);
shader.use();
glBindVertexArray(VAO);
glBindBuffer(GL_ARRAY_BUFFER, TRANSFORM);
transformBfrPtr = glMapBufferRange(GL_ARRAY_BUFFER, 0, sizeof(glm::vec2) * amount, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
for (int i = 0; i < amount; i++)
{
scale[i][0] = 1.0f / amountSqrt;
scale[i][1] = 1.0f / amountSqrt;
}
std::memcpy(transformBfrPtr, scale, sizeof(scale[0]) * amount);
glUnmapBuffer(GL_ARRAY_BUFFER);
transformBfrPtr = glMapBufferRange(GL_ARRAY_BUFFER, sizeof(glm::vec2) * amount, sizeof(glm::vec2) * amount, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
for (int i = 0; i < amount; i++)
{
translate[i][0] = randNumbers[i];
translate[i][1] = randNumbers[amount - i];
}
std::memcpy(transformBfrPtr, translate, sizeof(translate[0]) * amount);
glUnmapBuffer(GL_ARRAY_BUFFER);
transformBfrPtr = glMapBufferRange(GL_ARRAY_BUFFER, sizeof(glm::vec2) * amount + sizeof(glm::vec2) * amount, sizeof(glm::vec4) * amount, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
float var = SDL_GetTicks() / 1000.0f;
for (int i = 0; i < amount; i++)
{
rotate[i][0] = cos(var);
rotate[i][1] = sin(var);
rotate[i][2] = -sin(var);
rotate[i][3] = cos(var);
}
std::memcpy(transformBfrPtr, rotate, sizeof(rotate[0]) * amount);
glUnmapBuffer(GL_ARRAY_BUFFER);
glDrawArraysInstanced(GL_TRIANGLES, 0, 6, amount);
switch (window.SDL_ManageEvent())
{
case SDL_QUIT:
SDL_Quit();
glDeleteBuffers(1, &VBO);
glDeleteVertexArrays(1, &VAO);
break;
}
window.SDL_SwapWindow();
}
return 0;
}
以及使用glDrawElementsInstanced的代码(几乎相同,但带有索引):
int main(int argc, char** argv) {
int width = 1000, height = 600;
SDLWindow window(width, height, "window");
const char* glsl_version = "#version 330";
srand((unsigned int)time(NULL));
Shader shader("shaders/shader.vert", "shaders/shader.frag");
float quadVertices[] = {
// positions
-1.0f, 1.0f,
1.0f, -1.0f,
-1.0f, -1.0f,
1.0f, 1.0f
};
float colors[] = {
1.0f, 0.0f, 0.0f,
0.0f, 1.0f, 0.0f,
0.0f, 0.0f, 1.0f,
0.0f, 1.0f, 1.0f
};
unsigned int indices[] = {
0, 1, 2,
0, 1, 3
};
// Gera VAO e buffers
GLuint VBO, VAO, TRANSFORM, EBO;
unsigned int amount = 1000000;
glm::mat4 *modelMatrices;
modelMatrices = new glm::mat4[amount];
GLsizei vec4Size = sizeof(glm::vec4);
glGenVertexArrays(1, &VAO);
glGenBuffers(1, &VBO);
glGenBuffers(1, &TRANSFORM);
glGenBuffers(1, &EBO);
glBindVertexArray(VAO);
// configuração de VBO
glBindBuffer(GL_ARRAY_BUFFER, VBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(quadVertices) + sizeof(colors), nullptr, GL_STREAM_DRAW);
glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(quadVertices), quadVertices);
glBufferSubData(GL_ARRAY_BUFFER, sizeof(quadVertices), sizeof(colors), colors);
glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(float), (void*)0);
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 3 * sizeof(float), (void*)(sizeof(quadVertices)));
glEnableVertexAttribArray(0);
glEnableVertexAttribArray(1);
// configuração de EBO
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, EBO);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STREAM_DRAW);
// configuração de TRANSFORM
glBindBuffer(GL_ARRAY_BUFFER, TRANSFORM);
glBufferData(GL_ARRAY_BUFFER, amount * sizeof(glm::vec2) + amount * sizeof(glm::vec2) + amount * sizeof(glm::vec4), nullptr, GL_STREAM_DRAW);
glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, sizeof(glm::vec2), (void*)0);
glVertexAttribPointer(3, 2, GL_FLOAT, GL_FALSE, sizeof(glm::vec2), (void*)(sizeof(glm::vec2) * amount));
glVertexAttribPointer(4, 4, GL_FLOAT, GL_FALSE, sizeof(glm::vec4), (void*)(sizeof(glm::vec2) * amount + sizeof(glm::vec2) * amount));
glEnableVertexAttribArray(2);
glEnableVertexAttribArray(3);
glEnableVertexAttribArray(4);
glVertexAttribDivisor(2, 1);
glVertexAttribDivisor(3, 1);
glVertexAttribDivisor(4, 1);
glBindVertexArray(0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
MathUtils mu;
vector <float> randNumbers;
float amountSqrt = glm::sqrt(amount);
for (int i = 0; i < amount; i++)
{
randNumbers.push_back(mu.GenRandFloat(-1.0f, 1.0f) * amountSqrt);
}
glm::vec2 *scale = new glm::vec2[amount];
glm::vec2 *translate = new glm::vec2[amount];
glm::vec4 *rotate = new glm::vec4[amount];
void *transformBfrPtr = nullptr;
bool quit = 0;
while (!quit)
{
glClearColor(1.0, 1.0, 1.0, 1.0);
glClear(GL_COLOR_BUFFER_BIT);
shader.use();
glBindVertexArray(VAO);
glBindBuffer(GL_ARRAY_BUFFER, TRANSFORM);
transformBfrPtr = glMapBufferRange(GL_ARRAY_BUFFER, 0, sizeof(glm::vec2) * amount, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
for (int i = 0; i < amount; i++)
{
scale[i][0] = 1.0f / amountSqrt;
scale[i][1] = 1.0f / amountSqrt;
}
std::memcpy(transformBfrPtr, scale, sizeof(scale[0]) * amount);
glUnmapBuffer(GL_ARRAY_BUFFER);
transformBfrPtr = glMapBufferRange(GL_ARRAY_BUFFER, sizeof(glm::vec2) * amount, sizeof(glm::vec2) * amount, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
for (int i = 0; i < amount; i++)
{
translate[i][0] = randNumbers[i];
translate[i][1] = randNumbers[amount - i];
}
std::memcpy(transformBfrPtr, translate, sizeof(translate[0]) * amount);
glUnmapBuffer(GL_ARRAY_BUFFER);
transformBfrPtr = glMapBufferRange(GL_ARRAY_BUFFER, sizeof(glm::vec2) * amount + sizeof(glm::vec2) * amount, sizeof(glm::vec4) * amount, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
float var = SDL_GetTicks() / 1000.0f;
for (int i = 0; i < amount; i++)
{
rotate[i][0] = cos(var);
rotate[i][1] = sin(var);
rotate[i][2] = -sin(var);
rotate[i][3] = cos(var);
}
std::memcpy(transformBfrPtr, rotate, sizeof(rotate[0]) * amount);
glUnmapBuffer(GL_ARRAY_BUFFER);
glDrawElementsInstanced(GL_TRIANGLES, 6, GL_UNSIGNED_INT, 0, amount);
switch (window.SDL_ManageEvent())
{
case SDL_QUIT:
SDL_Quit();
glDeleteBuffers(1, &VBO);
glDeleteVertexArrays(1, &VAO);
break;
}
window.SDL_SwapWindow();
}
return 0;
}
下面是顶点着色器的代码,也许会有帮助:
#version 330 core

// Per-vertex inputs.
layout (location = 0) in vec2 aPos;
layout (location = 1) in vec3 aColor;

// Per-instance inputs.
layout (location = 2) in vec2 scale;
layout (location = 3) in vec2 translation;
layout (location = 4) in vec4 rotation; // 2x2 matrix packed as (xy = column 0, zw = column 1)

out vec3 color;

// Rotates the vertex, offsets it, then scales the result into clip space.
void main()
{
    // Unpack the vec4 attribute into a 2x2 matrix under a distinct name so it
    // does not hide the `rotation` input.
    mat2 rotationMatrix = mat2(rotation.xy, rotation.zw);

    // Row-vector * matrix, exactly as in the original expression.
    vec2 rotated = aPos * rotationMatrix;
    vec2 placed = (rotated + translation) * scale;

    gl_Position = vec4(placed, 0.0, 1.0);
    color = aColor;
}
谢谢。
更新: 好的,看来我有一个与此无关的问题。我在另一台机器上测试了这两个代码,并对结果感到惊讶。首先介绍一下背景: 上面的测试是在此(PC)设置上进行的: 16GB内存,i7 4790k,GTX 970。
然后,我使用此设置在笔记本电脑上进行了相同的测试: 8GB内存,i5 8250u,Geforce MX150(2GB)
结果:
我还注意到,在笔记本电脑上,GPU使用率一直保持在99%左右,而在我的PC上,无论使用glDrawElementsInstanced还是glDrawArraysInstanced,GPU使用率都在60%左右波动。在这两种情况下,所有核心的CPU使用率都低于60%(因此可以排除CPU瓶颈)。 我尝试卸载GPU驱动程序,并使用DDU进行干净的重新安装,但没有效果。 我还检查了两个系统中的NVIDIA控制面板,设置完全相同。两台机器都安装了Windows 10,并且都在Windows控制面板中设置为最高性能。 我的第一个问题得到了部分解答,因为在不同的机器上(两种绘制调用函数之间)的FPS结果差异很大。 怎么会有这样的差异?提前致谢。