我遇到了一个非常奇怪的性能问题。到目前为止,我已经将问题简化为:我使用glDrawElementsInstanced
在一个网格(晶格)中渲染20x20x20个立方体,只要相机远离原点,一切都运行正常;但相机越接近原点,渲染就越慢,最终几乎完全停滞。
我通过以下方式定义我的模型视图投影矩阵:
// Camera distance from the origin along -Z. The question reports the slowdown at about 3.8 and no problem at 40.
float distance=3.8;
// Perspective projection: field of view 65, aspect ratio taken from the window size, near plane 0.1, far plane 300.
// NOTE(review): recent GLM versions interpret the field of view in radians - confirm against the GLM version in use.
Projection = glm::perspective(65.0f, (float)(width)/height, 0.1f, 300.0f);
// Eye at (0,0,-distance), looking towards (0,0,10), with +Y as the up direction.
View = glm::lookAt( glm::vec3(0,0,-distance),
glm::vec3(0,0,10),
glm::vec3(0,1,0));
// Rotation by an angle of 0 about the given axis, i.e. the model matrix is the identity.
Model = glm::rotate(glm::mat4(1.0f), 0.0f, glm::vec3(0.25f, 1.0f,0.75f));
距离为40时没有任何问题,但当距离减小到3.8左右时,渲染几乎完全停滞。
实际的渲染调用是通过以下方式进行的:
// Instanced path: bind the cube VAO and draw all cubes with a single instanced call.
glBindVertexArray(cubeVAO);
// NOTE(review): the last argument is the number of instances. If latticePoints stores three floats per
// instance (as in the full listing below), latticePoints.size() is three times too large - confirm.
glDrawElementsInstanced(GL_TRIANGLES, indices.size(),GL_UNSIGNED_INT,(GLvoid*)(0),latticePoints.size());
将所有顶点放在一个缓冲区中并通过调用以下方式进行渲染:
// Non-instanced path: bind the VAO holding every vertex of the grid and draw it in one call.
glBindVertexArray(nonInstancedVAO);
glDrawArrays(GL_TRIANGLES, 0,vertices.size() );
则这种减速现象会完全消失。有过类似经历的人能为我指出解决办法吗?如果不能,有谁知道该如何追查这类问题?我原本希望能用gDEBugger确定导致减速的原因,但它只是再次确认了没有任何其他OpenGL调用,并没有真正帮助我弄清楚是什么占用了全部处理时间。
另外需要注意的是,glDrawArraysInstanced也表现出相同的减速;而将这次调用拆分成4个单独的调用、每个调用绘制四分之一的实例,减速同样会消失。
更新
下面是一个尽量精简的、可以重现该问题的最小示例。
//Minimal reproduction of problem
#include <stdio.h>
#include <string>
#include <fstream>
#include <stdlib.h>
#include <string.h>
#include <GL/glew.h>
#include <GLFW/glfw3.h>
// Include GLM
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
#include <vector>
#include <iostream>
#include <stdio.h>
// Set to true to use instanced rendering (glDrawElementsInstanced),
// or to false to render a pre-generated grid instead (glDrawElements).
#define Instanced true
// Translation away from the origin. The problem is present at distance 0, but disappears at e.g. 40.
const float distanceFromOrigin=0;
// Loads the vertex and fragment shaders from the given file paths and returns the resulting program ID.
GLuint LoadShaders(const char * vertex_file_path,const char * fragment_file_path);
int main(){
int width, height;
bool running = true;
// Initialise GLFW
glfwInit();
glfwWindowHint(GLFW_SAMPLES,1);
glfwWindowHint(GLFW_OPENGL_DEBUG_CONTEXT,GL_TRUE);
glfwWindowHint(GLFW_VERSION_MAJOR, 4);
GLFWwindow* windowRef = glfwCreateWindow( 512, 512, "",0,0);
glfwMakeContextCurrent(windowRef);
glewInit();
//Load Shader
GLuint programID = LoadShaders( "Simple.vs.c", "Simple.fs.c" );
GLuint MatrixID = glGetUniformLocation(programID, "MVP");
glUseProgram(programID);
glm::mat4 Model,Projection,MVP,View,checkMVP;
std::vector<GLuint> sqIndice = {3,2,1,1,0,3,4,5,6,6,7,4,0,4,7,7,3,0,0,1,5,5,4,0,2,3,7,7,6,2,6,5,1,1,2,6,0,4,7,7,3,0};
std::vector<GLfloat> sqVertex = {-1, 1, -1, -1, 1, 1, -1, -1, 1, -1, -1, -1, 1, 1, -1, 1, 1, 1, 1, -1, 1, 1, -1, -1};
std::vector<GLfloat> sqColor = {0.2472,0.24,0.6,0.6,0.24,0.442893,0.6,0.547014,0.24,0.24,0.6,0.33692,0.24,0.353173,0.6,0.6,0.24,0.563266,0.6,0.426641,0.24,0.263452,0.6,0.24};
const float lattice = 5;
const int mxn = 10;
std::vector<GLfloat> v1 = {lattice,-1,0};
std::vector<GLfloat> v2 = {1,lattice,0};
std::vector<GLfloat> v3 = {0,0,lattice};
std::vector<GLfloat> offset = {0,0,-distanceFromOrigin};
std::vector<GLfloat> latticePoints,sqVertexGrid,sqColorGrid;// = {0,0,0};
std::vector<GLuint> sqIndiceGrid;
// Looping stuff to generate the full grid of "instances" to render in a single call.
int instanceCount=0;
//Generate Lattice vectors, aswell as a vector containing the full grids of indices,vertexes and colors
for(int x=-mxn;x<mxn;++x){
for(int y=-mxn;y<mxn;++y){
for(int z=-mxn;z<mxn;++z){
for(int n=0;n<3;++n){
latticePoints.push_back( x*v1[n]+y*v2[n]+z*v3[n]+offset[n] );
};
for(int elm=0;elm<sqVertex.size();elm+=3){
for(int n=0;n<3;++n){
sqVertexGrid.push_back(sqVertex[elm+n]+x*v1[n]+y*v2[n]+z*v3[n]+offset[n]);
sqColorGrid.push_back(sqColor[elm+n]);
};
};
for(int elm=0;elm<sqIndice.size();++elm){
sqIndiceGrid.push_back(sqIndice[elm]+instanceCount*sqVertex.size()/3);
};
++instanceCount;glewInit
};
};
};
#if Instanced==true
//Initialize and fill vertex,color and indice buffers with the relevant data.
GLuint cubeVAO;
glGenVertexArrays(1, &cubeVAO);
glBindVertexArray(cubeVAO);
glEnable(GL_DEPTH_TEST);
//Vertex buffer
GLuint vertexBuffer;
glGenBuffers(1, &vertexBuffer);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glBufferData(GL_ARRAY_BUFFER, sqVertex.size()*sizeof(GLfloat), &sqVertex[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0,3,GL_FLOAT,GL_FALSE,0,(void*)0);
//Color buffer
GLuint colorBuffer;
glGenBuffers(1, &colorBuffer);
glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
glBufferData(GL_ARRAY_BUFFER, sqColor.size()*sizeof(GLfloat), &sqColor[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1,3,GL_FLOAT,GL_FALSE,0,(void*)0);
// Indice buffer
GLuint indicesBuffer;
glGenBuffers(1, &indicesBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indicesBuffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sqIndice.size()*sizeof(GLuint), &sqIndice[0], GL_STATIC_DRAW);
//Lattice point buffer
GLuint latticePointBuffer;
glGenBuffers(1, &latticePointBuffer);
glBindBuffer(GL_ARRAY_BUFFER, latticePointBuffer);
glBufferData(GL_ARRAY_BUFFER, latticePoints.size()*sizeof(GLfloat), &latticePoints[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(2);
glVertexAttribPointer(2,3,GL_FLOAT,GL_FALSE,0,(void*)0);
glVertexAttribDivisor(2,1);
glBindVertexArray(0);
#elif Instanced==false
GLuint cubeGridVAO;
glGenVertexArrays(1, &cubeGridVAO);
glBindVertexArray(cubeGridVAO);
glEnable(GL_DEPTH_TEST);
//Vertex buffer
GLuint vertexBuffer;
glGenBuffers(1, &vertexBuffer);
glBindBuffer(GL_ARRAY_BUFFER, vertexBuffer);
glBufferData(GL_ARRAY_BUFFER, sqVertexGrid.size()*sizeof(GLfloat), &sqVertexGrid[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0,3,GL_FLOAT,GL_FALSE,0,(void*)0);
//Color buffer
GLuint colorBuffer;
glGenBuffers(1, &colorBuffer);
glBindBuffer(GL_ARRAY_BUFFER, colorBuffer);
glBufferData(GL_ARRAY_BUFFER, sqColorGrid.size()*sizeof(GLfloat), &sqColorGrid[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1,3,GL_FLOAT,GL_FALSE,0,(void*)0);
// Indice buffer
GLuint indicesBuffer;
glGenBuffers(1, &indicesBuffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, indicesBuffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sqIndiceGrid.size()*sizeof(GLuint), &sqIndiceGrid[0], GL_STATIC_DRAW);
glBindVertexArray(0);
#endif
while(running)
{
glfwGetFramebufferSize(windowRef, &width, &height);
height = height > 0 ? height : 1;
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
Projection = glm::perspective(65.0f, (float)(width)/height, 0.1f, 300.0f);
View = glm::lookAt( glm::vec3(0.0f,0.0f,-(distanceFromOrigin+3.8f)),
glm::vec3(0.0f,0.0f,100.0f),
glm::vec3(0.0f,1.0f,0.0f));
Model = glm::rotate(glm::mat4(1.0f), 0.0f, glm::vec3(0.25f, 1.0f,0.75f));
MVP = Projection*View*Model;
glUniformMatrix4fv(MatrixID, 1, GL_FALSE, glm::value_ptr(MVP));
#if Instanced==true
glBindVertexArray(cubeVAO);
glDrawElementsInstanced(GL_TRIANGLES, sqIndice.size(),GL_UNSIGNED_INT,(GLvoid*)(0),latticePoints.size());
#elif Instanced==false
glBindVertexArray(cubeGridVAO);
glDrawElements(GL_TRIANGLES, sqIndiceGrid.size(),GL_UNSIGNED_INT,(GLvoid*)(0));
#endif
glfwPollEvents();
glfwSwapBuffers(windowRef);
std::cout<<".\n";
running = !glfwGetKey(windowRef,GLFW_KEY_ESCAPE) && !glfwWindowShouldClose(windowRef);
}
glfwDestroyWindow(windowRef);
glfwTerminate();
return 0;
};
GLuint LoadShaders(const char * vertex_file_path,const char * fragment_file_path){
// Create the shaders
GLuint VertexShaderID = glCreateShader(GL_VERTEX_SHADER);
GLuint FragmentShaderID = glCreateShader(GL_FRAGMENT_SHADER);
// Read the Vertex Shader code from the file
std::string VertexShaderCode;
std::ifstream VertexShaderStream(vertex_file_path, std::ios::in);
if(VertexShaderStream.is_open()){
std::string Line = "";
while(getline(VertexShaderStream, Line))
VertexShaderCode += "\n" + Line;
VertexShaderStream.close();
}else{
printf("Impossible to open %s. Are you in the right directory?\n", vertex_file_path);
return 0;
}
// Read the Fragment Shader code from the file
std::string FragmentShaderCode;
std::ifstream FragmentShaderStream(fragment_file_path, std::ios::in);
if(FragmentShaderStream.is_open()){
std::string Line = "";
while(getline(FragmentShaderStream, Line))
FragmentShaderCode += "\n" + Line;
FragmentShaderStream.close();
}
GLint Result = GL_FALSE;
int InfoLogLength;
// Compile Vertex Shader
printf("Compiling shader : %s\n", vertex_file_path);
char const * VertexSourcePointer = VertexShaderCode.c_str();
glShaderSource(VertexShaderID, 1, &VertexSourcePointer , NULL);
glCompileShader(VertexShaderID);
// Check Vertex Shader
glGetShaderiv(VertexShaderID, GL_COMPILE_STATUS, &Result);
glGetShaderiv(VertexShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength);
if ( InfoLogLength > 0 ){
std::vector<char> VertexShaderErrorMessage(InfoLogLength+1);
glGetShaderInfoLog(VertexShaderID, InfoLogLength, NULL, &VertexShaderErrorMessage[0]);
printf("%s\n", &VertexShaderErrorMessage[0]);
}
// Compile Fragment Shader
printf("Compiling shader : %s\n", fragment_file_path);
char const * FragmentSourcePointer = FragmentShaderCode.c_str();
glShaderSource(FragmentShaderID, 1, &FragmentSourcePointer , NULL);
glCompileShader(FragmentShaderID);
// Check Fragment Shader
glGetShaderiv(FragmentShaderID, GL_COMPILE_STATUS, &Result);
glGetShaderiv(FragmentShaderID, GL_INFO_LOG_LENGTH, &InfoLogLength);
if ( InfoLogLength > 0 ){
std::vector<char> FragmentShaderErrorMessage(InfoLogLength+1);
glGetShaderInfoLog(FragmentShaderID, InfoLogLength, NULL, &FragmentShaderErrorMessage[0]);
printf("%s\n", &FragmentShaderErrorMessage[0]);
}
// Link the program
printf("Linking program\n");
GLuint ProgramID = glCreateProgram();
glAttachShader(ProgramID, VertexShaderID);
glAttachShader(ProgramID, FragmentShaderID);
glLinkProgram(ProgramID);
// Check the program
glGetProgramiv(ProgramID, GL_LINK_STATUS, &Result);
glGetProgramiv(ProgramID, GL_INFO_LOG_LENGTH, &InfoLogLength);
if ( InfoLogLength > 0 ){
std::vector<char> ProgramErrorMessage(InfoLogLength+1);
glGetProgramInfoLog(ProgramID, InfoLogLength, NULL, &ProgramErrorMessage[0]);
printf("%s\n", &ProgramErrorMessage[0]);
}
glDeleteShader(VertexShaderID);
glDeleteShader(FragmentShaderID);
return ProgramID;
}
答案 0 :(得分:6)
好的,深呼吸并坐下:你的问题是显卡内存速度。
不过,你可以通过修复下面这个bug来减轻GPU的负担:
glDrawElementsInstanced(GL_TRIANGLES, sqIndice.size(),GL_UNSIGNED_INT,(GLvoid*)(0),latticePoints.size());
glDrawElementsInstanced
期望最后一个参数是要绘制的实例数量。但是你传递的是latticePoints
中的元素数量,这是实例数量的3倍。这导致多出来的实例在着色器内部读到的晶格点为零(因为越界访问被阻止了)。因此,有16000个立方体没有被平移,而是被绘制在同一个位置。这导致立方体的各个正面被重复绘制16000次。深度缓冲区无法阻止这种情况,因为这些面并不相互遮挡,它们位于完全相同的位置。
因此当你的distanceFromOrigin
减小时,这16000个位于中心的立方体在屏幕上变得越来越大,OpenGL必须绘制越来越多的像素。准确地说,是多得多。要绘制的像素如此之多,以至于达到了显卡内存的速度上限。
完整的分析请阅读《Diagnose OpenGL Performance Problems》。