我的最终目标是以60 fps渲染100万个不同大小和颜色的球体。我希望能够在屏幕上移动相机。
我修改了this page of the tutorial I am studying上的代码以尝试实例化多个领域。然而,我发现在64个球体中,我的fps低于60,而在900个球体,我的fps是一个微不足道的4.我对实例的理解是天真的,但我相信我应该获得比每秒更多的帧数。这个。只有64个球体才能达到60 fps。我相信我在某种程度上导致CPU和GPU比他们应该更频繁地进行通信。所以我的问题是:如何在不导致fps降低(理想情况下为60 fps)的情况下实例这么多对象(理想情况下为数百万)?
我通过每10帧计算(10 / time_elapsed)
来计算fps,其中time_elapsed
是自上次fps调用以来经过的时间。我在代码的第118行使用printf
打印出来。
我一直在通过this tutorial学习OpenGL,所以我在Visual Studio 2013中使用32-bit GLEW和32-bit GLFW。我在64位操作系统上有8 GB的RAM(Windows 7) )2.30 GHz CPU。
我已经尝试根据上面的教程编写自己的示例。源代码:
(将第2行设置为要实例化的球体数量。确保第2行具有整数平方根。将第4行设置为球体的细节,它可以达到的最低值为0。数字越大=越详细。)
// Make sure NUM_INS is a square number
#define NUM_INS 1
// Detail up to 4 is probably good enough
#define SPHERE_DETAIL 4
#include <vector>
// GLEW
#define GLEW_STATIC
#include <GL/glew.h>
// GLFW
#include <GLFW/glfw3.h>
// GL includes
#include "Shader.h"
// GLM Mathemtics
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
// Properties
GLuint screenWidth = 800, screenHeight = 600;
// Function prototypes
void key_callback(GLFWwindow* window, int key, int scancode, int action, int mode);
std::vector<GLfloat> create_sphere(int recursion);
// The MAIN function, from here we start our application and run the Game loop
int main()
{
// Init GLFW
glfwInit();
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
GLFWwindow* window = glfwCreateWindow(screenWidth, screenHeight, "LearnOpenGL", nullptr, nullptr); // Windowed
glfwMakeContextCurrent(window);
// Set the required callback functions
glfwSetKeyCallback(window, key_callback);
// Initialize GLEW to setup the OpenGL Function pointers
glewExperimental = GL_TRUE;
glewInit();
// Define the viewport dimensions
glViewport(0, 0, screenWidth, screenHeight);
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); // Comment to remove wireframe mode
// Setup OpenGL options
glEnable(GL_DEPTH_TEST);
// Setup and compile our shader(s)
Shader shader("core.vs", "core.frag");
// Generate a list of 100 quad locations/translation-vectors
std::vector<glm::vec2> translations(NUM_INS);
//glm::vec2 translations[NUM_INS];
int index = 0;
GLfloat offset = 1.0f / (float)sqrt(NUM_INS);
for (GLint y = -(float)sqrt(NUM_INS); y < (float)sqrt(NUM_INS); y += 2)
{
for (GLint x = -(float)sqrt(NUM_INS); x < (float)sqrt(NUM_INS); x += 2)
{
glm::vec2 translation;
translation.x = (GLfloat)x / (float)sqrt(NUM_INS) + offset;
translation.y = (GLfloat)y / (float)sqrt(NUM_INS) + offset;
translations[index++] = translation;
}
}
// Store instance data in an array buffer
GLuint instanceVBO;
glGenBuffers(1, &instanceVBO);
glBindBuffer(GL_ARRAY_BUFFER, instanceVBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(glm::vec2) * NUM_INS, &translations[0], GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
// create 12 vertices of a icosahedron
std::vector<GLfloat> vv = create_sphere(SPHERE_DETAIL);
GLuint quadVAO, quadVBO;
glGenVertexArrays(1, &quadVAO);
glGenBuffers(1, &quadVBO);
glBindVertexArray(quadVAO);
glBindBuffer(GL_ARRAY_BUFFER, quadVBO);
glBufferData(GL_ARRAY_BUFFER, vv.size() * sizeof(GLfloat), &vv[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 6 * sizeof(GLfloat), (GLvoid*)0);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 6 * sizeof(GLfloat), (GLvoid*)(2 * sizeof(GLfloat)));
// Also set instance data
glEnableVertexAttribArray(2);
glBindBuffer(GL_ARRAY_BUFFER, instanceVBO);
glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(GLfloat), (GLvoid*)0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glVertexAttribDivisor(2, 1); // Tell OpenGL this is an instanced vertex attribute.
glBindVertexArray(0);
// For printing frames-per-second
float counter = 0;
double get_time = 0;
double new_time;
// Game loop
while (!glfwWindowShouldClose(window))
{
// Print fps by printing (number_of_frames / time_elapsed)
counter += 1;
if (counter > 10) {
counter -= 10;
new_time = glfwGetTime();
printf("fps: %.2f ", (10/(new_time - get_time)));
get_time = new_time;
}
// Check and call events
glfwPollEvents();
// Clear buffers
//glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// Draw 100 instanced quads
shader.Use();
glm::mat4 model;
model = glm::rotate(model, 0.0f, glm::vec3(1.0f, 0.0f, 0.0f));
// Camera/View transformation
glm::mat4 view;
GLfloat radius = 10.0f;
GLfloat camX = sin(glfwGetTime()) * radius;
GLfloat camZ = cos(glfwGetTime()) * radius;
view = glm::lookAt(glm::vec3(camX, 0.0f, camZ), glm::vec3(0.0f, 0.0f, 0.0f), glm::vec3(0.0f, 1.0f, 0.0f));
// Projection
glm::mat4 projection;
projection = glm::perspective(45.0f, (GLfloat)screenWidth / (GLfloat)screenHeight, 0.1f, 100.0f);
// Get the uniform locations
GLint modelLoc = glGetUniformLocation(shader.Program, "model");
GLint viewLoc = glGetUniformLocation(shader.Program, "view");
GLint projLoc = glGetUniformLocation(shader.Program, "projection");
// Pass the matrices to the shader
glUniformMatrix4fv(modelLoc, 1, GL_FALSE, glm::value_ptr(model));
glUniformMatrix4fv(viewLoc, 1, GL_FALSE, glm::value_ptr(view));
glUniformMatrix4fv(projLoc, 1, GL_FALSE, glm::value_ptr(projection));
glBindVertexArray(quadVAO);
glDrawArraysInstanced(GL_TRIANGLES, 0, vv.size() / 3, NUM_INS); // 100 triangles of 6 vertices each
glBindVertexArray(0);
// Swap the buffers
glfwSwapBuffers(window);
}
glfwTerminate();
return 0;
}
// Is called whenever a key is pressed/released via GLFW
void key_callback(GLFWwindow* window, int key, int scancode, int action, int mode)
{
if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS)
glfwSetWindowShouldClose(window, GL_TRUE);
}
std::vector<GLfloat> add_color(std::vector<GLfloat> sphere) {
// Add color
std::vector<GLfloat> colored_sphere;
for (GLint i = 0; i < sphere.size(); i+=9) {
colored_sphere.push_back(sphere[i]);
colored_sphere.push_back(sphere[i+1]);
colored_sphere.push_back(sphere[i+2]);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(sphere[i+3]);
colored_sphere.push_back(sphere[i+4]);
colored_sphere.push_back(sphere[i+5]);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(sphere[i+6]);
colored_sphere.push_back(sphere[i+7]);
colored_sphere.push_back(sphere[i+8]);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
}
return colored_sphere;
}
std::vector<GLfloat> tesselate(std::vector<GLfloat> shape, int recursion) {
if (recursion > 0) {
std::vector<GLfloat> new_sphere = {};
for (GLint i = 0; i < shape.size(); i += 9) {
// 1.902113 approximately
GLfloat radius = sqrt(1.0f + pow((1.0f + sqrt(5.0f)) / 2.0f, 2));
// Every 9 points is a triangle. Take 1 triangle and turn it into 4 triangles.
GLfloat p_one[] = {shape[i], shape[i + 1], shape[i + 2]};
GLfloat p_two[] = {shape[i + 3], shape[i + 4], shape[i + 5]};
GLfloat p_thr[] = {shape[i + 6], shape[i + 7], shape[i + 8]};
GLfloat p_one_two[] = { (p_one[0] + p_two[0]) / 2.0f, (p_one[1] + p_two[1]) / 2.0f, (p_one[2] + p_two[2]) / 2.0f };
GLfloat p_one_thr[] = { (p_one[0] + p_thr[0]) / 2.0f, (p_one[1] + p_thr[1]) / 2.0f, (p_one[2] + p_thr[2]) / 2.0f };
GLfloat p_two_thr[] = { (p_two[0] + p_thr[0]) / 2.0f, (p_two[1] + p_thr[1]) / 2.0f, (p_two[2] + p_thr[2]) / 2.0f };
GLfloat r_one_two = sqrt((p_one_two[0]*p_one_two[0]) + (p_one_two[1]*p_one_two[1]) + (p_one_two[2]*p_one_two[2]));
GLfloat r_one_thr = sqrt((p_one_thr[0]*p_one_thr[0]) + (p_one_thr[1]*p_one_thr[1]) + (p_one_thr[2]*p_one_thr[2]));
GLfloat r_two_thr = sqrt((p_two_thr[0]*p_two_thr[0]) + (p_two_thr[1]*p_two_thr[1]) + (p_two_thr[2]*p_two_thr[2]));
GLfloat t_one_two[] = { radius * p_one_two[0] / r_one_two, radius * p_one_two[1] / r_one_two, radius * p_one_two[2] / r_one_two };
GLfloat t_one_thr[] = { radius * p_one_thr[0] / r_one_thr, radius * p_one_thr[1] / r_one_thr, radius * p_one_thr[2] / r_one_thr };
GLfloat t_two_thr[] = { radius * p_two_thr[0] / r_two_thr, radius * p_two_thr[1] / r_two_thr, radius * p_two_thr[2] / r_two_thr };
// Triangle 1:
new_sphere.push_back(p_one[0]);
new_sphere.push_back(p_one[1]);
new_sphere.push_back(p_one[2]);
new_sphere.push_back(t_one_two[0]);
new_sphere.push_back(t_one_two[1]);
new_sphere.push_back(t_one_two[2]);
new_sphere.push_back(t_one_thr[0]);
new_sphere.push_back(t_one_thr[1]);
new_sphere.push_back(t_one_thr[2]);
// Triangle 2:
new_sphere.push_back(p_two[0]);
new_sphere.push_back(p_two[1]);
new_sphere.push_back(p_two[2]);
new_sphere.push_back(t_one_two[0]);
new_sphere.push_back(t_one_two[1]);
new_sphere.push_back(t_one_two[2]);
new_sphere.push_back(t_two_thr[0]);
new_sphere.push_back(t_two_thr[1]);
new_sphere.push_back(t_two_thr[2]);
// Triangle 3:
new_sphere.push_back(p_thr[0]);
new_sphere.push_back(p_thr[1]);
new_sphere.push_back(p_thr[2]);
new_sphere.push_back(t_one_thr[0]);
new_sphere.push_back(t_one_thr[1]);
new_sphere.push_back(t_one_thr[2]);
new_sphere.push_back(t_two_thr[0]);
new_sphere.push_back(t_two_thr[1]);
new_sphere.push_back(t_two_thr[2]);
// Center Triangle:
new_sphere.push_back(t_one_two[0]);
new_sphere.push_back(t_one_two[1]);
new_sphere.push_back(t_one_two[2]);
new_sphere.push_back(t_one_thr[0]);
new_sphere.push_back(t_one_thr[1]);
new_sphere.push_back(t_one_thr[2]);
new_sphere.push_back(t_two_thr[0]);
new_sphere.push_back(t_two_thr[1]);
new_sphere.push_back(t_two_thr[2]);
}
return tesselate(new_sphere, recursion - 1);
}
printf("number of vertices to be rendered: %d || ", shape.size());
return shape;
}
std::vector<GLfloat> create_sphere(int recursion) {
// Define the starting icosahedron
GLfloat t_ = (1.0f + sqrt(5.0f)) / 2.0f;
std::vector<GLfloat> icosahedron = {
-1.0f, t_, 0.0f, -t_, 0.0f, 1.0f, 0.0f, 1.0f, t_,
-1.0f, t_, 0.0f, 0.0f, 1.0f, t_, 1.0f, t_, 0.0f,
-1.0f, t_, 0.0f, 1.0f, t_, 0.0f, 0.0f, 1.0f, -t_,
-1.0f, t_, 0.0f, 0.0f, 1.0f, -t_, -t_, 0.0f, -1.0f,
-1.0f, t_, 0.0f, -t_, 0.0f, -1.0f, -t_, 0.0f, 1.0f,
1.0f, t_, 0.0f, 0.0f, 1.0f, t_, t_, 0.0f, 1.0f,
0.0f, 1.0f, t_, -t_, 0.0f, 1.0f, 0.0f, -1.0f, t_,
-t_, 0.0f, 1.0f, -t_, 0.0f, -1.0f, -1.0f, -t_, 0.0f,
-t_, 0.0f, -1.0f, 0.0f, 1.0f, -t_, 0.0f, -1.0f, -t_,
0.0f, 1.0f, -t_, 1.0f, t_, 0.0f, t_, 0.0f, -1.0f,
1.0f, -t_, 0.0f, t_, 0.0f, 1.0f, 0.0f, -1.0f, t_,
1.0f, -t_, 0.0f, 0.0f, -1.0f, t_,-1.0f, -t_, 0.0f,
1.0f, -t_, 0.0f,-1.0f, -t_, 0.0f, 0.0f, -1.0f, -t_,
1.0f, -t_, 0.0f, 0.0f, -1.0f, -t_, t_, 0.0f, -1.0f,
1.0f, -t_, 0.0f, t_, 0.0f, -1.0f, t_, 0.0f, 1.0f,
0.0f, -1.0f, t_, t_, 0.0f, 1.0f, 0.0f, 1.0f, t_,
-1.0f, -t_, 0.0f, 0.0f, -1.0f, t_,-t_, 0.0f, 1.0f,
0.0f, -1.0f, -t_,-1.0f, -t_, 0.0f,-t_, 0.0f, -1.0f,
t_, 0.0f, -1.0f, 0.0f, -1.0f, -t_, 0.0f, 1.0f, -t_,
t_, 0.0f, 1.0f, t_, 0.0f, -1.0f, 1.0f, t_, 0.0f,
};
// Tesselate the icososphere the number of times recursion
std::vector<GLfloat> colorless_sphere = tesselate(icosahedron, recursion);
// Add color and return
return add_color(colorless_sphere);
}
Vertex Shader :(名为core.vs)
#version 330 core
layout (location = 0) in vec3 position;
layout (location = 1) in vec3 color;
layout (location = 2) in vec2 offset;
out vec3 fColor;
uniform mat4 model;
uniform mat4 view;
uniform mat4 projection;
void main()
{
gl_Position = projection * view * model * vec4(position.x + offset.x, position.y + offset.y, position.z, 1.0f);
fColor = color;
}
片段着色器:(名为core.frag)
#version 330 core
in vec3 fColor;
out vec4 color;
void main()
{
color = vec4(fColor, 1.0f);
}
着色器类:(名为Shader.h)
#ifndef SHADER_H
#define SHADER_H
#include <string>
#include <fstream>
#include <sstream>
#include <iostream>
#include <GL/glew.h>
class Shader
{
public:
GLuint Program;
// Constructor generates the shader on the fly
Shader(const GLchar* vertexPath, const GLchar* fragmentPath)
{
// 1. Retrieve the vertex/fragment source code from filePath
std::string vertexCode;
std::string fragmentCode;
std::ifstream vShaderFile;
std::ifstream fShaderFile;
// ensures ifstream objects can throw exceptions:
vShaderFile.exceptions(std::ifstream::badbit);
fShaderFile.exceptions(std::ifstream::badbit);
try
{
// Open files
vShaderFile.open(vertexPath);
fShaderFile.open(fragmentPath);
std::stringstream vShaderStream, fShaderStream;
// Read file's buffer contents into streams
vShaderStream << vShaderFile.rdbuf();
fShaderStream << fShaderFile.rdbuf();
// close file handlers
vShaderFile.close();
fShaderFile.close();
// Convert stream into string
vertexCode = vShaderStream.str();
fragmentCode = fShaderStream.str();
}
catch (std::ifstream::failure e)
{
std::cout << "ERROR::SHADER::FILE_NOT_SUCCESFULLY_READ" << std::endl;
}
const GLchar* vShaderCode = vertexCode.c_str();
const GLchar * fShaderCode = fragmentCode.c_str();
// 2. Compile shaders
GLuint vertex, fragment;
GLint success;
GLchar infoLog[512];
// Vertex Shader
vertex = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertex, 1, &vShaderCode, NULL);
glCompileShader(vertex);
// Print compile errors if any
glGetShaderiv(vertex, GL_COMPILE_STATUS, &success);
if (!success)
{
glGetShaderInfoLog(vertex, 512, NULL, infoLog);
std::cout << "ERROR::SHADER::VERTEX::COMPILATION_FAILED\n" << infoLog << std::endl;
}
// Fragment Shader
fragment = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragment, 1, &fShaderCode, NULL);
glCompileShader(fragment);
// Print compile errors if any
glGetShaderiv(fragment, GL_COMPILE_STATUS, &success);
if (!success)
{
glGetShaderInfoLog(fragment, 512, NULL, infoLog);
std::cout << "ERROR::SHADER::FRAGMENT::COMPILATION_FAILED\n" << infoLog << std::endl;
}
// Shader Program
this->Program = glCreateProgram();
glAttachShader(this->Program, vertex);
glAttachShader(this->Program, fragment);
glLinkProgram(this->Program);
// Print linking errors if any
glGetProgramiv(this->Program, GL_LINK_STATUS, &success);
if (!success)
{
glGetProgramInfoLog(this->Program, 512, NULL, infoLog);
std::cout << "ERROR::SHADER::PROGRAM::LINKING_FAILED\n" << infoLog << std::endl;
}
// Delete the shaders as they're linked into our program now and no longer necessery
glDeleteShader(vertex);
glDeleteShader(fragment);
}
// Uses the current shader
void Use()
{
glUseProgram(this->Program);
}
};
#endif
答案 0 :(得分:4)
我的最终目标是以60 fps渲染100万个不同大小和颜色的球体。
这是一种不合理的期望。
假设每个球体由50个三角形组成。对于良好的球形有点小,但我们假设它们很小。
每个球体50个三十五万个球体每帧5000万个三角形 。在60 FPS时,这是每秒3 十亿个三角形。
没有商用GPU可以做到这一点。这只是一个50个三角形球体;您的4x镶嵌二十面体将超过 5,000 三角形。
现在是的,绘制60个这样的球体每帧只有~300,000个三角形。但即使是60 FPS,每秒约有1800万个三角形。硬件确实存在,可以处理许多三角形,但它显然是一个很多。而你肯定不会得到100万。
这不是GPU / CPU通信或开销的问题。你只需要在GPU上投入的工作量超出其处理范围。你或许可以在这里和那里改进一些东西,但没有任何东西可以让你达到你想要的十分之一。
至少,不是采用这种整体方法。
对于想要绘制数百万个球体的特殊情况,我会使用光线追踪冒名顶替者而不是实际的球体几何体。也就是说,您绘制四边形,其位置由顶点(或几何)着色器生成。您为每个球体生成一个四边形,使四边形围绕球体。然后片段着色器执行简单的光线 - 球体相交测试,以查看相关片段(从摄像机视图的方向)是否击中球体。如果光线没有撞到球体,则丢弃该碎片。
您还需要修改gl_FragDepth
以给冒名顶替者提供适当的深度值,以便相交的球体可以正常工作。