在体素引擎中创建区块(chunk)网格很慢

时间:2016-07-14 12:42:10

标签: c++ performance opengl stdvector voxels

我正在使用C++开发一个体素引擎。在实现了区块(chunk)之后,我意识到生成它们的开销非常大。我指的不是往区块里填充方块(block),而是为区块生成网格。

除了放置和移除体素的时候之外,区块(chunk)一旦生成,游戏就能流畅运行。每当某个区块发生变化,它的网格都会被重建,而这是一个开销很大的过程:重建单个区块大约需要0.36秒,因此编辑方块时画面会卡顿约0.36秒。此外,正因为每个区块都要花费0.36秒,加载区块半径超过3或4的世界需要好几分钟。半径为4个区块时需要189秒,即(4 * 2)^3 * 0.36(512个区块,每个0.36秒)。

下面是我的网格生成代码。它遍历区块(chunk)中的每一个方块(block):如果方块不为空,就为它添加立方体的顶点,否则就忽略它。我已经计划了一些功能,这个方法以后会变得更加复杂;如果它现在就已经这么慢,那就很糟糕了。

/**
 * Rebuilds the mesh for a single chunk and uploads it to the GPU.
 *
 * Every non-air block that has at least one neighbour reporting "no block"
 * (World::getBlock(...) <= 0) contributes its 8 cube corners plus two
 * triangles per exposed side. Blocks that are enclosed on all six sides are
 * skipped entirely, so no vertices are generated for geometry that can never
 * be drawn.
 *
 * The resulting geometry is written into the VAO/VBO pair of the matching
 * entry in `chunks` (matched via std::find on a RenderableChunk carrying the
 * chunk's position); if there is no such entry, the GL objects are created
 * and a new entry is appended.
 *
 * @param chunk Chunk to mesh. Nothing happens when it is uninitialised or
 *              reports no blocks.
 */
void WorldRenderer::constructChunkMesh(Chunk* chunk)
{
    // Nothing to build for an uninitialised or empty chunk.
    if (!chunk->isInitialized() || chunk->getNumBlocks() <= 0)
        return;

    ChunkMesh mesh;

    // World-relative coordinates of this chunk's local (0, 0, 0) block.
    // These do not change while iterating, so they are computed once here
    // instead of once per block.
    const auto chunkPos = chunk->getPos();
    const auto baseX = chunkPos.x * CHUNK_SIZE;
    const auto baseY = chunkPos.y * CHUNK_SIZE;
    const auto baseZ = chunkPos.z * CHUNK_SIZE;

    // Iterate over every block within the chunk.
    // CHUNK_SIZE has a value of 16. Each chunk is 16x16x16 blocks.
    for (int x = 0; x < CHUNK_SIZE; x++)
    {
        for (int y = 0; y < CHUNK_SIZE; y++)
        {
            for (int z = 0; z < CHUNK_SIZE; z++)
            {
                // Air blocks are never rendered.
                if (chunk->getBlock(x, y, z) == Blocks::BLOCK_TYPE_AIR)
                    continue;

                // The xyz coord of this block in world-relative coordinates,
                // instead of chunk-relative.
                const int wx = baseX + x;
                const int wy = baseY + y;
                const int wz = baseZ + z;

                // A side is exposed when no block exists next to it.
                const bool showTop    = World::getBlock(wx, wy + 1, wz) <= 0; // y+
                const bool showBottom = World::getBlock(wx, wy - 1, wz) <= 0; // y-
                const bool showFront  = World::getBlock(wx, wy, wz - 1) <= 0; // z-
                const bool showBack   = World::getBlock(wx, wy, wz + 1) <= 0; // z+
                const bool showRight  = World::getBlock(wx + 1, wy, wz) <= 0; // x+
                const bool showLeft   = World::getBlock(wx - 1, wy, wz) <= 0; // x-

                // Fully enclosed block: it would add 8 vertices and no
                // indices, so skip it before touching the mesh.
                if (!(showTop || showBottom || showFront || showBack || showRight || showLeft))
                    continue;

                // Adds one cube corner (offset within the unit cube) at this
                // block's chunk-relative position and returns its index.
                const glm::vec3 origin = glm::vec3(x, y, z);
                auto addCorner = [&mesh, &origin](float cx, float cy, float cz)
                {
                    return mesh.addVertex(Vertex(glm::vec3(cx, cy, cz) + origin, glm::vec3(0.0F, 1.0F, 0.0F), glm::vec4(1.0F, 1.0F, 1.0F, 1.0F), glm::vec2(0.0F, 0.0F)));
                };

                // The 8 vertices for a cube.
                const int i0 = addCorner(0.0F, 0.0F, 1.0F);
                const int i1 = addCorner(1.0F, 0.0F, 1.0F);
                const int i2 = addCorner(0.0F, 1.0F, 1.0F);
                const int i3 = addCorner(1.0F, 1.0F, 1.0F);
                const int i4 = addCorner(0.0F, 0.0F, 0.0F);
                const int i5 = addCorner(1.0F, 0.0F, 0.0F);
                const int i6 = addCorner(0.0F, 1.0F, 0.0F);
                const int i7 = addCorner(1.0F, 1.0F, 0.0F);

                if (showTop)
                {
                    // No block in the y+ direction: add the top face.
                    mesh.addFace(i2, i3, i7);
                    mesh.addFace(i2, i7, i6);
                }
                if (showBottom)
                {
                    // No block in the y- direction: add the bottom face.
                    mesh.addFace(i0, i4, i1);
                    mesh.addFace(i1, i4, i5);
                }
                if (showFront)
                {
                    // No block in the z- direction: add the front face.
                    mesh.addFace(i6, i7, i4);
                    mesh.addFace(i7, i5, i4);
                }
                if (showBack)
                {
                    // No block in the z+ direction: add the back face.
                    mesh.addFace(i0, i1, i2);
                    mesh.addFace(i1, i3, i2);
                }
                if (showRight)
                {
                    // No block in the x+ direction: add the right face.
                    mesh.addFace(i1, i7, i3);
                    mesh.addFace(i1, i5, i7);
                }
                if (showLeft)
                {
                    // No block in the x- direction: add the left face.
                    mesh.addFace(i2, i6, i4);
                    mesh.addFace(i0, i2, i4);
                }
            }
        }
    }


    // The rest of this is OpenGL code, and doesn't add any significant
    // performance drop. I have measured this.
    GeometryData gd = MeshHandler::compileGeometry(mesh.vertices.data(), mesh.indices.data(), mesh.vertices.size(), mesh.indices.size());

    // Look for an existing renderable entry for this chunk.
    RenderableChunk rc;
    rc.pos = chunkPos;
    const auto existing = std::find(chunks.begin(), chunks.end(), rc);

    if (existing != chunks.end())
    {
        // Reuse the GL objects that were created for this chunk earlier.
        rc = *existing;
    }
    else
    {
        GLuint VAO;
        // Handed to rc.VBOs below.
        // NOTE(review): nothing in this function frees the array - confirm
        // that RenderableChunk's owner releases it.
        GLuint* VBOs = new GLuint[2];

        //1527864 bytes maximum per chunk (1.5MB)

        glGenVertexArrays(1, &VAO);
        glBindVertexArray(VAO);

        // Vertex buffer: allocated once at the size of MAX_BLOCKS cubes and
        // filled later with glBufferSubData.
        glGenBuffers(2, VBOs);
        glBindBuffer(GL_ARRAY_BUFFER, VBOs[0]);
        glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * 8 * MAX_BLOCKS, nullptr, GL_DYNAMIC_DRAW);

        glVertexAttribPointer(ATTRIB_VERTEX_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), BUFFER_OFFSET(offsetof(Vertex, position)));
        glEnableVertexAttribArray(ATTRIB_VERTEX_ARRAY);
        glVertexAttribPointer(ATTRIB_NORMAL_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), BUFFER_OFFSET(offsetof(Vertex, normal)));
        glEnableVertexAttribArray(ATTRIB_NORMAL_ARRAY);
        glVertexAttribPointer(ATTRIB_COLOUR_ARRAY, 4, GL_FLOAT, GL_FALSE, sizeof(Vertex), BUFFER_OFFSET(offsetof(Vertex, colour)));
        glEnableVertexAttribArray(ATTRIB_COLOUR_ARRAY);
        glVertexAttribPointer(ATTRIB_TEXTURE_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), BUFFER_OFFSET(offsetof(Vertex, texture)));
        glEnableVertexAttribArray(ATTRIB_TEXTURE_ARRAY);

        glBindBuffer(GL_ARRAY_BUFFER, 0);

        // Index buffer: 36 indices (12 triangles) per cube.
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, VBOs[1]);
        glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLushort) * 36 * MAX_BLOCKS, nullptr, GL_DYNAMIC_DRAW);
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);

        glBindVertexArray(0);

        rc.VAO = VAO;
        rc.VBOs = VBOs;
    }

    rc.numIndices = gd.numIndices;

    // Upload the freshly built geometry into the chunk's buffers.
    glBindVertexArray(rc.VAO);

    glBindBuffer(GL_ARRAY_BUFFER, rc.VBOs[0]);
    glBufferSubData(GL_ARRAY_BUFFER, 0, gd.vboSize(), gd.vertices);
    glBindBuffer(GL_ARRAY_BUFFER, 0);

    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, rc.VBOs[1]);
    glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, gd.iboSize(), gd.indices);
    glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);

    glBindVertexArray(0);

    // Store the updated entry: overwrite the one found above, or append a
    // new one. `chunks` is not modified between the lookup and this point,
    // so the iterator is still valid.
    if (existing != chunks.end())
    {
        *existing = rc;
    }
    else
    {
        chunks.push_back(rc);
    }
}

下面是ChunkMesh结构体,我认为问题就出在这里:

struct ChunkMesh
{
    std::vector<Vertex> vertices;
    std::vector<GLushort> indices;

    int addVertex(Vertex v)
    {
        //add a vertex to the mesh, and return its index in the list.
        vertices.push_back(v);
        return vertices.size() - 1;
    }

    void addFace(int v0, int v1, int v2)
    {
        //construct a face with 3 vertices.
        indices.push_back(v0);
        indices.push_back(v1);
        indices.push_back(v2);
    }
};

我认为问题出在ChunkMesh结构体对push_back的使用上。在需要执行数百次push_back时,std::vector非常慢,但我找不到替代方案。我可以用什么来替换vector?

我渲染区块(chunk)的方式是不是完全错了?我该如何优化这个函数?

非常感谢任何帮助。

感谢。

编辑:我试过用reserve为vector预留容量,但令我困惑的是,这对性能毫无影响,耗时仍然是0.36秒。

我给ChunkMesh添加了一个接受方块(block)数量的构造函数,如下所示:

ChunkMesh(int numBlocks)
{
    vertices.reserve(numBlocks * 8); //8 vertices per cube
    indices.reserve(numBlocks * 36); //36 indices per cube
}

1 个答案:

答案 0 :(得分:0)

我的建议是评估一下,那些不在区块(chunk)表面上的顶点是否真的有必要。

如果没有必要,就不需要把它们添加到ChunkMesh中,这样可以减少顶点数量和push_back的调用次数。