glMultiDrawElementsIndirect很慢

时间:2017-04-26 23:20:27

标签: opengl glsl julia opengl-4

我正在重写旧的渲染管道。 我创建了一个非常精简的原型,并且我很惊讶我的旧的相当复杂且经过严格优化的管道具有与超级简单原型完全相同的性能。

任务:渲染 1024 个大小各异的网格(总共约 1400 万个三角形),每个网格都有一组各自不同的 uniform 变量。

我现在的做法是使用 uniform 缓冲区(UBO)+ glMultiDrawElementsIndirect,并用 gl_DrawIDARB 作为下标去索引该 uniform 缓冲区。渲染循环如下:

"""
    renderloop(window, N, frame_times, program, commandbuff)

Draw all commands stored in `commandbuff` once per frame with a single
`glMultiDrawElementsIndirect` call and push the duration of every frame (the value
returned by `toq()`) onto `frame_times`.

The loop ends as soon as `window` is no longer open or the frame counter exceeds `N`.
The counter starts at zero and is compared with `<=`, so up to `N + 1` frames are drawn.

Returns `frame_times`.
"""
function renderloop(window, N, frame_times, program, commandbuff)
    # State that does not change between frames is set once, up front.
    glUseProgram(program)
    glEnable(GL_DEPTH_TEST)
    glClearColor(1, 1, 1, 1)
    GLAbstraction.bind(commandbuff)

    frame = 0
    while true
        (isopen(window) && frame <= N) || break

        tic()
        # Block until all previously submitted GL work has completed; the wait happens
        # after `tic()` and is therefore part of the measured interval.
        glFinish()
        GLWindow.poll_glfw()
        # Wrapping the draw in glBindVertexArray(vbo.id) / glBindVertexArray(0) was tried
        # and did not change the timing much.
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        # C_NULL: commands are read from the start of the bound indirect buffer;
        # trailing 0: the commands are tightly packed.
        glMultiDrawElementsIndirect(
            GL_TRIANGLES, GL_UNSIGNED_INT, C_NULL, length(commandbuff), 0
        )
        GLWindow.swapbuffers(window)

        elapsed = toq()
        push!(frame_times, elapsed)
        frame += 1
    end
    return frame_times
end

我的另一条管线太复杂,无法在这里完整贴出。简而言之,它是未经优化的 Julia 代码加上 GLSL 3.0 的绘制代码,使用普通的 uniform 变量 + 光线拾取 + FXAA + 多个渲染目标等等。除了改用 uniform 块等现代化写法之外,两者的着色器几乎相同。

这里可以看到新的(几乎)完整的代码:

# Vertex shader source (GLSL 4.50 with GL_ARB_shader_draw_parameters enabled).
#
# Every draw of the multi-draw call fetches its own `VertexArgument` (color + model
# matrix) from the `VertexArguments` uniform block, indexed by `gl_DrawIDARB`.
# The position is transformed by `scene.view * arg.model` and then `scene.proj`.
# The light direction is derived from the untransformed `position` attribute and the
# constant `vec3(-10)`; `scene.lightposition`, `scene.projview` and `scene.resolution`
# are declared but not read in this shader. The normal is forwarded untransformed.
#
# NOTE(review): under std140 one `VertexArgument` takes 80 bytes (vec4 + mat4), so the
# 1024-element array needs 80 KiB. Confirm this fits GL_MAX_UNIFORM_BLOCK_SIZE on the
# target driver.
vert = """
#version 450
#extension GL_ARB_shader_draw_parameters : enable

struct VertexArgument{
    vec4 color;
    mat4 model;
};

layout (location = 0) in vec3 position;
layout (location = 1) in vec3 normal;

layout (std140) uniform Scene{
    vec4 lightposition;
    mat4 proj;
    mat4 view;
    mat4 projview;
    vec2 resolution;
} scene;

layout (std140) uniform VertexArguments{
    VertexArgument[1024] args;
} vertex_arguments;


out VertexOut{
    vec3 vertex;
    vec3 normal;
    vec3 lightdir;
    vec4 color;
} vertex_out;

void main(){
    VertexArgument arg = vertex_arguments.args[gl_DrawIDARB];
    vec4 position_camspace = scene.view * arg.model * vec4(position,  1.0);
    gl_Position = scene.proj * position_camspace;
    vertex_out.lightdir = normalize(vec3(-10) - position.xyz);
    vertex_out.vertex = -position_camspace.xyz;
    vertex_out.normal = normal;
    vertex_out.color = arg.color;
}
"""

# Fragment shader source (GLSL 4.50).
#
# `blinnphong(V, N, L, color)` returns a constant ambient term plus a diffuse term
# (`color * max(dot(L, N), 0)`) and a specular term with exponent 8; the specular term
# is zeroed whenever the diffuse coefficient is not positive.
# `main` evaluates it twice, once with the interpolated light direction and once with
# its negation, and writes the sum with alpha 1.0 to output location 0.
#
# NOTE(review): `vertex_in.vertex` is passed as `V` without being normalized, while
# `L` and `N` are normalized first. Confirm whether that is intended.
frag = """
#version 450

vec3 blinnphong(vec3 V, vec3 N, vec3 L, vec3 color){

    float diff_coeff = max(dot(L,N), 0.0);

    // specular coefficient
    vec3 H = normalize(L+V);

    float spec_coeff = pow(max(dot(H,N), 0.0), 8.0);
    if (diff_coeff <= 0.0)
        spec_coeff = 0.0;

    // final lighting model
    return vec3(
        vec3(0.1) * vec3(0.3)  +
        vec3(0.9) * color * diff_coeff +
        vec3(0.3) * spec_coeff
    );
}

in VertexOut{
    vec3 vertex;
    vec3 normal;
    vec3 lightdir;
    vec4 color;
} vertex_in;

layout (location = 0) out vec4 frag_color;

void main(){
    vec3 L = normalize(vertex_in.lightdir);
    vec3 N = normalize(vertex_in.normal);
    vec3 light1 = blinnphong(vertex_in.vertex, N, L, vertex_in.color.rgb);
    vec3 light2 = blinnphong(vertex_in.vertex, N, -L, vertex_in.color.rgb);
    frag_color = vec4(light1 + light2, 1.0);
}
"""

# Create the window / GL context: version 4.5 is requested with debugging switched off.
# The hints ask for no multisampling, a 32-bit depth buffer, 8 bits per RGBA channel and
# no stencil or auxiliary buffers.
window = create_glcontext(
    major = 4, minor = 5, debugging = false,
    windowhints = [
        (GLFW.SAMPLES,      0),
        (GLFW.DEPTH_BITS,   32),

        (GLFW.ALPHA_BITS,   8),
        (GLFW.RED_BITS,     8),
        (GLFW.GREEN_BITS,   8),
        (GLFW.BLUE_BITS,    8),

        (GLFW.STENCIL_BITS, 0),
        (GLFW.AUX_BUFFERS,  0)
    ]
)

# Event container built from the window; the camera below reads `events[Area]` from it.
events = WindowEvents(Window => window)

# Perspective camera placed at (6, 6, 8) and looking at the origin.
cam = PerspectiveCamera(
    TranslationSpeed => 1f0,
    LookAt => Vec3f0(0),
    EyePosition => Vec3f0(6, 6, 8),
    Rotation => Vec3f0(0),
    Area => events[Area],
    RotationSpeed => 0.1f0
)

# Compile both shader stages from the source strings (passed as byte vectors) and
# combine them into one program.
vertshader = compile_shader(Vector{UInt8}(vert), GL_VERTEX_SHADER, :vertexshader)
fragshader = compile_shader(Vector{UInt8}(frag), GL_FRAGMENT_SHADER, :fragshader)

program = compile_program(vertshader, fragshader)

# Initial contents of the `Scene` uniform block. The tuple fields follow the order of
# the block members in the vertex shader: lightposition, proj, view, projview,
# resolution.
scene = (
    Vec4f0(10),
    cam[Projection],
    cam[View],
    cam[ProjectionView],
    Vec2f0(widths(cam[Area]))
)

scene_buff = UniformBuffer(scene) # create UniformBuffer GL_STATIC_DRAW

# Callback registered on the camera's `ProjectionView` field; it rewrites the scene
# buffer with the current camera values.
FieldTraits.on(cam, ProjectionView) do projview
    # write new values to scene buffer.. if not doing this, timings stay the same
    scene_buff[1] = (
        Vec4f0(10),
        cam[Projection],
        cam[View],
        projview,
        Vec2f0(widths(cam[Area]))
    )
end

# `vals` is only used for its type: a (color, model matrix) pair, matching the
# `VertexArgument` struct in the vertex shader. `loadmeshes` pushes one entry per mesh
# onto `uniform_array`.
# NOTE(review): presumably `UniformBuffer(typeof(vals))` creates an empty buffer of
# that element type. Confirm against the UniformBuffer implementation.
vals = (Vec4f0(1, 0, 0, 1), eye(Mat4f0))
uniform_array = UniformBuffer(typeof(vals))

"""
    loadmeshes(folder)

Load up to 1024 `.ifs` meshes from `folder` into one shared vertex array and create one
indirect draw command per mesh.

For every mesh this function

- appends its faces and its (vertex, normal) pairs to the shared arrays,
- pushes a `(color, model)` entry onto the global `uniform_array`, with a random RGB
  color (alpha 1) and a model matrix that moves the mesh's minimum corner to the origin,
  scales by the reciprocal of its largest extent and translates it to its own cell of a
  32×32 grid,
- stores `Command(index count, 1, first index, base vertex, 0)`.

If `folder` holds fewer than 1024 `.ifs` files, all of them are loaded (the original
unconditional `[1:1024]` slice threw a `BoundsError` in that case).

Returns `(vbo, ibuff)`: the vertex array built from the combined vertices and faces,
and the buffer of draw commands created with `GL_DRAW_INDIRECT_BUFFER`.
"""
function loadmeshes(folder)
    # Upper bound on the mesh count: the vertex shader's `args` array holds 1024
    # entries and the placement grid below is 32×32.
    maxmeshes = 1024
    meshpaths = filter(x -> endswith(x, ".ifs"), readdir(folder))
    # Clamp the slice so that a folder with fewer meshes does not raise a BoundsError.
    meshpaths = meshpaths[1:min(maxmeshes, length(meshpaths))]

    faces = GLTriangle[]
    vertices = Tuple{Point3f0, Normal{3, Float32}}[]
    # Running offsets into the shared index and vertex arrays.
    fidx = 0
    vidx = 0
    drawcommands = Vector{Command}(length(meshpaths))
    for (i, meshpath) in enumerate(meshpaths)
        mesh = read_ifs(joinpath(folder, meshpath))
        fs, vs = mesh.indexes[1], mesh.parent
        append!(faces, fs)
        ns = normals(vs, fs)
        append!(vertices, zip(vs, ns))

        # Normalize the mesh size and place it in its grid cell.
        mini, maxi = extrema(vs)
        x, y = ind2sub((32, 32), i)
        trans = translationmatrix(Vec3f0(x, y, 0f0))
        s = maximum(maxi .- mini)
        scaling = scalematrix(Vec3f0(1f0 ./ s))

        # add uniform attributes to buffer
        push!(uniform_array, (
            Vec4f0(rand(Vec3f0)..., 1f0),
            trans * scaling * translationmatrix(-Vec3f0(mini))
        ))

        # Three indices per triangle; `fidx` / `vidx` locate this mesh inside the
        # shared index and vertex data.
        nindices = length(fs) * 3
        drawcommands[i] = Command(nindices, 1, fidx, vidx, 0)
        fidx += nindices
        vidx += length(vs)
    end

    vbo = VertexArray(view(vertices, faces)) # vertexarray
    ibuff = GLBuffer(drawcommands, buffertype = GL_DRAW_INDIRECT_BUFFER)
    return vbo, ibuff
end

# Load the meshes; this also fills `uniform_array` with one entry per mesh.
vbo, commandbuff = loadmeshes(homedir() * "/3dstuff/models")
# Look up the two uniform blocks declared in the vertex shader.
sceneidx = glGetUniformBlockIndex(program, "Scene")
vertex_arts_idx = glGetUniformBlockIndex(program, "VertexArguments")

# Assign `Scene` to binding point 0 and `VertexArguments` to binding point 1, then
# attach the matching buffers to those binding points.
glUniformBlockBinding(program, sceneidx, 0)
glUniformBlockBinding(program, vertex_arts_idx, 1)
glBindBufferBase(GL_UNIFORM_BUFFER, 0, scene_buff.buffer.id)
glBindBufferBase(GL_UNIFORM_BUFFER, 1, uniform_array.buffer.id)


"""
    renderloop(window, N, frame_times, commandbuff)

Draw all commands stored in `commandbuff` once per frame with a single
`glMultiDrawElementsIndirect` call and push the duration of every frame (the value
returned by `toq()`) onto `frame_times`. The shader program is taken from the global
`program`.

The loop ends as soon as `window` is no longer open or the frame counter exceeds `N`.
The counter starts at zero and is compared with `<=`, so up to `N + 1` frames are drawn.

Returns `frame_times`.
"""
function renderloop(window, N, frame_times, commandbuff)
    # State that does not change between frames is set once, up front.
    glUseProgram(program)
    glEnable(GL_DEPTH_TEST)
    glClearColor(1, 1, 1, 1)
    GLAbstraction.bind(commandbuff)

    frame = 0
    while true
        (isopen(window) && frame <= N) || break

        tic()
        # Block until all previously submitted GL work has completed; the wait happens
        # after `tic()` and is therefore part of the measured interval.
        glFinish()
        GLWindow.poll_glfw()
        # Wrapping the draw in glBindVertexArray(vbo.id) / glBindVertexArray(0) was tried
        # and did not change the timing much.
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        # C_NULL: commands are read from the start of the bound indirect buffer;
        # trailing 0: the commands are tightly packed.
        glMultiDrawElementsIndirect(
            GL_TRIANGLES, GL_UNSIGNED_INT, C_NULL, length(commandbuff), 0
        )
        GLWindow.swapbuffers(window)

        elapsed = toq()
        push!(frame_times, elapsed)
        frame += 1
    end
    return frame_times
end
# Benchmark driver: collect per-frame durations (seconds, as returned by `toq()`)
# and report their mean in milliseconds.
times = Float64[]
renderloop(window, 2000, times, commandbuff)
mean(times) * 1000 # ~ 14 ms

GPU是FirePro 9100。

旧管线耗时:每帧约 13 ms。新原型:每帧约 15 ms;去掉 glMultiDrawElementsIndirect 调用后每帧约 0.2 ms。

我也试过打开和关闭vsync并稍微移动代码,时间没有任何区别。新的原型也感觉不太顺畅,所以看起来它不仅仅是一个测量问题。

1 个答案:

答案 0 :(得分:1)

glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, C_NULL, length(commandbuff), 0)

此参数应该是您要绘制的元素数量。在此处放置1024,以查看它是否可以解决性能问题。