Question

我正在尝试使用几何着色器将点膨胀成四边形，以进行一些简单的2D渲染。大多数帧的渲染完全符合我的预期，但每隔一段时间，就会有一些顶点以不正确的属性被渲染。我花了一些时间把它从一个包含许多活动部件的更大程序中简化出来，所以渲染过程中只做了最低限度的工作，但不幸的是仍然有很多设置代码。完整的代码在这里：

http://pastebin.com/mQyRcTjJ

#!/usr/bin/env python

# Copyright 2011-2013, Andrew Wilson
# Licensed under the MIT license:
# http://www.opensource.org/licenses/MIT

# memglitch.py

from OpenGL import GL
import sys
import pygame
import pygame.image
import pygame.key
import pygame as PG
import numpy
import hashlib
import collections
import ctypes


######## SHADERS ########

# Vertex shader: a pure pass-through stage. It copies each point's
# attributes (position, size, and the rotation/layer pair packed into
# `other`) into the VertexData output block and does not write gl_Position.
#
# The members of VertexData are declared in the order position, size,
# rotation, layer so that the block matches the geometry shader's input
# block of the same name member-for-member. GLSL requires matched interface
# blocks to declare the same sequence of member types and names; with the
# two float members declared in a different order on each side, the program
# is relying on driver leniency rather than on specified behaviour.
vertex_shader = '''\
#version 330

uniform vec2 screen_dimensions;
uniform vec2 cam_position;
uniform float zoom;

layout(location=0) in vec2 position;
layout(location=1) in vec2 size;
layout(location=2) in vec2 other;

out VertexData
{
   vec2 position;
   vec2 size;
   float rotation;
   float layer;

} outData;

void main()
{
    outData.position = position;
    outData.size = size;
    outData.rotation = other.x;
    outData.layer = other.y;
}
'''

# Geometry shader: turns every input point into a four-vertex triangle
# strip (a quad). Each corner is the point's position plus the point's size
# scaled by (+/-0.5, +/-0.5) and multiplied by a 2x2 matrix built from the
# point's rotation. calcPosition() maps the resulting world-space position
# to the output position using the cam_position, zoom and screen_dimensions
# uniforms. The point's layer is forwarded unchanged to the fragment stage
# together with a texture coordinate for each corner.
geometry_shader = '''\
#version 330
#extension GL_EXT_gpu_shader4 : enable

layout (points) in;
layout (triangle_strip, max_vertices = 4) out;

uniform vec2 screen_dimensions;
uniform vec2 cam_position;
uniform float zoom;

in VertexData
{
   vec2 position;
   vec2 size;
   float rotation;
   float layer;
} vert[];

out FragData
{
    smooth vec2 texcoord;
    smooth float layer;
} vertOut;

vec4 calcPosition(in vec2 pos)
{
    // Transform a position in world-space into screen-space
    vec4 result;
    result.xy =
        (
            pos
            - cam_position
        )
        * zoom
        / screen_dimensions;
    result.zw = vec2(0.0, 1.0);
    return result;
}

void main()
{
    // Inflate each input point into a quad.
    float r = vert[0].rotation;
    mat2 rotation_matrix = mat2(cos(r), -sin(r), sin(r), cos(r));
    vec2 currentPos;
    vec4 texcoords = vec4(0,0,1,1);

    currentPos = vert[0].position + vert[0].size * vec2(-0.5, -0.5) * rotation_matrix;
    gl_Position = calcPosition(currentPos);
    vertOut.texcoord = texcoords.xy;
    vertOut.layer = vert[0].layer;
    gl_PrimitiveID = gl_PrimitiveIDIn;
    EmitVertex();

    currentPos = vert[0].position + vert[0].size * vec2(-0.5, 0.5) * rotation_matrix;
    gl_Position = calcPosition(currentPos);
    vertOut.texcoord = texcoords.xw;
    vertOut.layer = vert[0].layer;
    gl_PrimitiveID = gl_PrimitiveIDIn;
    EmitVertex();

    currentPos = vert[0].position + vert[0].size * vec2(0.5, -0.5) * rotation_matrix;
    gl_Position = calcPosition(currentPos);
    vertOut.texcoord = texcoords.zy;
    vertOut.layer = vert[0].layer;
    gl_PrimitiveID = gl_PrimitiveIDIn;
    EmitVertex();

    currentPos = vert[0].position + vert[0].size * vec2(0.5, 0.5) * rotation_matrix;
    gl_Position = calcPosition(currentPos);
    vertOut.texcoord = texcoords.zw;
    vertOut.layer = vert[0].layer;
    gl_PrimitiveID = gl_PrimitiveIDIn;
    EmitVertex();
}

'''

# Fragment shader: samples the texture_atlas array texture at the
# interpolated texcoord, using the interpolated layer value as the third
# (array layer) coordinate, and writes the result to colour output 0.
# The zoom uniform is declared here but not referenced by main().
fragment_shader = '''\
#version 330
#extension GL_EXT_gpu_shader4 : enable

uniform sampler2DArray texture_atlas;

uniform float zoom;

in FragData
{
    smooth vec2 texcoord;
    smooth float layer;
};

layout(location=0) out vec4 fragcolor;

void main()
{
    fragcolor = texture(
        texture_atlas,
        vec3(texcoord, float(layer)));
}
'''


######## TEXTURE_SETUP ########

def make_texture_array(
        image,
        across=8,
        down=8):
    '''
    Build a 2D array texture from a sprite sheet.

    The image is cut into a grid of `across` columns by `down` rows. Every
    cell becomes one layer of the texture; layers are numbered row by row,
    left to right within a row. Returns the OpenGL texture name, which is
    left bound to GL_TEXTURE_2D_ARRAY.
    '''
    sheet_width, sheet_height = image.get_size()
    cell_w = sheet_width // across
    cell_h = sheet_height // down

    # Serialise each grid cell as an RGBA byte string, rows outermost.
    # The third argument to tostring is pygame's `flipped` flag.
    layers = [
        pygame.image.tostring(
            image.subsurface((col*cell_w, row*cell_h, cell_w, cell_h)),
            "RGBA",
            True)
        for row in xrange(down)
        for col in xrange(across)]
    texel_data = "".join(layers)

    texture = GL.glGenTextures(1)
    GL.glBindTexture(GL.GL_TEXTURE_2D_ARRAY, texture)

    # Nearest-neighbour sampling, clamped edges, a single mip level.
    parameters = (
        (GL.GL_TEXTURE_MIN_FILTER, GL.GL_NEAREST),
        (GL.GL_TEXTURE_MAG_FILTER, GL.GL_NEAREST),
        (GL.GL_TEXTURE_WRAP_S, GL.GL_CLAMP_TO_EDGE),
        (GL.GL_TEXTURE_WRAP_T, GL.GL_CLAMP_TO_EDGE),
        (GL.GL_TEXTURE_BASE_LEVEL, 0),
        (GL.GL_TEXTURE_MAX_LEVEL, 0))
    for pname, value in parameters:
        GL.glTexParameteri(GL.GL_TEXTURE_2D_ARRAY, pname, value)

    # One layer per grid cell, uploaded in a single call.
    GL.glTexImage3D(
        GL.GL_TEXTURE_2D_ARRAY,
        0,
        GL.GL_RGBA8,
        cell_w,
        cell_h,
        across*down,
        0,
        GL.GL_RGBA,
        GL.GL_UNSIGNED_BYTE,
        texel_data)

    return texture


######## SHADER SETUP ########

def create_shader_program(resources):
    '''
    Compile the three shaders and link them into a program.

    Populates resources.shader_program_object with the OpenGL program
    object and resources.uniform_locations with a dictionary mapping each
    active uniform's name to its location. Progress and any compiler or
    linker messages are written to stderr.
    '''
    writelog = sys.stderr.write

    def report(infolog):
        # An empty info log means there were no warnings to show.
        if not infolog:
            writelog("...completed\n")
        else:
            writelog("...completed with messages:\n")
            writelog(infolog)
            writelog("\n")

    def compile_shader(source, gltype, name):
        writelog("Compiling {0} shader...\n".format(name))
        shader = make_shader(gltype, source)
        report(GL.glGetShaderInfoLog(shader))
        return shader

    shaders = [
        compile_shader(vertex_shader, GL.GL_VERTEX_SHADER, 'vertex'),
        compile_shader(fragment_shader, GL.GL_FRAGMENT_SHADER, 'fragment'),
        compile_shader(geometry_shader, GL.GL_GEOMETRY_SHADER, 'geometry')]

    writelog("Compiling shader program...\n")
    program = make_program(*shaders)
    report(GL.glGetProgramInfoLog(program))

    active_uniforms = GL.glGetProgramiv(program, GL.GL_ACTIVE_UNIFORMS)
    uniform_locations = {}
    for index in range(active_uniforms):
        name, size, data_type = GL.glGetActiveUniform(program, index)
        # The index passed to glGetActiveUniform only enumerates the active
        # uniforms; it is not the value glUniform* expects. The location
        # has to be queried separately by name.
        uniform_locations[name] = GL.glGetUniformLocation(program, name)
    resources.uniform_locations = uniform_locations
    resources.shader_program_object = program

def make_shader(shadertype, source):
    '''
    Compile and return an OpenGL shader object.

    shadertype is the GL shader type enum and source is the GLSL text.
    If compilation fails, the shader's info log is written to stderr, the
    shader object is deleted and an Exception is raised.
    '''
    shader = GL.glCreateShader(shadertype)
    GL.glShaderSource(shader, source)
    GL.glCompileShader(shader)
    # Start from zero so that a status which was never written reads as a
    # failure instead of being mistaken for success.
    status = ctypes.c_int()
    GL.glGetShaderiv(shader, GL.GL_COMPILE_STATUS, status)
    if not status.value:
        # Fetch the log before the shader object is deleted, and keep it on
        # the same stream as the failure banner.
        infolog = GL.glGetShaderInfoLog(shader)
        sys.stderr.write("Failed to compile shader.\n")
        sys.stderr.write(infolog)
        sys.stderr.write("\n")
        GL.glDeleteShader(shader)
        raise Exception("Failed to compile shader.")
    return shader

def make_program(*shaders):
    '''
    Link the given shader objects and return an OpenGL program object.

    If linking fails, the program's info log is written to stderr, the
    program object is deleted and an Exception is raised.
    '''
    program = GL.glCreateProgram()
    for shader in shaders:
        GL.glAttachShader(program, shader)
    GL.glLinkProgram(program)
    status = ctypes.c_int()
    GL.glGetProgramiv(program, GL.GL_LINK_STATUS, status)
    if not status.value:
        # Fetch the log before the program object is deleted, and keep it
        # on the same stream as the failure banner.
        infolog = GL.glGetProgramInfoLog(program)
        sys.stderr.write("Failed to link shader program.\n")
        sys.stderr.write(infolog)
        sys.stderr.write("\n")
        GL.glDeleteProgram(program)
        raise Exception("Failed to link shader program.")
    return program


######## RESOURCE ALLOCATION ########

class Resources(object):
    '''
    Plain attribute container; callers attach whatever fields they need.
    '''

def make_resources(screen_dimensions):
    '''
    Allocate everything the renderer needs and return it on a Resources
    object.

    screen_dimensions is a (width, height) pair; it sizes the screenshot
    read-back buffer. The returned object carries the vertex array and its
    layout (stride and per-attribute byte offsets), the texture array, the
    shader program with its uniform locations, an OpenGL buffer name and
    the screenshot buffer.
    '''
    resources = Resources()

    vertex_dtype = numpy.dtype([
        ("position", ("f4", 2)),
        ("size", ("f4", 2)),
        ("other", ("f4", 2))])
    # Take the stride and attribute offsets from the dtype itself so they
    # cannot drift from the record layout above. dtype.fields maps each
    # field name to a tuple whose second entry is the field's byte offset.
    resources.vertex_stride = vertex_dtype.itemsize
    resources.position_stream_offset = vertex_dtype.fields["position"][1]
    resources.size_stream_offset = vertex_dtype.fields["size"][1]
    resources.other_stream_offset = vertex_dtype.fields["other"][1]
    resources.vertex_array = numpy.zeros(512, dtype=vertex_dtype)

    spacemen_image = pygame.image.load('diagnostic_numbers.png')
    resources.spacemen_texture = make_texture_array(spacemen_image, 16, 16)

    create_shader_program(resources)

    resources.array_buffer = GL.glGenBuffers(1)

    # One RGBA byte quadruple per pixel, rows first.
    w, h = screen_dimensions
    resources.save_buffer = numpy.zeros((h, w, 4), dtype="u1")

    return resources


######## SCREENSHOT #########

# pygame.surfarray.make_surface is broken in 1.9.1. It reads uninitialized
# stack contents on 64-bit systems. :( Here we use numpy to do the copying
# instead.
def make_surface(array):
    '''
    Copy a (width, height, channels) array into a new 32-bit pygame surface.

    With 4 channels the surface is created with per-pixel alpha and the
    fourth channel is copied into it; with 3 channels only colour is
    copied. Any other channel count raises ValueError.
    '''
    width, height, channels = array.shape
    if channels not in (3, 4):
        raise ValueError("Array must have minor dimension of 3 or 4.")

    size = (width, height)
    if channels == 3:
        surface = pygame.Surface(size, depth=32)
        colour = pygame.surfarray.pixels3d(surface)
        colour[:,:,:channels] = array
        return surface

    surface = pygame.Surface(size, depth=32, flags=pygame.SRCALPHA)
    colour = pygame.surfarray.pixels3d(surface)
    colour[:,:,:] = array[:,:,:3]
    opacity = pygame.surfarray.pixels_alpha(surface)
    opacity[:,:] = array[:,:,3]
    return surface

class Screenshotter(object):
    '''
    Captures screenshots from OpenGL and records them by SHA1 hash.

    Each distinct frame (by hash) is saved once as a PNG; hashes_seen
    counts how many times every distinct frame has been captured.
    '''
    def __init__(self, save_buffer, screen_dimensions):
        # save_buffer is the array glReadPixels writes into;
        # screen_dimensions is the (width, height) region to read.
        self.hashes_seen = collections.Counter()
        self.save_buffer = save_buffer
        self.screen_dimensions = screen_dimensions

    def get_filename(self, screen_hash):
        '''Return the PNG filename used for a frame with this hash.'''
        return screen_hash + ".out.png"

    def take_screenshot(self):
        '''
        Read the framebuffer into save_buffer, hash it, and save a PNG the
        first time a given hash is seen. Always increments the hash count.
        '''
        w, h = self.screen_dimensions
        save_buffer = self.save_buffer
        # NOTE(review): main() calls this after render() has already called
        # pygame.display.flip(). Confirm which buffer glReadPixels reads
        # from at that point and that its contents are defined after the
        # swap.
        GL.glReadPixels(0, 0, w, h, GL.GL_RGBA, GL.GL_UNSIGNED_BYTE, save_buffer)
        screen_hash = hashlib.sha1(save_buffer.view("u1")).hexdigest()
        if self.hashes_seen[screen_hash] == 0:
            # Reorder to (x, y, channel) and reverse the second axis before
            # handing the pixels to make_surface.
            oriented = numpy.swapaxes(save_buffer, 0, 1)[:,::-1,:]
            surf = make_surface(oriented)
            filename = self.get_filename(screen_hash)
            pygame.image.save(surf, filename)
            print(filename)
        self.hashes_seen[screen_hash] += 1

    def print_summary(self):
        '''Print every distinct frame's filename and count, most seen first.'''
        # most_common() orders by descending count and keeps the relative
        # order of entries whose counts are equal.
        for screen_hash, count in self.hashes_seen.most_common():
            print("{0} {1}".format(self.get_filename(screen_hash), count))


######## RENDERING ########

def prepare_context(resources, zoom, screen_dimensions):
    '''
    Set up all OpenGL state needed before frames can be drawn.

    Sets the viewport and blending, activates the shader program and its
    uniforms, binds the texture array to unit 0, uploads the vertex array
    into the array buffer and describes the three vertex attributes.
    '''
    locations = resources.uniform_locations
    screen_w, screen_h = screen_dimensions

    GL.glViewport(0, 0, screen_w, screen_h)

    # Standard alpha blending.
    GL.glEnable(GL.GL_BLEND)
    GL.glBlendFunc(GL.GL_SRC_ALPHA, GL.GL_ONE_MINUS_SRC_ALPHA)

    GL.glUseProgram(resources.shader_program_object)

    GL.glUniform2f(locations['cam_position'], 0, 0)
    GL.glUniform1f(locations['zoom'], zoom)
    GL.glUniform2f(locations['screen_dimensions'], screen_w, screen_h)

    # The sampler reads from texture unit 0.
    GL.glActiveTexture(GL.GL_TEXTURE0)
    GL.glBindTexture(GL.GL_TEXTURE_2D_ARRAY, resources.spacemen_texture)
    GL.glUniform1i(locations['texture_atlas'], 0)

    # Upload the whole vertex array once.
    vertices = resources.vertex_array
    GL.glBindBuffer(GL.GL_ARRAY_BUFFER, resources.array_buffer)
    GL.glBufferData(
        GL.GL_ARRAY_BUFFER, vertices.nbytes, vertices, GL.GL_STATIC_DRAW)

    # Attribute index -> byte offset within one vertex record.
    attribute_offsets = (
        resources.position_stream_offset,
        resources.size_stream_offset,
        resources.other_stream_offset)
    for index, _ in enumerate(attribute_offsets):
        GL.glEnableVertexAttribArray(index)
    # Every attribute is two floats, not normalised.
    for index, offset in enumerate(attribute_offsets):
        GL.glVertexAttribPointer(
                index, 2, GL.GL_FLOAT, GL.GL_FALSE, resources.vertex_stride,
                ctypes.cast(offset, ctypes.c_void_p))

def render(resources, zoom, vertex_count):
    '''
    Draw a single frame and present it.

    Clears the colour buffer to grey, draws `vertex_count` points from the
    currently configured vertex arrays and flips the display. The
    `resources` and `zoom` arguments are accepted but not used here.
    '''
    background = (0.4, 0.4, 0.4, 1.0)
    GL.glClearColor(*background)
    GL.glClear(GL.GL_COLOR_BUFFER_BIT)
    first_vertex = 0
    GL.glDrawArrays(GL.GL_POINTS, first_vertex, vertex_count)
    pygame.display.flip()


######## MAIN LOOP ########

def main():
    '''
    Open a window, fill the vertex array with a 32-wide grid of unit
    quads, then render a fixed number of frames, taking a screenshot after
    each one and printing a summary of the distinct frames at the end.
    '''
    pygame.init()
    screen_dimensions = 512, 256
    pygame.display.set_mode(screen_dimensions, PG.OPENGL|PG.DOUBLEBUF)
    resources = make_resources(screen_dimensions)
    frames = 3000
    zoom = 32.0
    vertex_count = 512
    screenshotter = Screenshotter(resources.save_buffer, screen_dimensions)

    # Lay the points out row by row, 32 per row. Consecutive pairs of
    # points carry the high and low hex digit of the pair's index as their
    # texture layer.
    scale = 32.0
    for index in xrange(vertex_count):
        row = index // 32
        column = index % 32
        y = (15 - row) / 32.0 * scale - scale/4.0 + (scale/2.0/32.0)
        x = column / 32.0 * scale - scale/2.0 + (scale/2.0/32.0)
        pair = index // 2
        low_digit = pair % 16
        high_digit = (pair // 16) % 16
        if index % 2 == 0:
            layer = high_digit
        else:
            layer = low_digit
        resources.vertex_array[index] = ((x, y), (1, 1), (0, layer))

    prepare_context(resources, zoom, screen_dimensions)

    quit_requested = False
    for frame in xrange(frames):
        if quit_requested:
            break
        if frame % 100 == 0:
            print("{0}/{1}".format(frame, frames))
        # Drain the event queue; a QUIT still lets this frame finish.
        event = pygame.event.poll()
        while event.type != PG.NOEVENT:
            if event.type == PG.QUIT:
                quit_requested = True
            event = pygame.event.poll()
        render(resources, zoom, vertex_count)
        screenshotter.take_screenshot()
    print("---")
    screenshotter.print_summary()

# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()

它还需要工作目录中有一个名为diagnostic_numbers.png的PNG文件：

以下是它通常显示的示例：

Intended rendering

每个方块都是一个单独的点，通过几何着色器膨胀为四边形。每个输入顶点都有一个2D位置（第一个顶点在左上角，然后它们在屏幕下排成行），一个大小（它们都是宽度和高度1）一个旋转（它们都有旋转0）和一个层（0-15）。该图层确定要从中渲染纹理数组的哪一层。

但是，对于某些顶点，某些帧会使用不正确的图层进行渲染。 E.g：

A glitch

由于顶点全部按顺序排列，因此可以看到每个毛刺是一个由8个相邻顶点组成的块。这似乎总是如此。另外，出错的顶点似乎总是使用了数组中恰好靠前80个顶点处开始的那一块顶点的层值来渲染。在我检查过的每个案例中都是如此。我也不认为我曾经在前 ~~128~~ 256个顶点（屏幕的上半部分）内观察到故障——它只会在此后的某个位置出现。

这是每一帧运行的所有GL代码*：

def render(resources, zoom, vertex_count):
    '''
    Render one frame.

    Clears the colour buffer, draws `vertex_count` points and flips the
    display. `resources` and `zoom` are not used in this function.
    '''
    # Grey, fully opaque background.
    GL.glClearColor(0.4, 0.4, 0.4, 1.0)
    GL.glClear(GL.GL_COLOR_BUFFER_BIT)
    # One point per vertex, starting from vertex 0.
    GL.glDrawArrays(
        GL.GL_POINTS,
        0,
        vertex_count)
    pygame.display.flip()

程序将运行3000帧，计算每个渲染帧的SHA1散列并为每个不同的输出帧保存PNG。以下是我的机器上典型控制台输出的示例：

Compiling vertex shader...
...completed
Compiling fragment shader...
...completed
Compiling geometry shader...
...completed
Compiling shader program...
...completed
0/3000
6fdbf7d09076ef084e57b90d7d445d2e56c54ab8.out.png
100/3000
200/3000
300/3000
3c7558f5a257c6b53fae9815df0ee8d457db9b19.out.png
400/3000
500/3000
66edb4d0fb88951af944c717c37b92f5cfa37cd0.out.png
4c3844a6879af3992081807e1e429e8ac83753f5.out.png
c538c6cddea6c6f53c3c968ebf8ab46bceb017f9.out.png
600/3000
700/3000
800/3000
900/3000
1000/3000
443afa3ee4c28611f0cc6b6a60712de503e34f8e.out.png
1100/3000
1200/3000
1300/3000
1400/3000
1500/3000
231d09f859aac29aef23d0c590187071e4fad321.out.png
1600/3000
1700/3000
1800/3000
df3051821a6d8327c77cfcf3d8053fdfcaf13e32.out.png
1900/3000
50ac6e618e5b58a8709baf557a757d62b041ef36.out.png
c53a53ba0f4d2401094cf25b271ab2e50b3909bf.out.png
2000/3000
5cb6c6989c24dc16e123d74f46985ebb243b2935.out.png
183cb8faaf1241526bd74e1f2ca65e6d89ab6c74.out.png
2100/3000
3666fcaac2d9d9b555c23367e5f01bb4f435cb65.out.png
2200/3000
69e14d278f2e340f50acc6274922d0cecc932ecd.out.png
1620c62c2eb151244ce929aa37a7dbc97c5def54.out.png
2300/3000
2400/3000
5932cf4ce48ad73bee1b35dd88b59d745ac3c493.out.png
2500/3000
2600/3000
dce25b06206bf0e671fb46e3365b9a42f1146813.out.png
2700/3000
2800/3000
2900/3000
---
6fdbf7d09076ef084e57b90d7d445d2e56c54ab8.out.png 2821
c538c6cddea6c6f53c3c968ebf8ab46bceb017f9.out.png 93
3c7558f5a257c6b53fae9815df0ee8d457db9b19.out.png 46
5cb6c6989c24dc16e123d74f46985ebb243b2935.out.png 12
df3051821a6d8327c77cfcf3d8053fdfcaf13e32.out.png 7
183cb8faaf1241526bd74e1f2ca65e6d89ab6c74.out.png 4
c53a53ba0f4d2401094cf25b271ab2e50b3909bf.out.png 3
69e14d278f2e340f50acc6274922d0cecc932ecd.out.png 3
4c3844a6879af3992081807e1e429e8ac83753f5.out.png 3
dce25b06206bf0e671fb46e3365b9a42f1146813.out.png 1
1620c62c2eb151244ce929aa37a7dbc97c5def54.out.png 1
5932cf4ce48ad73bee1b35dd88b59d745ac3c493.out.png 1
443afa3ee4c28611f0cc6b6a60712de503e34f8e.out.png 1
3666fcaac2d9d9b555c23367e5f01bb4f435cb65.out.png 1
66edb4d0fb88951af944c717c37b92f5cfa37cd0.out.png 1
231d09f859aac29aef23d0c590187071e4fad321.out.png 1
50ac6e618e5b58a8709baf557a757d62b041ef36.out.png 1

大多数帧都按预期渲染，但是相当多的帧表现出了毛刺，并且一些毛刺模式更常见。

如果要运行代码，则需要Python 2.7，pygame，PyOpenGL和numpy。在Ubuntu上，我安装的软件包是python-numpy，python-opengl和python-pygame。

我在64位Ubuntu上的NVIDIA驱动程序版本310和313上尝试过它，并得到了相同的结果。我的硬件（如lspci所示）是“NVIDIA Corporation G98M [GeForce 9300M GS]”。

我不确定现在要检查什么。我想我已经正确上传了顶点数据，因为它至少在某些时候正确呈现，而且我只在开始时上传一次。但是我每帧做的事情非常少，我看不出那里能有什么错误。着色器编译器没有警告。我接下来应该尝试什么？它有可能是驱动程序错误吗？我怎么知道它是不是？

* - 除了捕获屏幕截图的代码，但可以禁用，并且仍然会发生故障。

我尝试过的事情：

将EndPrimitive（）添加到几何着色器的末尾。没有区别。
重新排列顶点数组中的字段。没有区别。
在着色器中为属性指定不同的位置。如果我将位置放在位置2，则毛刺将影响位置的y元素。

Answer 1

我没有运行你的代码，坦率地说，它对于通灵调试来说太复杂了。但是这个问题一直存在，实际上并不容易处理。以下是一些帮助我的方法：

在你的调试中非常正式，有点像。记录您尝试的内容以及发生的事情。一次只能更改一个变量。
Sprinkle glGetError断言所有地方。永远不要在没有断言的情况下调用glGetError。
跟踪你的滑动。 glIs ..应该始终符合你的想象。
始终运行单线程。
这样，运行几个不同的驱动程序和hw。这解决了99％的案件。
不要一上来就责怪驱动程序。驱动程序确实会有错误，但这通常只是最方便的借口，而不是最可能的原因。
仔细查找与其他工作代码不同的内容。
一个问题是您正在使用额外的抽象层。错误是出在GL中还是python绑定中？如果可以的话，至少要看一下绑定的代码。可能只是这个库在做一些奇怪的事情。最后，排除库的唯一方法是直接用C编写，或者自己编写绑定。
让您的代码可移植。在Windows和Mac和Linux上运行它（奖金android和ios）。跨平台工作的代码不仅确保您只访问经过良好测试的代码路径，而且还可以获得三个OS层检查的好处。
尝试不同的硬件，操作系统和驱动程序版本。
使用供应商调试工具。人们提到了gDebugger、PerfHud，但所有这些都值得试一试。Apple的Instruments很好。Qualcomm的Adreno工具也是如此。它们都非常挑剔，而且往往所有以前的要点都不如完全设置其中一个以便你理解它足以让你信任它。
构建一个最小的repro案例并向供应商提交错误。这个时间框架通常太长而无法使用。如果供应商接受错误，请务必要求解决方法。从错误修复到公众修复可能很容易两年。

希望这会有所帮助。这很难也很奇怪。与CPU调试不同。

Answer 2

我能够在Windows 8上使用驱动程序版本320.49在我的GTX 660 Ti上成功运行程序。我从来没有能够像你发布的屏幕截图那样出现故障。我不知道你是否尝试过这个，但可能值得看看会发生什么。

在你的geom着色器中有这样一行：

vertOut.layer = vert[0].layer;。您是否尝试将其设置为常量值0？如果是这样，它会被传递到片段着色器，没有任何问题，能够运行整个3000帧没有任何故障？如果这样可行，你知道它可能不是纹理数组搞砸了，并且与图层值的计算有关。如果这显示出毛刺，那么纹理数组可能会在沿线的某处乱搞，并且你的geom着色器很好。

Answer 3

这可能不是一个完全回答这个问题的答案，但我在我的openGL应用程序中遇到了奇怪的故障，当我寻找答案时，我登陆了这个页面。我发现我的问题的解决方案很可能是我给了glDrawArrays（...）一个太高的错误顶点数。

如何诊断奇怪的OpenGL故障？

3 个答案: