优化WebGL着色器?

时间:2017-02-14 20:20:55

标签: opengl-es glsl webgl shader raytracing

我编写了以下着色器来渲染由一组同心圆构成的图案。最终,我希望每个旋转的球体都成为一个光源,从而做出类似 these lines(原帖中的示例链接)那样的效果。

当然,我现在正在做最基本的部分来渲染不同的对象。

不幸的是,着色器非常慢(在高端macbook上全屏16fps)。我很确定这是因为我在着色器中有许多for循环和分支。我想知道如何以更加性能优化的方式实现我想要实现的几何体:

编辑:您可以在此处运行着色器:https://www.shadertoy.com/view/lssyRH

我遗漏的一个明显的优化是:目前每个片段都要对每个图形周围的全部24个小圆逐一进行检查。如果先检查片段是否落在图形的外边界之内,就可以很快、很容易地完全跳过这些检查。我想我只是想弄清楚做这类事情的最佳实践是什么。

// Number of diagram columns (bound of the outer loop in mainImage).
#define N 10
// Number of diagram rows (bound of the inner loop in mainImage).
#define M 5
// Number of small circles tested on the ring around each diagram center.
#define K 24
// Pi; mainImage multiplies it by 2.0 to spread the K circles over a full turn.
#define M_PI 3.1415926535897932384626433832795

// Fragment entry point: draws an N x M grid of diagrams. Each diagram is a
// white center marker, one disc orbiting the center over time, and a ring of
// K small discs around the center.
//   fragColor - output color of this fragment. It is only assigned when the
//               fragment lies inside one of the discs; otherwise the function
//               falls off the end without writing it.
//   fragCoord - pixel coordinate of this fragment.
// iResolution and iGlobalTime are not declared here; they are supplied by the
// host environment (the question runs this on Shadertoy).
void mainImage( out vec4 fragColor, in vec2 fragCoord )
{
    float aspectRatio = iResolution.x / iResolution.y;

    // Extent of the drawing area in uv units: height 1, width = aspect ratio.
    float h = 1.0;
    float w = aspectRatio;

    // Normalize the pixel coordinate; x is scaled by the aspect ratio so one
    // uv unit has the same on-screen length along both axes.
    vec2 uv = vec2(fragCoord.x / iResolution.x * aspectRatio, fragCoord.y / iResolution.y); 

    // Geometry constants, all in uv units.
    float radius = 0.01;            // radius of the orbiting disc
    float orbitR = 0.02;            // distance of the orbiting disc from the center
    float orbiterRadius = 0.005;    // never read below
    float centerRadius = 0.002;     // radius of the white center marker
    float encloseR = 2.0 * orbitR;  // distance of the ring discs from the center
    float encloserRadius = 0.002;   // radius of each ring disc
    // Grid pitch: N (resp. M) centers evenly spaced strictly inside the area.
    float spacingX = (w / (float(N) + 1.0));
    float spacingY = h / (float(M) + 1.0);
    // Scratch coordinates reused for the orbiting disc and the ring discs.
    float x = 0.0;
    float y = 0.0;
    vec4 totalLight = vec4(0.0, 0.0, 0.0, 1.0); // never read below
    // Every fragment is tested against every diagram; the first hit returns.
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < M; j++) {
            // compute the center of the diagram
            vec2 center = vec2(spacingX * (float(i) + 1.0), spacingY * (float(j) + 1.0));
            // Position of the orbiting disc at the current time; note that
            // cos/sin of the time are re-evaluated on every (i, j) iteration.
            x =  center.x + orbitR * cos(iGlobalTime);
            y =  center.y + orbitR * sin(iGlobalTime);
            vec2 bulb = vec2(x,y);
            if (length(uv - center) < centerRadius) {
                // frag intersects white center marker                   
                fragColor = vec4(1.0);
                return;               
            } else if (length(uv - bulb) < radius) {
                // intersects rotating "light"
                fragColor = vec4(uv,0.5+0.5*sin(iGlobalTime),1.0);
                return;
            } else {
                // intersects one of the enclosing 24 cylinders
                for(int k = 0; k < K; k++) {
                    // Angle of the k-th ring disc, evenly spaced over a full turn.
                    float theta = M_PI * 2.0 * float(k)/ float(K);
                    x = center.x + cos(theta) * encloseR;
                    y = center.y + sin(theta) * encloseR;
                    vec2 encloser = vec2(x,y);
                    if (length(uv - encloser) < encloserRadius) {
                        fragColor = vec4(uv,0.5+0.5*sin(iGlobalTime),1.0);
                    return;
                    }
                }   
            }
        }
    }

    // Reaching this point means nothing was hit: fragColor has not been written.

}

2 个答案:

答案 0 :(得分:1)

考虑到您想优化的是片段着色器(并且仅限于片段着色器),建议如下:

  1. 将 sin(iGlobalTime) 和 cos(iGlobalTime) 移出循环:它们在整个绘制调用期间保持不变,因此无需在每次循环迭代时重新计算。
  2. GPU会尽可能使用矢量化指令集(SIMD),请加以利用。在本可以用一条向量指令完成的地方执行多个标量操作,会浪费大量周期(请参阅带注释的代码)。
  3. 用平方值进行半径检查,把 sqrt(即 length)留到真正需要的时候再用。
  4. 用浮点常量替换对常量(即循环上限)的 float 转换(聪明的着色器编译器会自动做这件事,但不能指望它一定会做)。
  5. 不要在着色器中留下未定义行为(例如某些路径不写入 gl_FragColor)。
以下是着色器的优化和注释版本(与您提供的代码一样,仍包含未定义行为)。注释的形式为:

    // annotation
    // old code, if any
    new code
    
    // Integer loop bounds: N x M grid of diagrams, K discs on each ring.
    #define N 10
    #define M 5
    #define K 24
    // Float twins of the bounds above, so the shader needs no float(N)-style
    // conversions of constants; keep each one in sync with its integer partner.
    #define fN 10.0
    #define fM 5.0
    #define fK 24.0
    // Pi, and a predefined 2 * Pi so the full turn is not recomputed in the loop.
    #define M_PI 3.1415926535897932384626433832795
    #define M_PI2 6.28318531
    
    // Fragment entry point: draws an N x M grid of diagrams, each made of a
    // white center marker, one disc orbiting the center and a ring of K discs.
    //   fragColor - output color; NOTE: as in the original, it is still left
    //               unwritten when the fragment hits nothing (undefined behavior)
    //   fragCoord - pixel coordinate of this fragment
    void mainImage( out vec4 fragColor, in vec2 fragCoord )
    {
        float aspectRatio = iResolution.x / iResolution.y;
    
        // we dont need these separate
        // float h = 1.0;
        // float w = aspectRatio;
    
        // use vector ops(2 divs 1 mul => 1 div 1 mul)
        // vec2 uv = vec2(fragCoord.x / iResolution.x * aspectRatio, fragCoord.y / iResolution.y); 
        vec2 uv = fragCoord.xy / iResolution.xy;
        uv.x *= aspectRatio;
    
        // most of the following declarations should be predefined  or marked as "const"...
    
        float radius = 0.01;
        // precalc squared radius
        float radius2 = radius*radius;
        float orbitR = 0.02;
        // was never used
        // float orbiterRadius = 0.005;
        float centerRadius = 0.002;
        // precalc squared center radius
        float centerRadius2 = centerRadius * centerRadius;
        float encloseR = 2.0 * orbitR;
        float encloserRadius = 0.002;
        // precalc squared encloser radius
        float encloserRadius2 = encloserRadius * encloserRadius;
    
        // Use float constants and vector ops here(2 casts 2 adds 2 divs => 1 add 1 div)
        // float spacingX = w / (float(N) + 1.0);
        // float spacingY = h / (float(M) + 1.0);
        vec2 spacing = vec2(aspectRatio, 1.0) / (vec2(fN, fM)+1.);
    
        // calc sin and cos of global time
        // saves N*M(sin,cos,2 muls) 
        vec2 stct = vec2(sin(iGlobalTime), cos(iGlobalTime));
        // stct holds (sin, cos), but the original orbit is x = cos, y = sin;
        // swizzle to (cos, sin) so the light keeps its position and direction
        // vec2 orbit = orbitR * stct;
        vec2 orbit = orbitR * stct.yx;
    
        // not needed anymore
        // float x = 0.0;
        // float y = 0.0;
    
        // was never used
        // vec4 totalLight = vec4(0.0, 0.0, 0.0, 1.0);
    
        for (int i = 0; i < N; i++) {
            for (int j = 0; j < M; j++) {
                // compute the center of the diagram
                // Use vector ops
                // vec2 center = vec2(spacingX * (float(i) + 1.0), spacingY * (float(j) + 1.0));
                vec2 center = spacing * (vec2(i,j)+1.0);
    
                // Again use vector opts, use precalced time trig(orbit = orbitR * (cos, sin))
                // x = center.x + orbitR * cos(iGlobalTime);
                // y = center.y + orbitR * sin(iGlobalTime);
                // vec2 bulb = vec2(x,y);
                vec2 bulb = center + orbit;
                // calculate offsets
                vec2 centerOffset = uv - center;
                vec2 bulbOffset = uv - bulb;
                // use squared length check
                // if (length(uv - center) < centerRadius) {
                if (dot(centerOffset, centerOffset) < centerRadius2) {
                    // frag intersects white center marker                   
                    fragColor = vec4(1.0);
                    return;               
                // use squared length check
                // } else if (length(uv - bulb) < radius) {
                } else if (dot(bulbOffset, bulbOffset) < radius2) {
                    // Use precalced sin global time in stct.x
                    // intersects rotating "light"
                    fragColor = vec4(uv,0.5+0.5*stct.x,1.0);
                    return;
                } else {
                    // intersects one of the enclosing 24 cylinders
                    for(int k = 0; k < K; k++) {
                        // use predefined 2*PI and float K
                        float theta = M_PI2 * float(k) / fK;
                        // Use vector ops(2 muls 2 adds => 1 mul 1 add)
                        // x = center.x + cos(theta) * encloseR;
                        // y = center.y + sin(theta) * encloseR;
                        // vec2 encloser = vec2(x,y);
                        vec2 encloseOffset = uv - (center + vec2(cos(theta),sin(theta)) * encloseR);
                        if (dot(encloseOffset,encloseOffset) < encloserRadius2) {
                            fragColor = vec4(uv,0.5+0.5*stct.x,1.0);
                            return;
                        }
                    }   
                }
            }
        }
        // nothing was hit: fragColor is intentionally left as in the original
        // (unwritten), see the note in the header comment
    }
    

答案 1 :(得分:0)

我又思考了一下……我意识到,优化它的最佳方法其实是改变逻辑:在对各个小圆进行相交测试之前,先检查片段是否落在这一组圆的边界之内。这样它就能以60fps运行:

此处示例: https://www.shadertoy.com/view/lssyRH