在AMD上缓慢的模板纹理

时间:2017-09-19 15:46:47

标签: opengl fbo stencil-buffer

我正在尝试为修改版的 Doom3 引擎添加软阴影,做法是使用 FBO 加模板纹理附件,并在光照交互片段着色器中绑定并采样该纹理。它工作得很好,但在 Radeon 460 上存在严重的性能问题(我没有其他 AMD GPU,但估计情况相同甚至更糟,因为这块显卡相对较新)。

使用的是最新的驱动程序。

帧率下降非常严重,以至于用 qglCopyTexImage2D 把模板复制到另一张纹理(每个光源都要复制一次!)实际上都比直接绑定 FBO 中使用的模板纹理更快。

另一个问题是,当我尝试用 qglCopyTexSubImage2D 代替 qglCopyTexImage2D 来做优化时,画面会开始闪烁。

其他程序员对模板纹理有什么实用的建议吗?

nVidia和英特尔在速度方面表现都很出色。

        globalImages->currentRenderImage->Bind();
        globalImages->currentRenderImage->uploadWidth = curWidth; // used as a shader param
        globalImages->currentRenderImage->uploadHeight = curHeight;
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
        qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined

        globalImages->currentRenderFbo->Bind();
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
        qglTexImage2D( GL_TEXTURE_2D, 0, r_fboColorBits.GetInteger() == 15 ? GL_RGB5_A1 : GL_RGBA, curWidth, curHeight, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL ); //NULL means reserve texture memory, but texels are undefined

        if ( glConfig.vendor != glvAny ) { 
            globalImages->currentStencilFbo->Bind();
            globalImages->currentStencilFbo->uploadWidth = curWidth;
            globalImages->currentStencilFbo->uploadHeight = curHeight;
            qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
            qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
            qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
            qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
            qglTexImage2D( GL_TEXTURE_2D, 0, GL_STENCIL_INDEX8, curWidth, curHeight, 0, GL_STENCIL_INDEX, GL_UNSIGNED_BYTE, 0 );
        }

        globalImages->currentDepthImage->Bind();
        globalImages->currentDepthImage->uploadWidth = curWidth; // used as a shader param
        globalImages->currentDepthImage->uploadHeight = curHeight;
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE );
        qglTexParameterf( GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE );
        if ( glConfig.vendor == glvIntel ) { // FIXME allow 24-bit depth for low-res monitors
            qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_COMPONENT16, curWidth, curHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, 0 );
        } else {
            qglTexImage2D( GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL, curWidth, curHeight, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0 );
        }
    }

    // (re-)attach textures to FBO
    if ( !fboId || r_fboSharedColor.IsModified() || r_fboSharedDepth.IsModified() ) {
        // create a framebuffer object, you need to delete them when program exits.
        if ( !fboId )
            qglGenFramebuffers( 1, &fboId );
        qglBindFramebuffer( GL_FRAMEBUFFER_EXT, fboId );
        // attach a texture to FBO color attachement point
        qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, globalImages->currentRenderImage->texnum, 0 );
        // attach a renderbuffer to depth attachment point
        GLuint depthTex = r_fboSharedDepth.GetBool() ? globalImages->currentDepthImage->texnum : globalImages->currentDepthFbo->texnum;
        qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
        if ( glConfig.vendor == glvIntel ) // separate stencil, thank God
            qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, globalImages->currentStencilFbo->texnum, 0 );
        else
            qglFramebufferTexture2D( GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthTex, 0 );
        int status = qglCheckFramebufferStatus( GL_FRAMEBUFFER );
        if ( GL_FRAMEBUFFER_COMPLETE != status ) { // something went wrong, fall back to default
            common->Printf( "glCheckFramebufferStatus %d\n", status );
            qglDeleteFramebuffers( 1, &fboId );
            fboId = 0; // try from scratch next time
            r_useFbo.SetBool( false );
        }
        qglBindFramebuffer( GL_FRAMEBUFFER, 0 ); // not obvious, but let it be 
    }
    qglBindFramebuffer( GL_FRAMEBUFFER, fboId );
    qglClear( GL_COLOR_BUFFER_BIT ); // otherwise transparent skybox blends with previous frame
    fboUsed = true;
    GL_CheckErrors();
}

/*
 Soft shadows vendor specific implementation
 Intel: separate stencil, direct access, fastest
 nVidia: combined stencil & depth, direct access, fast
 AMD: combined stencil & depth, direct access very slow, resorting to stencil copy
 */

/*
 FB_CopyStencil

 AMD-only workaround used by soft shadows: copies the full-screen
 (glConfig.vidWidth x glConfig.vidHeight) depth/stencil contents of the
 currently bound framebuffer into the currentStencilFbo texture.
 Does nothing on other vendors or when r_softShadows is off.
 */
void FB_CopyStencil() {
    // Other vendors do not take the copy path at all.
    if ( glConfig.vendor != glvAMD ) {
        return;
    }
    // The copy is only needed while soft shadows are enabled.
    if ( !r_softShadows.GetBool() ) {
        return;
    }

    // Bind the destination image so the copy below targets it
    // (assumes idImage::Bind() binds to GL_TEXTURE_2D -- TODO confirm).
    globalImages->currentStencilFbo->Bind();

    // Redefine mip level 0 as a GL_DEPTH_STENCIL copy of the whole screen,
    // starting at the lower-left corner (0, 0), with no border.
    qglCopyTexImage2D(
        GL_TEXTURE_2D, 0, GL_DEPTH_STENCIL,
        0, 0,
        glConfig.vidWidth, glConfig.vidHeight,
        0 );

    // NOTE: a variant that bound currentDepthFbo and used qglCopyTexSubImage2D
    // limited to backEnd.currentScissor was previously present here but disabled.

    GL_CheckErrors();
}

void FB_BindStencilTexture() {
    const GLenum GL_DEPTH_STENCIL_TEXTURE_MODE = 0x90EA;
    idImage* stencil = glConfig.vendor != glvAny ? globalImages->currentStencilFbo : globalImages->currentDepthImage;
    stencil->Bind();
    if ( glConfig.vendor != glvIntel )
        glTexParameteri( GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX );
}

1 个答案:

答案 0 :(得分:0)

我最终使用了两个帧缓冲区:一个用于阴影,另一个用于其余的所有内容。阴影纹理在前者中作为 FBO 附件,在后者中则作为 texture2D 绑定。