Compute Shader无法针对特定(有效)代码进行编译

时间:2019-01-24 19:32:59

标签: directx shader hlsl

我正在使用条件循环,分支和使用我的计算着色器(DX11)CS 5.0更新UAV时遇到奇怪的行为。 我想通过Sparse Voxel Octtree砖进行一次幼稚的漫射。数据通过BFS线性化。结构(LWSVO =轻量级SVO)如下:

struct LWSVO
{
    int firstChild;
    int bitfield; 
    int level;
    int m_colour;
    int m_normal;
    float3 m_position;
};

firstChild 是任何节点的第一个孩子的索引。如果是叶子,则第一个子代为-1。 bitfield 是有效(1)和无效(0)子节点的位置。我还将数据从压缩的24字节较小结构解压缩到此结构。我已经全面检查了拆包的每个部分(实际上是着色器中使用的每个单独功能)。一切正常,我已经根据实际数据进行了检查。

Althoguh以下信息不是很相关,但是这是算法的工作原理(它在CPU中完美运行)。现在,raymarching(某种)算法从八叉树立方体的一个面开始,并沿z轴遍历(每个线程在射线生成中具有固定的x,y)。命中后,它将按给定级别(mapCartesianToIndex())计算节点的索引。因此,当我们有一个针对某个点的索引时,我们将获得一个给定的父节点和一个子位置的偏移量。如果子位置无效,则返回-1。 我们有5种不同的节点状态:空(0),满(1),叶(2),最大级别(3)和无效(-1)。行进在maxlevel的叶子上停止。

代码如下:

/*
Although we dont have ENUMs, lets get this stright:

nodeState::EMPTY = 0;
nodeState::FULL = 1;
nodeState::LEAF = 2;
nodeState::MAXLEVEL = 3;
nodeState::INVALID = -1;

*/


//#define RTX_EPS 0.000001

/*----------------------------------------
*                                        |
*   main two LWSVO structures            |
*---------------------------------------*/

/*
** The main structure for linear SVO.
*/
struct LWSVO
{
    int firstChild;
    int bitfield; 
    int level;
    int m_colour;
    int m_normal;
    float3 m_position;
};


/*
** The compressed structure for LWSVO.
*/
struct LWSVO_
{
    //bitfield is Big Endian : 7 6 5 4 3 2 1 0
    int firstChild;
    int bitf_level_colr_colg;
    int colb_norx_nory_norz;
    float x;
    float y;
    float z;
};


StructuredBuffer<LWSVO_> lwSVO_ : register(t0);

RWStructuredBuffer<uint> Result : register (u0); 



/*----------------------------------------
*                                        |
*   Code related to LWSVO access         |
*---------------------------------------*/
LWSVO uncompressSVO(LWSVO_ d)
{
    LWSVO l;
    int r, g, b;
    int norx, nory, norz;
    l.firstChild = d.firstChild;

    g = (int)(d.bitf_level_colr_colg & 0x000000ff);
    r = (int)((d.bitf_level_colr_colg & 0x0000ff00) >> 8);
    l.level = (int)((d.bitf_level_colr_colg & 0x00ff0000) >> 16);
    l.bitfield = (int)((d.bitf_level_colr_colg & 0xff000000) >> 24);


    norz = (int)(d.colb_norx_nory_norz & 0x000000ff);
    nory = (int)((d.colb_norx_nory_norz & 0x0000ff00) >> 8);
    norx = (int)((d.colb_norx_nory_norz & 0x00ff0000) >> 16);
    b = (int)((d.colb_norx_nory_norz & 0xff000000) >> 24);

    l.m_colour = (r + (g << 8) + (b << 16));
    l.m_normal = (norx + (nory << 8) + (norz << 16));

    l.m_position = float3(d.x,d.y,d.z);

    return l;
}

void copyNode(LWSVO source, out LWSVO dest)
{
    dest.firstChild = source.firstChild;
    dest.bitfield = source.bitfield;
    dest.level = source.level;
    dest.m_colour = source.m_colour;
    dest.m_normal = source.m_normal;
    dest.m_position = source.m_position;

}

/*Given a node , outputs the state of it. 1 is FULL 2 is LEAF, 0 is EMPTY, but theres no empty node in the ds.*/
int getState(int child)
{
    //return NodeState::FULL;
    if (child > 0)
        return 1;
    else if (child == -1)
        return 2;
    else
        return -1; //wont get here
}


/*Given a parent node and a childindex, outputs the index of the child at given index*/
int getChild(LWSVO pNode, int childIndex)
{

    //check bitmask
    uint primarybf = 0x00000001;
    /*int a[8];
    a[0] = 1;
    a[1] = 2;
    a[2] = 4;
    a[3] = 8;
    a[4] = 16;
    a[5] = 32;
    a[6] = 64;
    a[7] = 128;
    */
    primarybf = (primarybf << childIndex);

    if ((pNode.bitfield & primarybf) != 0)
    {
        int count = 0;
        for (int i = 0; i < childIndex; i++)
        {
            uint a = 0x00000001;
            a = a << i;
            if ((pNode.bitfield & a) != 0) //if bit is set at position i.
            {
                count = count + 1;
            }
        }       
        return (pNode.firstChild + count);
    }   
    else //no child
    {
        return -1; //means empty
    }

    //return 0;
}


/*----------------------------------------
*                                        |
*   C++ library function ports           |
*   TODO: optimise later.                |
*---------------------------------------*/

//have to implement thge copysign  function!
float copysign(in float value, in float source)
{
    /*int sourcesign = (int)(source & 0x80000000) >> 31;
    int output = 0;
    output = (int)sourcesign << 31;
    value = (value & 0x7fffffff);
    return (output + value);    
    */

    //here's the thing.. will look for a bitpos solution later.
    //lets see if the damn engine works.
    if (source >= 0.0f)
    {
        if (value >= 0.0f)
            return value;
        return -value;
    }
    else
    {
        if (value >= 0.0f)
            return -value;
        return value;

    }
}

int signbit(float source)
{
    //int sourcesign = ((int)source >> 31) & 0x00000001;
    //return sourcesign;
    if (source >= 0)
        return 0;
    return 1;
}


/*----------------------------------------
*                                        |
*   HDR to 8 bit LDR colour & vice versa |
*---------------------------------------*/

int pack8BitColour(float3 colour) //Checked.
{
    int r, g, b;
    r = int(colour.x*255.0f);
    g = int(colour.y*255.0f) << 8;
    b = int(colour.z*255.0f) << 16;
    return(r + g + b);// +(int)0xff000000);
}

float3 unpack8BitColour(int val) //Checked.
{
    float3 output;
    output.x = (float)(val & 0x000000ff) / 255.0f;
    output.y = (float)((val & 0x0000ff00) >> 8) / 255.0f;
    output.z = (float)((val & 0x00ff0000) >> 16) / 255.0f;
    return output;
}



/*---------------------------------------
*                                       |
*   The brnachless index finding logic. |
*   finds the index in the SVO voxel    |
*   given a level and a position.       |
*---------------------------------------*/

void mapCartesianToIndex(float3 position, int maxlevel, out int index[12])
{

//This piece of logic works exactly as it should and produces perfect result in CPU.

    bool x, y, z;

    //transformation of value to 
    // -1 ---- 0 ---- +1
    //coordinate system

    float factor = (pow(2, maxlevel - 2));
    float stride = 1 / factor;
    float step = stride / 2;

    while (maxlevel > 1)
    {
        int pivot;
        float mid;

        pivot = floor(abs(position.x) / stride);
        mid = float((stride*pivot)) + step;

        mid = copysign(mid, position.x);
        x = !bool(signbit(position.x - mid));

        pivot = floor(abs(position.y) / stride);
        mid = float((stride*pivot)) + step;

        mid = copysign(mid, position.y);
        y = !bool(signbit(position.y - mid));

        pivot = floor(abs(position.z) / stride);
        mid = float((stride*pivot)) + step;

        mid = copysign(mid, position.z);
        z = !bool(signbit(position.z - mid));


        //index[level - 1].w = 0;
        //w = 0;

        index[maxlevel - 1] = (int)x * 4 + (int)y * 2 + (int)z;

        //factor /= 2;
        stride *= 2;
        step *= 2;
        maxlevel = maxlevel - 1;
    }


    x = !bool(signbit(position.x));
    y = !bool(signbit(position.y));
    z = !bool(signbit(position.z));


    index[0] = (int)x * 4 + (int)y * 2 + (int)z;

}




/*---------------------------------------
*                                       |
*   Raymarching logic.                  |
*                                       |
*   To be replaced by RTX soon.         |
*---------------------------------------*/


float3 makeRay(uint3 id)
{
    int i = id.x;
    int j = id.y;

    float zf = 2.0f / 256.0f;
    float xx, yy, zz;
    xx = max((zf*i - 1.0f) + 0.000001, -1.0f);
    yy = max((1.0f - zf * j) - 0.000001, -1.0f);
    zz = -1.0f + zf * 126;// +0.000001;
    return float3(xx*1.0f, yy*1.0f, zz*1.0f);
}



uint rayMarchAlgo(float3 rayOrigin)
{
    volatile uint m_col = 0;
    uint maxLev = 8;

    float zMinAdvanceFactor = 2.0f / 256.0f;
    LWSVO m_node;

    //in case while runs more that 256
    uint whilemax = 0;

    while (rayOrigin.z < 1.0f && whilemax <256) 
    {
        //LWSVO_ struct is compressed 24 bit where data is packed.
        //uncompress just converts it to a friendlier format.

        m_node = uncompressSVO(lwSVO_[0]); 
        int childindex[12];

        //calculate the indexes of the point rayOrigin for level by level and
        //copies the result to the childindex array.

        mapCartesianToIndex(rayOrigin, maxLev, childindex);
        int nodeState = -1;
        for (uint levCount = 0; levCount < maxLev; levCount++)
        {
            //check the indexes of the hitpoint for level by level.
            //if the node is full at this level, go to the next level
            //if empty, move z forward.
            //if leaf or maxlevel, return the colour.

            int offset = 0; //init
            offset = getChild(m_node, childindex[levCount]);  //gets the 
            m_node = uncompressSVO(lwSVO_[offset]);

            nodeState = getState(m_node.firstChild);
            if (offset == -1) //means empty node
            {
                nodeState = 0; //empty.
            }
            else
            {
                m_node = uncompressSVO(lwSVO_[offset]);
                nodeState = getState(m_node);
                if (m_node.level == maxLev)
                {
                    //m_col = 255 << 8;
                    m_col = m_node.m_colour;
                        //neither of above works.
                    nodeState = 3; //maxlevel
                    break;
                }

            }
            //dealing with three states.
            if (nodeState == 2)
            {
                m_col =  m_node.m_colour; //doesn't work
                //m_col = 255; //this doesn't work either.
                break;
            }
            else if (nodeState == 0)
            {
                break;
            }
            else if (nodeState == 3)
            {
                break;
            }
            else if (nodeState == -1)
            {               
                break;
            }

        }
        if (nodeState == 2 || nodeState == 3)
        {
            break;
        }
        else if (nodeState == 0)
        {
            rayOrigin.z = rayOrigin.z + (1.0f / 128.0f);
        }
        whilemax = whilemax + 1;
    }

    return m_col;
}


[numthreads(16, 16, 1)]
void CSMain(uint3 threadid : SV_DispatchThreadID)
{

    float3 pt = makeRay(threadid);

    int index = threadid.x + threadid.y*256;

    col = rayMarchAlgo(pt);
    Result[index] = col; 
}

第335行,在for循环中,如果我运行了1次(例如,将levCount <1而不是levCount

如果我将levCount

逻辑健全性检查:

正如我所提到的,我已经检查了每个单独的功能,并且所有功能似乎都与实际数据完全吻合。

加载CS:这是我用来加载着色器的代码。

bool RTX_Renderer::loadComputeShader(LPCWSTR filename, ID3D11ComputeShader** computeShader)
{
    DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
#if defined( _DEBUG )
    dwShaderFlags |= D3DCOMPILE_DEBUG;
#endif

    LPCSTR pProfile = (g_d3dDevice->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0) ? "cs_5_0" : "cs_4_0";

    ID3DBlob* pErrorBlob = NULL;
    ID3DBlob* pBlob = NULL;
    HRESULT hr = D3DCompileFromFile(filename, NULL, NULL, "CSMain", pProfile, dwShaderFlags, NULL, &pBlob, &pErrorBlob);
    if (FAILED(hr))
    {
        if (pErrorBlob)
            OutputDebugStringA((char*)pErrorBlob->GetBufferPointer());
        if (pErrorBlob)
            pErrorBlob->Release();
        if (pBlob)
            pBlob->Release();

        return false;
    }
    else
    {
        hr = g_d3dDevice->CreateComputeShader(pBlob->GetBufferPointer(), pBlob->GetBufferSize(), NULL, computeShader);
        if (pErrorBlob)
            pErrorBlob->Release();
        if (pBlob)
            pBlob->Release();

        return hr == S_OK;
    }
}

如果您需要更多信息,请告诉我。任何线索都将受到高度赞赏。

1 个答案:

答案 0 :(得分:0)

对于将来的参考:通过在调用的函数内部固定传递数组的初始化来解决此问题。有问题的函数是 mapCartesianToIndex()。初始化int索引[12]解决了该问题,并且代码可以正常编译。