我正在使用条件循环,分支和使用我的计算着色器(DX11)CS 5.0更新UAV时遇到奇怪的行为。 我想通过Sparse Voxel Octtree砖进行一次幼稚的漫射。数据通过BFS线性化。结构(LWSVO =轻量级SVO)如下:
struct LWSVO
{
int firstChild;
int bitfield;
int level;
int m_colour;
int m_normal;
float3 m_position;
};
firstChild 是任何节点的第一个孩子的索引。如果是叶子,则第一个子代为-1。 bitfield 是有效(1)和无效(0)子节点的位置。我还将数据从压缩的24字节较小结构解压缩到此结构。我已经全面检查了拆包的每个部分(实际上是着色器中使用的每个单独功能)。一切正常,我已经根据实际数据进行了检查。
Althoguh以下信息不是很相关,但是这是算法的工作原理(它在CPU中完美运行)。现在,raymarching(某种)算法从八叉树立方体的一个面开始,并沿z轴遍历(每个线程在射线生成中具有固定的x,y)。命中后,它将按给定级别(mapCartesianToIndex())计算节点的索引。因此,当我们有一个针对某个点的索引时,我们将获得一个给定的父节点和一个子位置的偏移量。如果子位置无效,则返回-1。 我们有5种不同的节点状态:空(0),满(1),叶(2),最大级别(3)和无效(-1)。行进在maxlevel的叶子上停止。
代码如下:
/*
Although we dont have ENUMs, lets get this stright:
nodeState::EMPTY = 0;
nodeState::FULL = 1;
nodeState::LEAF = 2;
nodeState::MAXLEVEL = 3;
nodeState::INVALID = -1;
*/
//#define RTX_EPS 0.000001
/*----------------------------------------
* |
* main two LWSVO structures |
*---------------------------------------*/
/*
** The main structure for linear SVO.
*/
struct LWSVO
{
int firstChild;
int bitfield;
int level;
int m_colour;
int m_normal;
float3 m_position;
};
/*
** The compressed structure for LWSVO.
*/
struct LWSVO_
{
//bitfield is Big Endian : 7 6 5 4 3 2 1 0
int firstChild;
int bitf_level_colr_colg;
int colb_norx_nory_norz;
float x;
float y;
float z;
};
StructuredBuffer<LWSVO_> lwSVO_ : register(t0);
RWStructuredBuffer<uint> Result : register (u0);
/*----------------------------------------
* |
* Code related to LWSVO access |
*---------------------------------------*/
LWSVO uncompressSVO(LWSVO_ d)
{
LWSVO l;
int r, g, b;
int norx, nory, norz;
l.firstChild = d.firstChild;
g = (int)(d.bitf_level_colr_colg & 0x000000ff);
r = (int)((d.bitf_level_colr_colg & 0x0000ff00) >> 8);
l.level = (int)((d.bitf_level_colr_colg & 0x00ff0000) >> 16);
l.bitfield = (int)((d.bitf_level_colr_colg & 0xff000000) >> 24);
norz = (int)(d.colb_norx_nory_norz & 0x000000ff);
nory = (int)((d.colb_norx_nory_norz & 0x0000ff00) >> 8);
norx = (int)((d.colb_norx_nory_norz & 0x00ff0000) >> 16);
b = (int)((d.colb_norx_nory_norz & 0xff000000) >> 24);
l.m_colour = (r + (g << 8) + (b << 16));
l.m_normal = (norx + (nory << 8) + (norz << 16));
l.m_position = float3(d.x,d.y,d.z);
return l;
}
void copyNode(LWSVO source, out LWSVO dest)
{
dest.firstChild = source.firstChild;
dest.bitfield = source.bitfield;
dest.level = source.level;
dest.m_colour = source.m_colour;
dest.m_normal = source.m_normal;
dest.m_position = source.m_position;
}
/*Given a node , outputs the state of it. 1 is FULL 2 is LEAF, 0 is EMPTY, but theres no empty node in the ds.*/
int getState(int child)
{
//return NodeState::FULL;
if (child > 0)
return 1;
else if (child == -1)
return 2;
else
return -1; //wont get here
}
/*Given a parent node and a childindex, outputs the index of the child at given index*/
int getChild(LWSVO pNode, int childIndex)
{
//check bitmask
uint primarybf = 0x00000001;
/*int a[8];
a[0] = 1;
a[1] = 2;
a[2] = 4;
a[3] = 8;
a[4] = 16;
a[5] = 32;
a[6] = 64;
a[7] = 128;
*/
primarybf = (primarybf << childIndex);
if ((pNode.bitfield & primarybf) != 0)
{
int count = 0;
for (int i = 0; i < childIndex; i++)
{
uint a = 0x00000001;
a = a << i;
if ((pNode.bitfield & a) != 0) //if bit is set at position i.
{
count = count + 1;
}
}
return (pNode.firstChild + count);
}
else //no child
{
return -1; //means empty
}
//return 0;
}
/*----------------------------------------
* |
* C++ library function ports |
* TODO: optimise later. |
*---------------------------------------*/
//have to implement thge copysign function!
float copysign(in float value, in float source)
{
/*int sourcesign = (int)(source & 0x80000000) >> 31;
int output = 0;
output = (int)sourcesign << 31;
value = (value & 0x7fffffff);
return (output + value);
*/
//here's the thing.. will look for a bitpos solution later.
//lets see if the damn engine works.
if (source >= 0.0f)
{
if (value >= 0.0f)
return value;
return -value;
}
else
{
if (value >= 0.0f)
return -value;
return value;
}
}
int signbit(float source)
{
//int sourcesign = ((int)source >> 31) & 0x00000001;
//return sourcesign;
if (source >= 0)
return 0;
return 1;
}
/*----------------------------------------
* |
* HDR to 8 bit LDR colour & vice versa |
*---------------------------------------*/
int pack8BitColour(float3 colour) //Checked.
{
int r, g, b;
r = int(colour.x*255.0f);
g = int(colour.y*255.0f) << 8;
b = int(colour.z*255.0f) << 16;
return(r + g + b);// +(int)0xff000000);
}
float3 unpack8BitColour(int val) //Checked.
{
float3 output;
output.x = (float)(val & 0x000000ff) / 255.0f;
output.y = (float)((val & 0x0000ff00) >> 8) / 255.0f;
output.z = (float)((val & 0x00ff0000) >> 16) / 255.0f;
return output;
}
/*---------------------------------------
* |
* The brnachless index finding logic. |
* finds the index in the SVO voxel |
* given a level and a position. |
*---------------------------------------*/
void mapCartesianToIndex(float3 position, int maxlevel, out int index[12])
{
//This piece of logic works exactly as it should and produces perfect result in CPU.
bool x, y, z;
//transformation of value to
// -1 ---- 0 ---- +1
//coordinate system
float factor = (pow(2, maxlevel - 2));
float stride = 1 / factor;
float step = stride / 2;
while (maxlevel > 1)
{
int pivot;
float mid;
pivot = floor(abs(position.x) / stride);
mid = float((stride*pivot)) + step;
mid = copysign(mid, position.x);
x = !bool(signbit(position.x - mid));
pivot = floor(abs(position.y) / stride);
mid = float((stride*pivot)) + step;
mid = copysign(mid, position.y);
y = !bool(signbit(position.y - mid));
pivot = floor(abs(position.z) / stride);
mid = float((stride*pivot)) + step;
mid = copysign(mid, position.z);
z = !bool(signbit(position.z - mid));
//index[level - 1].w = 0;
//w = 0;
index[maxlevel - 1] = (int)x * 4 + (int)y * 2 + (int)z;
//factor /= 2;
stride *= 2;
step *= 2;
maxlevel = maxlevel - 1;
}
x = !bool(signbit(position.x));
y = !bool(signbit(position.y));
z = !bool(signbit(position.z));
index[0] = (int)x * 4 + (int)y * 2 + (int)z;
}
/*---------------------------------------
* |
* Raymarching logic. |
* |
* To be replaced by RTX soon. |
*---------------------------------------*/
float3 makeRay(uint3 id)
{
int i = id.x;
int j = id.y;
float zf = 2.0f / 256.0f;
float xx, yy, zz;
xx = max((zf*i - 1.0f) + 0.000001, -1.0f);
yy = max((1.0f - zf * j) - 0.000001, -1.0f);
zz = -1.0f + zf * 126;// +0.000001;
return float3(xx*1.0f, yy*1.0f, zz*1.0f);
}
uint rayMarchAlgo(float3 rayOrigin)
{
volatile uint m_col = 0;
uint maxLev = 8;
float zMinAdvanceFactor = 2.0f / 256.0f;
LWSVO m_node;
//in case while runs more that 256
uint whilemax = 0;
while (rayOrigin.z < 1.0f && whilemax <256)
{
//LWSVO_ struct is compressed 24 bit where data is packed.
//uncompress just converts it to a friendlier format.
m_node = uncompressSVO(lwSVO_[0]);
int childindex[12];
//calculate the indexes of the point rayOrigin for level by level and
//copies the result to the childindex array.
mapCartesianToIndex(rayOrigin, maxLev, childindex);
int nodeState = -1;
for (uint levCount = 0; levCount < maxLev; levCount++)
{
//check the indexes of the hitpoint for level by level.
//if the node is full at this level, go to the next level
//if empty, move z forward.
//if leaf or maxlevel, return the colour.
int offset = 0; //init
offset = getChild(m_node, childindex[levCount]); //gets the
m_node = uncompressSVO(lwSVO_[offset]);
nodeState = getState(m_node.firstChild);
if (offset == -1) //means empty node
{
nodeState = 0; //empty.
}
else
{
m_node = uncompressSVO(lwSVO_[offset]);
nodeState = getState(m_node);
if (m_node.level == maxLev)
{
//m_col = 255 << 8;
m_col = m_node.m_colour;
//neither of above works.
nodeState = 3; //maxlevel
break;
}
}
//dealing with three states.
if (nodeState == 2)
{
m_col = m_node.m_colour; //doesn't work
//m_col = 255; //this doesn't work either.
break;
}
else if (nodeState == 0)
{
break;
}
else if (nodeState == 3)
{
break;
}
else if (nodeState == -1)
{
break;
}
}
if (nodeState == 2 || nodeState == 3)
{
break;
}
else if (nodeState == 0)
{
rayOrigin.z = rayOrigin.z + (1.0f / 128.0f);
}
whilemax = whilemax + 1;
}
return m_col;
}
[numthreads(16, 16, 1)]
void CSMain(uint3 threadid : SV_DispatchThreadID)
{
float3 pt = makeRay(threadid);
int index = threadid.x + threadid.y*256;
col = rayMarchAlgo(pt);
Result[index] = col;
}
第335行,在for循环中,如果我运行了1次(例如,将levCount <1而不是levCount 如果我将levCount 逻辑健全性检查: 正如我所提到的,我已经检查了每个单独的功能,并且所有功能似乎都与实际数据完全吻合。 加载CS:这是我用来加载着色器的代码。 如果您需要更多信息,请告诉我。任何线索都将受到高度赞赏。bool RTX_Renderer::loadComputeShader(LPCWSTR filename, ID3D11ComputeShader** computeShader)
{
DWORD dwShaderFlags = D3DCOMPILE_ENABLE_STRICTNESS;
#if defined( _DEBUG )
dwShaderFlags |= D3DCOMPILE_DEBUG;
#endif
LPCSTR pProfile = (g_d3dDevice->GetFeatureLevel() >= D3D_FEATURE_LEVEL_11_0) ? "cs_5_0" : "cs_4_0";
ID3DBlob* pErrorBlob = NULL;
ID3DBlob* pBlob = NULL;
HRESULT hr = D3DCompileFromFile(filename, NULL, NULL, "CSMain", pProfile, dwShaderFlags, NULL, &pBlob, &pErrorBlob);
if (FAILED(hr))
{
if (pErrorBlob)
OutputDebugStringA((char*)pErrorBlob->GetBufferPointer());
if (pErrorBlob)
pErrorBlob->Release();
if (pBlob)
pBlob->Release();
return false;
}
else
{
hr = g_d3dDevice->CreateComputeShader(pBlob->GetBufferPointer(), pBlob->GetBufferSize(), NULL, computeShader);
if (pErrorBlob)
pErrorBlob->Release();
if (pBlob)
pBlob->Release();
return hr == S_OK;
}
}
答案 0 :(得分:0)
对于将来的参考:通过在调用的函数内部固定传递数组的初始化来解决此问题。有问题的函数是 mapCartesianToIndex()。初始化int索引[12]解决了该问题,并且代码可以正常编译。