Question

我已按PBRT中的描述成功实施了BVH。这个问题虽然有一个小问题 - 遍历通过与光线相交的所有节点，这是错误的（在性能方面）。

所以我最终优化了光线遍历，目前我使用的是Aila＆amp; amp; Laine实施了他们的“了解GPU上的光线效率”。首先，这是代码：

INLINE bool BVH::Traverse(TriangleWoop* prims, Ray* ray, IntersectResult* result)
{
unsigned int todo[32];
unsigned int todoOffset = 0;
unsigned int nodeNum = 0;

bool hit = false;
IntersectResult tmp = IntersectResult();
*(int*)&tmp.data.w = -1;
float tmin = 2e30f;

float4 origin = ray->origin;
float4 direction = ray->direction;
float4 invdir = rcp(direction);

float tmpx = 0.0f, tmpy = 0.0f;

while(true)
{
    while(this->nodes[nodeNum].prim_count == 0)
    {
        tmpx += 0.01f;
        tmpy += 0.001f;

        float4 c0v1 = (this->nodes[nodeNum + 1].bounds.minPt - origin) * invdir;
        float4 c0v2 = (this->nodes[nodeNum + 1].bounds.maxPt - origin) * invdir;
        float4 c1v1 = (this->nodes[this->nodes[nodeNum].above_child].bounds.minPt - origin) * invdir;
        float4 c1v2 = (this->nodes[this->nodes[nodeNum].above_child].bounds.maxPt - origin) * invdir;
        float4 c0n = f4min(c0v1, c0v2);
        float4 c0f = f4max(c0v1, c0v2);
        float4 c1n = f4min(c1v1, c1v2);
        float4 c1f = f4max(c1v1, c1v2);

        float n0 = max(c0n.x, max(c0n.y, c0n.z));
        float f0 = min(c0f.x, min(c0f.y, c0f.z));
        float n1 = max(c1n.x, max(c1n.y, c1n.z));
        float f1 = min(c1f.x, min(c1f.y, c1f.z));

        bool child0 = (f0 > 0.0f) && (n0 < f0);
        bool child1 = (f1 > 0.0f) && (n1 < f1);

        child0 &= (n0 < tmin);
        child1 &= (n1 < tmin);

        unsigned int nodeAddr = this->nodes[nodeNum].above_child;
        nodeNum = nodeNum + 1;

        if(child0 != child1)
        {
            if(child1)
            {
                nodeNum = nodeAddr;
            }
        }
        else
        {
            if(!child0)
            {
                if(todoOffset == 0)
                {
                    goto result;
                }

                nodeNum = todo[--todoOffset];
            }
            else
            {
                if(n1 < n0)
                {
                    swap(nodeNum, nodeAddr);
                }

                todo[todoOffset++] = nodeAddr;
            }
        }
    }

    if(this->nodes[nodeNum].prim_count > 0)
    {
        for(unsigned int i = this->nodes[nodeNum].prim_offset; i < this->nodes[nodeNum].prim_offset + this->nodes[nodeNum].prim_count; i++)
        {
            const TriangleWoop* tri = &prims[this->indexes[i]];

            if(IntersectRayTriangleWoop(ray, tri, &tmp))
            {
                if(tmp.data.z > 0.0f && tmp.data.z < result->data.z)
                {
                    tmin = tmp.data.z;
                    result->data.z = tmp.data.z;
                    result->data.x = tmp.data.x;
                    result->data.y = tmp.data.y;
                    *(int*)&result->data.w = this->indexes[i];
                    hit = true;
                }
            }
        }
    }

    if(todoOffset == 0)
    {
        goto result;
    }

    nodeNum = todo[--todoOffset];
}

result:
result->data.x = tmpx;
result->data.y = tmpy;

return hit;
}

从技术上讲，它只是一个标准的while-while stack ray-bvh遍历。现在回到主要问题，查看下一张图片（从外面查看sponza），用彩色可以看到BVH中有多少节点被访问过（全红= 100，全黄= 1100）： Sponza BVH "heat" map from outside

下图显示了类似的情况： Sponza BVH "heat" map from inside

正如您所看到的，这是一个问题 - 它只需要遍历比预期更多的节点。有人可以看到我的代码有问题吗？欢迎任何建议，因为我已经坚持了几天，并且无法想出一些解决方案。

边界体积层次结构光线遍历问题

0 个答案: