我正在测试在计算着色器中写入2D和3D纹理,输出由32位浮点数组成的梯度噪声纹理。写入2D纹理效果很好,但是写入3D纹理则不行。与2D纹理相比,创建3D纹理时还需要考虑其他因素吗?
下面如何定义3D纹理的代码:
HRESULT BaseComputeShader::CreateTexture3D(UINT width, UINT height, UINT depth, DXGI_FORMAT format, ID3D11Texture3D** texture)
{
D3D11_TEXTURE3D_DESC textureDesc;
ZeroMemory(&textureDesc, sizeof(textureDesc));
textureDesc.Width = width;
textureDesc.Height = height;
textureDesc.Depth = depth;
textureDesc.MipLevels = 1;
textureDesc.Format = format;
textureDesc.Usage = D3D11_USAGE_DEFAULT;
textureDesc.BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS;
textureDesc.CPUAccessFlags = 0;
textureDesc.MiscFlags = 0;
return renderer->CreateTexture3D(&textureDesc, 0, texture);
}
HRESULT BaseComputeShader::CreateTexture3DUAV(UINT depth, DXGI_FORMAT format, ID3D11Texture3D** texture, ID3D11UnorderedAccessView** unorderedAccessView)
{
D3D11_UNORDERED_ACCESS_VIEW_DESC uavDesc;
ZeroMemory(&uavDesc, sizeof(uavDesc));
uavDesc.Format = format;
uavDesc.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE3D;
uavDesc.Texture3D.MipSlice = 0;
uavDesc.Texture3D.FirstWSlice = 0;
uavDesc.Texture3D.WSize = depth;
return renderer->CreateUnorderedAccessView(*texture, &uavDesc, unorderedAccessView);
}
HRESULT BaseComputeShader::CreateTexture3DSRV(DXGI_FORMAT format, ID3D11Texture3D** texture, ID3D11ShaderResourceView** shaderResourceView)
{
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc;
ZeroMemory(&srvDesc, sizeof(srvDesc));
srvDesc.Format = format;
srvDesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
srvDesc.Texture3D.MostDetailedMip = 0;
srvDesc.Texture3D.MipLevels = 1;
return renderer->CreateShaderResourceView(*texture, &srvDesc, shaderResourceView);
}
以及如何在计算着色器中写入它:
// The texture we're writing to
RWTexture3D<float> outputTexture : register(u0);
[numthreads(8, 8, 8)]
void main(uint3 DTid : SV_DispatchThreadID)
{
float noiseValue = 0.0f;
float value = 0.0f;
float localAmplitude = amplitude;
float localFrequency = frequency;
// Loop for the number of octaves, running the noise function as many times as desired (8 is usually sufficient)
for (int k = 0; k < octaves; k++)
{
noiseValue = noise(float3(DTid.x * localFrequency, DTid.y * localFrequency, DTid.z * localFrequency)) * localAmplitude;
value += noiseValue;
// Calculate a new amplitude based on the input persistence/gain value
// amplitudeLoop will get smaller as the number of layers (i.e. k) increases
localAmplitude *= persistence;
// Calculate a new frequency based on a lacunarity value of 2.0
// This gives us 2^k as the frequency
// i.e. Frequency at k = 4 will be f * 2^4 as we have looped 4 times
localFrequency *= 2.0f;
}
// Output value to 2D index in the texture provided by thread indexing
outputTexture[DTid.xyz] = value;
}
最后,我如何运行着色器:
// Set the shader
deviceContext->CSSetShader(computeShader, nullptr, 0);
// Set the shader's buffers and views
deviceContext->CSSetConstantBuffers(0, 1, &cBuffer);
deviceContext->CSSetUnorderedAccessViews(0, 1, &textureUAV, nullptr);
// Launch the shader
deviceContext->Dispatch(512, 512, 512);
// Reset the shader now we're done
deviceContext->CSSetShader(nullptr, nullptr, 0);
// Reset the shader views
ID3D11UnorderedAccessView* ppUAViewnullptr[1] = { nullptr };
deviceContext->CSSetUnorderedAccessViews(0, 1, ppUAViewnullptr, nullptr);
// Create the shader resource view for access in other shaders
HRESULT result = CreateTexture3DSRV(DXGI_FORMAT_R32_FLOAT, &texture, &textureSRV);
if (result != S_OK)
{
MessageBox(NULL, L"Failed to create texture SRV after compute shader execution", L"Failed", MB_OK);
exit(0);
}
答案 0 :(得分:0)
我的简单错误。计算着色器线程的数量受到限制。在计算着色器中,您最多只能有1024个线程,并且调度调用最多只能调度65535个线程组。 HLSL编译器将解决前一个问题,而Visual C ++编译器将不解决后一个问题。
答案 1 :(得分:0)
如果您创建512 * 512 * 512的纹理(这似乎是您要实现的目标),则需要按组划分调度:
deviceContext->Dispatch(512 / 8, 512 / 8, 512 / 8);
在您先前的情况下,调度是: 512 * 8 * 512 * 8 * 512 * 8 = 68719476736单位
很可能触发了超时检测并导致驱动程序崩溃
65535的限制是每个尺寸,因此在您的情况下,您可以完全安全地运行它。
最后一个,您可以在创建3d纹理之后(在分派调用之前)立即创建着色器资源视图和无序视图。
通常建议这样做,以避免混淆上下文代码和资源创建代码。
在创建资源时,您的检查也无效:
if (result != S_OK)
HRESULT成功条件为> = 0
您可以改用内置宏,例如:
if (SUCCEEDED(result))