我遇到的问题是:下面的像素着色器(HLSL)会编译为 68 条指令(已应用下文建议的优化)。但我想在 Shader Model 2 下使用它,因此很遗憾最多只能使用 64 条指令。有没有人能看出,在不改变着色器结果的前提下还有哪些可能的优化?
该着色器将屏幕上一个大致呈圆形的区域(边界为正弦波形)从 RGB 转换为 白色 -> 红色 -> 黑色 的渐变,并附带一些额外的亮度等调整。
着色器代码为:
// Normalized timefactor (1 = fully enabled)
// Used by method() as the lerp weight between the transformed and the sampled color.
float timeFactor;
// Center of "light"
// Subtracted from texCoord in method(), so it is expressed in the same space as texCoord.
float x;
float y;
// Size of "light"
// Both values are thresholds compared against distQ in method():
// below viewsizeQ the full transformation applies, between viewsizeQ and
// fadesizeQ the effect fades out linearly.
// NOTE(review): the "Q" suffix presumably means "squared" - confirm with the caller.
float viewsizeQ;
float fadesizeQ;
// Rotational shift
// Added to the atan2() angle before it is multiplied by 13 and fed into sin().
float angleShift;
// Resolution
// Width/height scale the texCoord-space offset component-wise before the distance is computed.
float screenResolutionWidth;
float screenResolutionHeight;
// Amplitude of the sinusoidal term that is subtracted from the squared distance.
float screenZoomQTimesX;
// Texture sampler
sampler TextureSampler : register(s0);
// Pixel shader entry point.
// Samples TextureSampler at texCoord and, for pixels whose distQ is below
// fadesizeQ, blends the sampled color with a remap of its contrast-boosted
// greyscale value. Pixels with distQ >= fadesizeQ are returned unchanged.
//
// texCoord: texture coordinate of the current pixel.
// returns:  the resulting color; alpha is always the sampled alpha.
float4 method(float2 texCoord : TEXCOORD0) : COLOR0
{
    // New color after transformation (only .rgb is written and read)
    float4 newColor;
    // Look up the texture color.
    float4 color = tex2D(TextureSampler, texCoord);
    // Calculate distance: offset from the pixel to the light center, scaled by the resolution
    float2 delta = (float2(x, y) - texCoord.xy)
                 * float2(screenResolutionWidth, screenResolutionHeight);
    // Squared length of delta, minus a sine of the shifted angle around the
    // center (13 periods per full turn) scaled by screenZoomQTimesX.
    float distQ = dot(delta, delta) - sin((atan2(delta.x, delta.y) + angleShift) * 13) * screenZoomQTimesX;
    // Within fadeSize
    if (distQ < fadesizeQ)
    {
        // Make greyscale
        float grey = dot(color.rgb, float3(0.3, 0.59, 0.11));
        // Increase contrast by applying a color transformation based on a quasi-sigmoid gamma curve
        grey = 1 / (1 + pow(1.25 - grey / 2, 16));
        // Transform Black/White color range to Black/Red/White color range
        // 1 -> 0.5f ... White -> Red
        if (grey >= 0.75)
        {
            newColor.r = 0.7 + 0.3 * color.r;
            // Rescale [0.75, 1] to [0, 1] before driving green/blue
            grey = (grey - 0.75) * 4;
            newColor.gb = 0.7 * grey + 0.3 * color.gb;
        }
        else // 0.5f -> 0 ... Red -> Black
        {
            newColor.r = 1.5 * 0.7 * grey + 0.3 * color.r;
            newColor.gb = 0.3 * color.gb;
        }
        // Within viewSize (Full transformation, only blend with timefactor)
        if (distQ < viewsizeQ)
        {
            color.rgb = lerp(newColor.rgb, color.rgb, timeFactor);
        }
        // Outside viewSize but still in fadeSize (Spatial fade-out but also with timefactor)
        else
        {
            float factor = timeFactor * (1 - (distQ - viewsizeQ) / (fadesizeQ - viewsizeQ));
            color.rgb = lerp(newColor.rgb, color.rgb, factor);
        }
    }
    // The listing was cut off before this point: without the return statement
    // and the closing brace the function does not compile.
    return color;
}
答案 0 :(得分:5)
有几个小的改进点:你用单独的 x、y 表示灯光中心,又用单独的变量表示屏幕宽度/高度。
替换为:
// Center of the "light" (replaces the separate x / y floats)
float2 light;
// Screen resolution (replaces screenResolutionWidth / screenResolutionHeight)
float2 screenResolution;
然后在你的代码中:
// Offset from the pixel to the light center, scaled component-wise by the resolution
float2 delta = (light - texCoord.xy) * screenResolution;
这样应该可以再减少 2 条指令。
其次是 atan2 的使用,它可能是最耗指令的部分。
你可以再声明一个 float2(float2 vecshift),其中 x = cos(angleShift)、y = sin(angleShift)。只需在 CPU 上预先计算好这个值即可。
然后你可以这样做(本质上是用叉积来提取角度,而不是使用 atan2):
// Unit-length direction of delta
float2 dn = normalize(delta);
// 2D cross product of dn with vecshift
float cr = dn.x *vecshift.y -dn.y * vecshift.x;
// Squared length of delta minus the sinusoidal term; asin(cr) stands in for the atan2-based angle
float distQ = dot(delta, delta) - sin((asin(cr))*13) *screenZoomQTimesX;
请注意,我不太喜欢这种对某个值先取 asin 再取 sin 的写法,但多项式形式并不适合你的用例。我相信应该有比 sin(asin(...)) 更简洁的调制方式 :)
使用 ?: 三元运算符代替 if / else 有时也有助于减少指令数。
// Blend weight is timeFactor inside viewsizeQ, otherwise timeFactor scaled by the linear fade-out towards fadesizeQ
color.rgb = lerp(newColor.rgb, color.rgb, distQ < viewsizeQ ? timeFactor : timeFactor * (1 - (distQ - viewsizeQ) / (fadesizeQ - viewsizeQ)));
这样可以减少 2 条指令。
下面的完整版本编译后为 60 条指令。
// Normalized timefactor (1 = fully enabled)
// Used by method() as the lerp weight between the transformed and the sampled color.
float timeFactor;
// Center of the "light"; subtracted from texCoord in method().
float2 light;
// Thresholds compared against distQ: below viewsizeQ the full transformation
// applies, between viewsizeQ and fadesizeQ the effect fades out linearly.
float viewsizeQ;
float fadesizeQ;
// Component-wise scale applied to the texCoord-space offset.
float2 screenResolution;
// Amplitude of the sinusoidal term subtracted from the squared distance.
float screenZoomQTimesX;
// Rotational shift as a 2D vector.
// NOTE(review): expected to be (cos(angleShift), sin(angleShift)) precomputed on the CPU - confirm in the host code.
float2 vecshift;
// Texture sampler
sampler TextureSampler : register(s0);
// Pixel shader entry point (variant without atan2).
// Samples TextureSampler at texCoord and, for pixels whose distQ is below
// fadesizeQ, blends the sampled color with a remap of its contrast-boosted
// greyscale value. Pixels with distQ >= fadesizeQ are returned unchanged.
//
// texCoord: texture coordinate of the current pixel.
// returns:  the resulting color; alpha is always the sampled alpha.
float4 method(float2 texCoord : TEXCOORD0) : COLOR0
{
    // New color after transformation (only .rgb is written and read)
    float4 newColor;
    // Look up the texture color.
    // Fixed: the sampler declared in this file is TextureSampler; "Samp" was undeclared.
    float4 color = tex2D(TextureSampler, texCoord);
    // Calculate distance: offset from the pixel to the light center, scaled by the resolution
    float2 delta = (light - texCoord.xy) * screenResolution;
    // Unit-length direction of delta
    float2 dn = normalize(delta);
    // 2D cross product of dn with vecshift; asin(cr) stands in for the atan2-based angle
    float cr = dn.x * vecshift.y - dn.y * vecshift.x;
    float distQ = dot(delta, delta) - sin((asin(cr)) * 13) * screenZoomQTimesX;
    //float distQ = dot(delta, delta) - a13 *screenZoomQTimesX;
    if (distQ < fadesizeQ)
    {
        // Make greyscale
        float grey = dot(color.rgb, float3(0.3, 0.59, 0.11));
        // Increase contrast by applying a color transformation based on a quasi-sigmoid gamma curve
        grey = 1 / (1 + pow(1.25 - grey / 2, 16));
        // Transform Black/White color range to Black/Red/White color range
        // 1 -> 0.5f ... White -> Red
        if (grey >= 0.75)
        {
            newColor.r = 0.7 + 0.3 * color.r;
            // Rescale [0.75, 1] to [0, 1] before driving green/blue
            grey = (grey - 0.75) * 4;
            newColor.gb = 0.7 * grey + 0.3 * color.gb;
        }
        else // 0.5f -> 0 ... Red -> Black
        {
            newColor.r = 1.5 * 0.7 * grey + 0.3 * color.r;
            newColor.gb = 0.3 * color.gb;
        }
        // Blend weight is timeFactor inside viewsizeQ, otherwise timeFactor
        // scaled by the linear fade-out towards fadesizeQ.
        color.rgb = lerp(newColor.rgb, color.rgb, distQ < viewsizeQ ? timeFactor : timeFactor * (1 - (distQ - viewsizeQ) / (fadesizeQ - viewsizeQ)));
    }
    return color;
}
答案 1 :(得分:4)
一些建议
如果 power 的取值范围是 0 到 1,则可以创建一张 1 x 256 的纹理(或任何能最好地保留该函数形状的水平尺寸)作为查找表,并使用 tex1D 直接查找当前 power 对应的值。您需要在 CPU 上运行该函数来填充这张纹理,但只需在加载时执行一次。
不要把插值手动展开写成:
color.rgb = /*0.7 */ factor * newColor.rgb + /*0.3 **/ (1 - factor) * color.rgb;
而是使用:
color.rgb = lerp(newColor.rgb, color.rgb, factor);
(lerp 在大多数 GPU 上通常会编译为一条汇编指令),从而为您节省指令。
答案 2 :(得分:1)
通过使用更多的 lerp,我把指令数降到了 64 条。查找表没有帮助,因为 atan2 实际产生的指令比纹理查找还要少。
// Normalized timefactor (1 = fully enabled)
// Scales the blend weight passed to lerp() in method().
float timeFactor;
// Center of "light"
// Subtracted from texCoord in method(), so it is expressed in the same space as texCoord.
float x;
float y;
// Size of "light"
// Both values are compared against / combined with distQ in method():
// pixels with distQ >= fadesizeQ are left untouched, and the blend weight
// falls off linearly between viewsizeQ and fadesizeQ.
float viewsizeQ;
float fadesizeQ;
// Rotational shift
// Added to the atan2() angle before it is multiplied by 13 and fed into sin().
float angleShift;
// Resolution
// Width/height scale the texCoord-space offset component-wise before the distance is computed.
float screenResolutionWidth;
float screenResolutionHeight;
// Amplitude of the sinusoidal term that is subtracted from the squared distance.
float screenZoomQTimesX;
// Texture sampler
sampler TextureSampler : register(s0);
// Pixel shader entry point (lerp-based variant).
// Samples TextureSampler at texCoord; pixels with distQ >= fadesizeQ are
// returned unchanged. Otherwise the contrast-boosted greyscale value is mapped
// onto a black -> red -> white gradient and blended over the sampled color.
//
// texCoord: texture coordinate of the current pixel.
// returns:  the resulting color; alpha is always the sampled alpha.
float4 method(float2 texCoord : TEXCOORD0) : COLOR0
{
    float4 newColor;
    // Look up the texture color.
    float4 color = tex2D(TextureSampler, texCoord);
    // Calculate distance: offset from the pixel to the light center, scaled by the resolution
    float2 delta = (float2(x, y) - texCoord.xy)
                 * float2(screenResolutionWidth, screenResolutionHeight);
    // Squared length of delta, minus a sine of the shifted angle around the
    // center (13 periods per full turn) scaled by screenZoomQTimesX.
    float distQ = dot(delta, delta) - sin((atan2(delta.x, delta.y) + angleShift) * 13) * screenZoomQTimesX;
    // Outside fadeSize: No color transformation
    if (distQ >= fadesizeQ) return color;
    // Otherwise (within color transformed region) /////////////////////////////////////////////////////////
    // Make greyscale
    float grey = dot(color.rgb, float3(0.3, 0.59, 0.11));
    // Increase contrast by applying a color transformation based on a quasi-sigmoid gamma curve
    grey = 1 / (1 + pow(1.25 - grey / 2, 16));
    // Transform greyscale to white->red->black gradient
    // 1 -> 0.5f ... White -> Red
    if (grey >= 0.5)
    {
        // Fixed: the original call was missing its closing parenthesis and semicolon.
        newColor = lerp(float4(0.937,0.104,0.104,1), float4(1,1,1,1), 2 * (grey-0.5));
    }
    else // 0.5f -> 0 ... Red -> Black
    {
        newColor = lerp(float4(0,0,0,1), float4(0.937,0.104,0.104,1), 2 * grey);
    }
    // Blend weight: timeFactor scaled by the linear fall-off between viewsizeQ
    // and fadesizeQ; saturate() clamps it to [0, 1], which also covers the
    // region inside viewsizeQ where the unclamped fall-off exceeds 1.
    float factor = saturate(timeFactor * (1 - (distQ - viewsizeQ) / (fadesizeQ - viewsizeQ)));
    color.rgb = lerp(color.rgb, newColor.rgb, factor);
    return color;
}