以下着色器代码在编译时使用了过多的指令槽:
// Size used to derive per-sample offsets (main_vertex takes 1 / texture_size)
// and to scale tex0 before frac() in main_fragment.
// NOTE(review): presumably the source texture size in texels - confirm with the host code.
float2 texture_size;
// Matrix the incoming vertex position is multiplied by in main_vertex.
float4x4 matrixTransform;
// Multiplier applied to one side of the edr_left / edr_up comparisons in main_fragment.
const static float coef = 2.0;
// Weights used to collapse each sampled xyz value into a single scalar (via mul) in main_fragment.
const static float3 yuv_weighted = float3(14.352, 28.176, 5.472);
// Sampler read by every tex2D call in main_fragment; bound to register s0.
sampler decal : register(s0);
// Component-wise absolute difference of two float4 values: (|A.x-B.x|, |A.y-B.y|, |A.z-B.z|, |A.w-B.w|).
// This version builds the difference one component at a time before taking abs().
float4 df(float4 A, float4 B)
{
return abs(float4(A.x - B.x, A.y - B.y, A.z - B.z, A.w - B.w));
}
// Component-wise weighted sum of absolute differences:
//   |a-b| + |a-c| + |d-e| + |d-f| + 4*|g-h|
// The (g, h) pair carries four times the weight of each other pair.
float4 weighted_distance(float4 a, float4 b, float4 c, float4 d, float4 e, float4 f, float4 g, float4 h)
{
return (df(a, b) + df(a, c) + df(d, e) + df(d, f) + 4.0 * df(g, h));
}
// Vertex entry point.
//   color, texCoord : inout, not modified here (passed through as-is).
//   position        : inout, replaced by mul(position, matrixTransform).
//   returns (TEXCOORD1): sampling offsets consumed by main_fragment as tex1:
//       xy = (1 / texture_size.x, 0), zw = (0, 1 / texture_size.y).
float4 main_vertex(inout float4 color : COLOR0, inout float2 texCoord : TEXCOORD0, inout float4 position : POSITION0) : TEXCOORD1
{
// Reciprocal of texture_size on each axis.
float2 ps = float2(1.0 / texture_size.x, 1.0 / texture_size.y);
float4 t1;
t1.xy = float2(ps.x, 0); // F: offset main_fragment adds to tex0 to reach sample F
t1.zw = float2(0, ps.y); // H: offset main_fragment adds to tex0 to reach sample H
position = mul(position, matrixTransform);
return t1;
}
/* FRAGMENT SHADER */
// Pixel entry point. Samples 21 points of `decal` around tex0, reduces each to a
// scalar with yuv_weighted, evaluates edge rules in four lanes (x, y, z, w) and
// returns either the centre sample E or one of its four direct neighbours.
//   p    : POSITION0 input; not read in the body.
//   tex0 : coordinate of the centre sample E.
//   tex1 : sampling offsets; xy is used as dx, zw as dy.
//   returns: float4(res.xyz, 1.0) where res is the selected sample.
float4 main_fragment(float4 p : POSITION0, float2 tex0 : TEXCOORD0, float4 tex1 : TEXCOORD1) : COLOR0
{
bool4 edr, edr_left, edr_up, px; // px = pixel, edr = edge detection rule
bool4 ir_lv1, ir_lv2_left, ir_lv2_up;
bool4 nc; // new_color
bool4 fx, fx_left, fx_up; // inequations of straight lines.
// Fractional part of tex0 scaled by texture_size; the (x, y) input of the line inequations below.
float2 fp = frac(tex0 * texture_size);
float2 dx = tex1.xy;
float2 dy = tex1.zw;
// 3x3 block around the centre sample E (rows at -dy, 0, +dy; columns at -dx, 0, +dx).
float3 A = tex2D(decal, tex0 - dx - dy).xyz;
float3 B = tex2D(decal, tex0 - dy).xyz;
float3 C = tex2D(decal, tex0 + dx - dy).xyz;
float3 D = tex2D(decal, tex0 - dx).xyz;
float3 E = tex2D(decal, tex0).xyz;
float3 F = tex2D(decal, tex0 + dx).xyz;
float3 G = tex2D(decal, tex0 - dx + dy).xyz;
float3 H = tex2D(decal, tex0 + dy).xyz;
float3 I = tex2D(decal, tex0 + dx + dy).xyz;
// Samples two steps from E along one axis and one step along the other.
float3 A1 = tex2D(decal, tex0 - dx - 2.0 * dy).xyz;
float3 C1 = tex2D(decal, tex0 + dx - 2.0 * dy).xyz;
float3 A0 = tex2D(decal, tex0 - 2.0 * dx - dy).xyz;
float3 G0 = tex2D(decal, tex0 - 2.0 * dx + dy).xyz;
float3 C4 = tex2D(decal, tex0 + 2.0 * dx - dy).xyz;
float3 I4 = tex2D(decal, tex0 + 2.0 * dx + dy).xyz;
float3 G5 = tex2D(decal, tex0 - dx + 2.0 * dy).xyz;
float3 I5 = tex2D(decal, tex0 + dx + 2.0 * dy).xyz;
// Samples two steps from E along a single axis.
float3 B1 = tex2D(decal, tex0 - 2.0 * dy).xyz;
float3 D0 = tex2D(decal, tex0 - 2.0 * dx).xyz;
float3 H5 = tex2D(decal, tex0 + 2.0 * dy).xyz;
float3 F4 = tex2D(decal, tex0 + 2.0 * dx).xyz;
// Collapse each sampled xyz to one scalar (product with yuv_weighted), four samples per float4.
float4 b = mul(float4x3(B, D, H, F), yuv_weighted);
float4 c = mul(float4x3(C, A, G, I), yuv_weighted);
float4 e = mul(float4x3(E, E, E, E), yuv_weighted);
// Rotated views of b and c. In lane x every variable holds the sample it is named after
// (d.x is D, f.x is F, g.x is G, h.x is H, i.x is I); lanes y, z, w hold the other packed
// samples in rotated order, so each lane applies the same rule to a different arrangement.
float4 d = b.yzwx;
float4 f = b.wxyz;
float4 g = c.zwxy;
float4 h = b.zwxy;
float4 i = c.wxyz;
float4 i4 = mul(float4x3(I4, C1, A0, G5), yuv_weighted);
float4 i5 = mul(float4x3(I5, C4, A1, G0), yuv_weighted);
float4 h5 = mul(float4x3(H5, F4, B1, D0), yuv_weighted);
float4 f4 = h5.yzwx;
// Per-lane coefficients of the inequations  A * fp.y + B * fp.x > C.
// The o / x / y suffixed sets feed fx / fx_left / fx_up respectively.
float4 Ao = float4(1.0, -1.0, -1.0, 1.0);
float4 Bo = float4(1.0, 1.0, -1.0, -1.0);
float4 Co = float4(1.5, 0.5, -0.5, 0.5);
float4 Ax = float4(1.0, -1.0, -1.0, 1.0);
float4 Bx = float4(0.5, 2.0, -0.5, -2.0);
float4 Cx = float4(1.0, 1.0, -0.5, 0.0);
float4 Ay = float4(1.0, -1.0, -1.0, 1.0);
float4 By = float4(2.0, 0.5, -2.0, -0.5);
float4 Cy = float4(2.0, 0.0, -1.0, 0.5);
// These inequations define the line below which interpolation occurs.
fx.x = (Ao.x * fp.y + Bo.x * fp.x > Co.x);
fx_left.x = (Ax.x * fp.y + Bx.x * fp.x > Cx.x);
fx_up.x = (Ay.x * fp.y + By.x * fp.x > Cy.x);
fx.y = (Ao.y * fp.y + Bo.y * fp.x > Co.y);
fx_left.y = (Ax.y * fp.y + Bx.y * fp.x > Cx.y);
fx_up.y = (Ay.y * fp.y + By.y * fp.x > Cy.y);
fx.z = (Ao.z * fp.y + Bo.z * fp.x > Co.z);
fx_left.z = (Ax.z * fp.y + Bx.z * fp.x > Cx.z);
fx_up.z = (Ay.z * fp.y + By.z * fp.x > Cy.z);
fx.w = (Ao.w * fp.y + Bo.w * fp.x > Co.w);
fx_left.w = (Ax.w * fp.y + Bx.w * fp.x > Cx.w);
fx_up.w = (Ay.w * fp.y + By.w * fp.x > Cy.w);
// Per-lane inequality tests on the weighted values; ir_lv1 gates edr,
// ir_lv2_left gates edr_left and ir_lv2_up gates edr_up further down.
ir_lv1.x = ((e.x != f.x) && (e.x != h.x));
ir_lv2_left.x = ((e.x != g.x) && (d.x != g.x));
ir_lv2_up.x = ((e.x != c.x) && (b.x != c.x));
ir_lv1.y = ((e.y != f.y) && (e.y != h.y));
ir_lv2_left.y = ((e.y != g.y) && (d.y != g.y));
ir_lv2_up.y = ((e.y != c.y) && (b.y != c.y));
ir_lv1.z = ((e.z != f.z) && (e.z != h.z));
ir_lv2_left.z = ((e.z != g.z) && (d.z != g.z));
ir_lv2_up.z = ((e.z != c.z) && (b.z != c.z));
ir_lv1.w = ((e.w != f.w) && (e.w != h.w));
ir_lv2_left.w = ((e.w != g.w) && (d.w != g.w));
ir_lv2_up.w = ((e.w != c.w) && (b.w != c.w));
// Two weighted sums of absolute differences; edr requires w1 < w2.
float4 w1 = weighted_distance(e, c, g, i, h5, f4, h, f);
float4 w2 = weighted_distance(h, d, i5, f, i4, b, e, i);
float4 df_fg = df(f, g);
float4 df_hc = df(h, c);
// Operands of the level-2 tests: coef*|f-g| vs |h-c| (edr_left) and coef*|h-c| vs |f-g| (edr_up).
float4 t1 = (coef * df_fg);
float4 t2 = df_hc;
float4 t3 = df_fg;
float4 t4 = (coef * df_hc);
// Each rule is evaluated per component and packed back into a bool4.
edr = bool4((w1.x < w2.x) && ir_lv1.x,
(w1.y < w2.y) && ir_lv1.y,
(w1.z < w2.z) && ir_lv1.z,
(w1.w < w2.w) && ir_lv1.w);
edr_left = bool4((t1.x <= t2.x) && ir_lv2_left.x,
(t1.y <= t2.y) && ir_lv2_left.y,
(t1.z <= t2.z) && ir_lv2_left.z,
(t1.w <= t2.w) && ir_lv2_left.w);
edr_up = bool4((t4.x <= t3.x) && ir_lv2_up.x,
(t4.y <= t3.y) && ir_lv2_up.y,
(t4.z <= t3.z) && ir_lv2_up.z,
(t4.w <= t3.w) && ir_lv2_up.w);
// A lane asks for a new colour when its edge rule holds and at least one of its
// line inequations holds (the left/up ones only count if their own rule holds).
nc.x = (edr.x && (fx.x || edr_left.x && fx_left.x || edr_up.x && fx_up.x));
nc.y = (edr.y && (fx.y || edr_left.y && fx_left.y || edr_up.y && fx_up.y));
nc.z = (edr.z && (fx.z || edr_left.z && fx_left.z || edr_up.z && fx_up.z));
nc.w = (edr.w && (fx.w || edr_left.w && fx_left.w || edr_up.w && fx_up.w));
// t1 / t2 are reused here: px is true where |e-f| <= |e-h|.
t1 = df(e, f);
t2 = df(e, h);
px = bool4(t1.x <= t2.x,
t1.y <= t2.y,
t1.z <= t2.z,
t1.w <= t2.w);
// The first lane (in x, y, z, w order) with nc set picks between two neighbours via px;
// if no lane is set, the centre sample E is kept.
float3 res = nc.x ? px.x ? F : H :
nc.y ? px.y ? B : F :
nc.z ? px.z ? D : B :
nc.w ? px.w ? H : D : E;
return float4(res.x, res.y, res.z, 1.0);
}
// Single-pass technique: main_vertex is compiled for the vs_3_0 target and
// main_fragment for the ps_3_0 target.
technique T0
{
pass P0
{
VertexShader = compile vs_3_0 main_vertex();
PixelShader = compile ps_3_0 main_fragment();
}
}
目前代码使用了 601 个指令槽,而上限是 512 个,因此编译时会出现以下错误:
编译后的着色器代码使用了过多的指令槽(601)。目标(ps_3_0)允许的最大值为 512。
最初代码有 649 个指令槽,我已将其降低到 601 个。但现在我不知道还能从哪里继续优化。如能提供帮助,不胜感激。
答案 0 :(得分:0)
感谢 @Gnietschow 提供的信息,我解决了这个问题。以下优化版本使用的指令槽少于 512 个。
// Size used to derive per-sample offsets (main_vertex takes 1 / texture_size)
// and to scale tex0 before frac() in main_fragment.
// NOTE(review): presumably the source texture size in texels - confirm with the host code.
float2 texture_size;
// Matrix the incoming vertex position is multiplied by in main_vertex.
float4x4 matrixTransform;
// Multiplier applied to one side of the edr_left / edr_up comparisons in main_fragment.
const static float coef = 2.0;
// Weights used to collapse each sampled xyz value into a single scalar (via mul) in main_fragment.
const static float3 yuv_weighted = float3(14.352, 28.176, 5.472);
// Sampler read by every tex2D call in main_fragment; bound to register s0.
sampler decal : register(s0);
// Component-wise absolute difference |A - B| of two float4 values,
// computed with a single vector subtraction.
float4 df(float4 A, float4 B)
{
return abs(A - B);
}
// Component-wise weighted sum of absolute differences:
//   |a-b| + |a-c| + |d-e| + |d-f| + 4*|g-h|
// The (g, h) pair carries four times the weight of each other pair.
float4 weighted_distance(float4 a, float4 b, float4 c, float4 d,
float4 e, float4 f, float4 g, float4 h)
{
return (df(a, b) + df(a, c) + df(d, e) + df(d, f) + 4.0 * df(g, h));
}
// Vertex entry point.
//   color, texCoord : inout, not modified here (passed through as-is).
//   position        : inout, replaced by mul(position, matrixTransform).
//   returns (TEXCOORD1): sampling offsets consumed by main_fragment as tex1:
//       xy = (1 / texture_size.x, 0), zw = (0, 1 / texture_size.y).
float4 main_vertex(inout float4 color : COLOR0, inout float2 texCoord : TEXCOORD0,
inout float4 position : POSITION0) : TEXCOORD1
{
// Reciprocal of texture_size on each axis.
float2 ps = float2(1.0 / texture_size.x, 1.0 / texture_size.y);
float4 t1;
t1.xy = float2(ps.x, 0); // F: offset main_fragment adds to tex0 to reach sample F
t1.zw = float2(0, ps.y); // H: offset main_fragment adds to tex0 to reach sample H
position = mul(position, matrixTransform);
return t1;
}
/* FRAGMENT SHADER */
// Pixel entry point. Samples 21 points of `decal` around tex0, reduces each to a
// scalar with yuv_weighted, evaluates edge rules in four lanes (x, y, z, w) and
// returns either the centre sample E or one of its four direct neighbours.
//   p    : POSITION0 input; not read in the body.
//   tex0 : coordinate of the centre sample E.
//   tex1 : sampling offsets; xy is used as dx, zw as dy.
//   returns: float4(res.xyz, 1.0) where res is the selected sample.
float4 main_fragment(float4 p : POSITION0, float2 tex0 : TEXCOORD0,
                     float4 tex1 : TEXCOORD1) : COLOR0
{
    bool4 edr, edr_left, edr_up, px; // px = pixel, edr = edge detection rule
    bool4 ir_lv1, ir_lv2_left, ir_lv2_up;
    bool4 nc; // new_color
    bool4 fx, fx_left, fx_up; // inequations of straight lines.

    // Fractional part of tex0 scaled by texture_size; the (x, y) input of the line inequations below.
    float2 fp = frac(tex0 * texture_size);
    float2 dx = tex1.xy;
    float2 dy = tex1.zw;

    // 3x3 block around the centre sample E (rows at -dy, 0, +dy; columns at -dx, 0, +dx).
    float3 A = tex2D(decal, tex0 - dx - dy).xyz;
    float3 B = tex2D(decal, tex0 - dy).xyz;
    float3 C = tex2D(decal, tex0 + dx - dy).xyz;
    float3 D = tex2D(decal, tex0 - dx).xyz;
    float3 E = tex2D(decal, tex0).xyz;
    float3 F = tex2D(decal, tex0 + dx).xyz;
    float3 G = tex2D(decal, tex0 - dx + dy).xyz;
    float3 H = tex2D(decal, tex0 + dy).xyz;
    float3 I = tex2D(decal, tex0 + dx + dy).xyz;

    // Samples two steps from E along one axis and one step along the other.
    float3 A1 = tex2D(decal, tex0 - dx - 2.0 * dy).xyz;
    float3 C1 = tex2D(decal, tex0 + dx - 2.0 * dy).xyz;
    float3 A0 = tex2D(decal, tex0 - 2.0 * dx - dy).xyz;
    float3 G0 = tex2D(decal, tex0 - 2.0 * dx + dy).xyz;
    float3 C4 = tex2D(decal, tex0 + 2.0 * dx - dy).xyz;
    float3 I4 = tex2D(decal, tex0 + 2.0 * dx + dy).xyz;
    float3 G5 = tex2D(decal, tex0 - dx + 2.0 * dy).xyz;
    float3 I5 = tex2D(decal, tex0 + dx + 2.0 * dy).xyz;

    // Samples two steps from E along a single axis.
    float3 B1 = tex2D(decal, tex0 - 2.0 * dy).xyz;
    float3 D0 = tex2D(decal, tex0 - 2.0 * dx).xyz;
    float3 H5 = tex2D(decal, tex0 + 2.0 * dy).xyz;
    float3 F4 = tex2D(decal, tex0 + 2.0 * dx).xyz;

    // Collapse each sampled xyz to one scalar (product with yuv_weighted), four samples per float4.
    float4 b = mul(float4x3(B, D, H, F), yuv_weighted);
    float4 c = mul(float4x3(C, A, G, I), yuv_weighted);
    float4 e = mul(float4x3(E, E, E, E), yuv_weighted);

    // Rotated views of b and c. In lane x every variable holds the sample it is named after
    // (d.x is D, f.x is F, g.x is G, h.x is H, i.x is I); lanes y, z, w hold the other packed
    // samples in rotated order, so each lane applies the same rule to a different arrangement.
    float4 d = b.yzwx;
    float4 f = b.wxyz;
    float4 g = c.zwxy;
    float4 h = b.zwxy;
    float4 i = c.wxyz;
    float4 i4 = mul(float4x3(I4, C1, A0, G5), yuv_weighted);
    float4 i5 = mul(float4x3(I5, C4, A1, G0), yuv_weighted);
    float4 h5 = mul(float4x3(H5, F4, B1, D0), yuv_weighted);
    float4 f4 = h5.yzwx;

    // Per-lane coefficients of the inequations  A * fp.y + B * fp.x > C.
    // The o / x / y suffixed sets feed fx / fx_left / fx_up respectively.
    float4 Ao = float4(1.0, -1.0, -1.0, 1.0);
    float4 Bo = float4(1.0, 1.0, -1.0, -1.0);
    float4 Co = float4(1.5, 0.5, -0.5, 0.5);
    float4 Ax = float4(1.0, -1.0, -1.0, 1.0);
    float4 Bx = float4(0.5, 2.0, -0.5, -2.0);
    float4 Cx = float4(1.0, 1.0, -0.5, 0.0);
    float4 Ay = float4(1.0, -1.0, -1.0, 1.0);
    float4 By = float4(2.0, 0.5, -2.0, -0.5);
    float4 Cy = float4(2.0, 0.0, -1.0, 0.5);

    // These inequations define the line below which interpolation occurs.
    fx.x = (Ao.x * fp.y + Bo.x * fp.x > Co.x);
    fx_left.x = (Ax.x * fp.y + Bx.x * fp.x > Cx.x);
    fx_up.x = (Ay.x * fp.y + By.x * fp.x > Cy.x);
    fx.y = (Ao.y * fp.y + Bo.y * fp.x > Co.y);
    fx_left.y = (Ax.y * fp.y + Bx.y * fp.x > Cx.y);
    fx_up.y = (Ay.y * fp.y + By.y * fp.x > Cy.y);
    fx.z = (Ao.z * fp.y + Bo.z * fp.x > Co.z);
    fx_left.z = (Ax.z * fp.y + Bx.z * fp.x > Cx.z);
    fx_up.z = (Ay.z * fp.y + By.z * fp.x > Cy.z);
    fx.w = (Ao.w * fp.y + Bo.w * fp.x > Co.w);
    fx_left.w = (Ax.w * fp.y + Bx.w * fp.x > Cx.w);
    fx_up.w = (Ay.w * fp.y + By.w * fp.x > Cy.w);

    // Per-lane inequality tests on the weighted values; ir_lv1 gates edr,
    // ir_lv2_left gates edr_left and ir_lv2_up gates edr_up further down.
    ir_lv1.x = ((e.x != f.x) && (e.x != h.x));
    ir_lv2_left.x = ((e.x != g.x) && (d.x != g.x));
    ir_lv2_up.x = ((e.x != c.x) && (b.x != c.x));
    ir_lv1.y = ((e.y != f.y) && (e.y != h.y));
    ir_lv2_left.y = ((e.y != g.y) && (d.y != g.y));
    ir_lv2_up.y = ((e.y != c.y) && (b.y != c.y));
    ir_lv1.z = ((e.z != f.z) && (e.z != h.z));
    ir_lv2_left.z = ((e.z != g.z) && (d.z != g.z));
    ir_lv2_up.z = ((e.z != c.z) && (b.z != c.z));
    ir_lv1.w = ((e.w != f.w) && (e.w != h.w));
    ir_lv2_left.w = ((e.w != g.w) && (d.w != g.w));
    ir_lv2_up.w = ((e.w != c.w) && (b.w != c.w));

    // Two weighted sums of absolute differences; edr requires w1 < w2 (strict).
    float4 w1 = weighted_distance(e, c, g, i, h5, f4, h, f);
    float4 w2 = weighted_distance(h, d, i5, f, i4, b, e, i);
    float4 df_fg = df(f, g);
    float4 df_hc = df(h, c);

    // Operands of the level-2 tests: coef*|f-g| vs |h-c| (edr_left) and coef*|h-c| vs |f-g| (edr_up).
    float4 t1 = (coef * df_fg);
    float4 t2 = df_hc;
    float4 t3 = df_fg;
    float4 t4 = (coef * df_hc);

    // Whole-vector comparisons; && on bool4 operands is applied per component.
    edr = bool4((w1 < w2) && ir_lv1);
    // Inclusive (<=) on purpose: a tie between the two distances must still satisfy
    // the level-2 rules. A strict < here changes the output whenever they are equal.
    edr_left = bool4((t1 <= t2) && ir_lv2_left);
    edr_up = bool4((t4 <= t3) && ir_lv2_up);

    // A lane asks for a new colour when its edge rule holds and at least one of its
    // line inequations holds (the left/up ones only count if their own rule holds).
    nc.x = (edr.x && (fx.x || edr_left.x && fx_left.x || edr_up.x && fx_up.x));
    nc.y = (edr.y && (fx.y || edr_left.y && fx_left.y || edr_up.y && fx_up.y));
    nc.z = (edr.z && (fx.z || edr_left.z && fx_left.z || edr_up.z && fx_up.z));
    nc.w = (edr.w && (fx.w || edr_left.w && fx_left.w || edr_up.w && fx_up.w));

    // t1 / t2 are reused here: px is true where |e-f| <= |e-h|.
    // Inclusive as well, so that on a tie the first of the two candidate neighbours is chosen.
    t1 = df(e, f);
    t2 = df(e, h);
    px = bool4(t1 <= t2);

    // The first lane (in x, y, z, w order) with nc set picks between two neighbours via px;
    // if no lane is set, the centre sample E is kept.
    float3 res = nc.x ? px.x ? F : H :
                 nc.y ? px.y ? B : F :
                 nc.z ? px.z ? D : B :
                 nc.w ? px.w ? H : D : E;
    return float4(res.x, res.y, res.z, 1.0);
}
// Single-pass technique: main_vertex is compiled for the vs_3_0 target and
// main_fragment for the ps_3_0 target.
technique T0
{
pass P0
{
VertexShader = compile vs_3_0 main_vertex();
PixelShader = compile ps_3_0 main_fragment();
}
}