// Compute kernel that rearranges texels from four consecutive slices of
// inTexture into a 2x2 block of texels in a single slice of outTexture.
//
// Parameters:
//   inTexture  - read-only half-precision 2D texture array, bound at texture(0).
//   outTexture - write-only half-precision 2D texture array, bound at texture(1).
//                Presumably twice the width/height of inTexture and a quarter
//                of its slice count -- TODO confirm against the host code.
//   res        - float buffer bound at buffer(0); this kernel only ever stores
//                0.0 into res[0] and res[1] (debug leftovers).
//   gid        - position of this thread in the dispatch grid; x/y select the
//                input texel, z selects the input slice.
//
// Only threads whose gid.z is a multiple of 4 do any work; each such thread
// handles slices gid.z .. gid.z+3 for its (x, y).
kernel void pixelshuffle(
texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
device float* res [[buffer(0)]],
ushort3 gid [[thread_position_in_grid]])
{
// Drop threads outside the input width/height and threads that are not the
// first of a group of four slices.
// NOTE(review): gid.z+1..gid.z+3 are read below but are not checked against
// inTexture.get_array_size() here -- confirm the slice count is always a
// multiple of 4.
if (gid.x >= inTexture.get_width() || gid.y >= inTexture.get_height() || gid.z%4!=0)
{
return;
}
// Output coordinates (ita..itd) and output texels (ya..yd) for the 2x2 block.
ushort3 ita,itb,itc,itd;
half4 ya,yb,yc,yd;
// One texel at (gid.x, gid.y) from each of the four consecutive input slices.
const half4 sr = inTexture.read(gid.xy, gid.z);
const half4 sg = inTexture.read(gid.xy, gid.z+1);
const half4 sb = inTexture.read(gid.xy, gid.z+2);
const half4 sa = inTexture.read(gid.xy, gid.z+3);
// Top-left corner of the 2x2 destination block and the destination slice
// (four input slices collapse into one output slice).
ushort x2,y2,z;
x2 = gid.x*2;
y2 = gid.y*2;
z = gid.z>>2;
// Destination coordinates: a = top-left, b = x+1, c = y+1, d = x+1 and y+1.
ita=ushort3(x2,y2,z);
itb=ushort3(x2+1,y2,z);
itc=ushort3(x2,y2+1,z);
itd=ushort3(x2+1,y2+1,z);
// 1st location
// NOTE(review): threads that took the early return above never reach this
// barrier, and no threadgroup memory is used in this kernel. The Metal
// specification appears to require every thread of a threadgroup to execute a
// barrier -- verify whether this call is valid/needed.
threadgroup_barrier(mem_flags::mem_none);
// Regroup components: ya collects the .r of all four source texels, yb the .g,
// yc the .b and yd the .a (a 4x4 transpose of sr/sg/sb/sa).
ya= half4(sr.r,sg.r,sb.r,sa.r);
yb= half4(sr.g,sg.g,sb.g,sa.g);
yc= half4(sr.b,sg.b,sb.b,sa.b);
yd= half4(sr.a,sg.a,sb.a,sa.a);
//threadgroup_barrier(mem_flags::mem_none);
//threadgroup_barrier(mem_flags::mem_threadgroup);
// 2nd location
// Debug store: only the thread at grid position (0,0,0) writes res[0].
if(gid.x==0 && gid.y==0 && gid.z==0)
{
res[0]=0.0;//float(inTexture.get_array_size());
}
// Write the four regrouped texels into the 2x2 block of the output slice.
outTexture.write(ya, ita.xy, ita.z);
outTexture.write(yb, itb.xy, itb.z);
outTexture.write(yc, itc.xy, itc.z);
outTexture.write(yd, itd.xy, itd.z);
//threadgroup_barrier(mem_flags::mem_none);
// 3rd location
// Debug store: gid is unsigned, so gid.z>=0 is always true; every thread with
// x==0 and y==0 that got past the early return writes res[1].
if(gid.x==0 && gid.y==0 && gid.z>=0)
{
res[1]=0.0;//float(gid.z);
}
}
上面是我的自定义层(custom layer)的内核代码。它可以得到正确的结果,其中第二、第三位置的代码仅用于调试。但是,如果像下面这样注释掉第一、第二、第三位置中的任意一处或全部代码:
// 1st location
//threadgroup_barrier(mem_flags::mem_none);
// 2nd location
//if(gid.x==0 && gid.y==0 && gid.z==0)
//{
// res[0]=0.0;//float(inTexture.get_array_size());
//}
// 3rd location
//if(gid.x==0 && gid.y==0 && gid.z>=0)
//{
//res[1]=0.0;//float(gid.z);
//}
outTexture 的结果就不正确了。这是为什么?在不使用 res 缓冲区的情况下,怎样才能得到正确的结果?