如何在 Core ML 自定义层的 Metal 内核函数中获得正确的 MTLTexture 结果?

时间:2018-09-13 10:00:44

标签: ios metal coreml

 // Compute kernel that gathers four consecutive slices of `inTexture` and
 // scatters them into a 2x2 pixel block of a single slice of `outTexture`.
 //
 // For a thread at (x, y, z) with z a multiple of 4, the four texels read from
 // slices z..z+3 at (x, y) are transposed component-wise and written to
 // (2x, 2y), (2x+1, 2y), (2x, 2y+1), (2x+1, 2y+1) of output slice z/4.
 //
 // Parameters:
 //   inTexture  - source texture array, read-only, bound at texture index 0.
 //   outTexture - destination texture array, write-only, bound at texture index 1.
 //                Presumably twice the width/height of inTexture with a quarter
 //                of its slices — TODO confirm against the host-side encoder.
 //   res        - float buffer at buffer index 0; only res[0] and res[1] are
 //                written (both to 0.0). Per the inline comments these writes
 //                exist for debugging only.
 //   gid        - this thread's position in the dispatch grid.
 kernel void pixelshuffle(
                             texture2d_array<half, access::read> inTexture [[texture(0)]],
                             texture2d_array<half, access::write> outTexture [[texture(1)]],
                             device float* res [[buffer(0)]],
                             ushort3 gid [[thread_position_in_grid]])
    {
      // Only threads inside the input's width/height whose z index is a multiple
      // of 4 do any work; every other thread exits here.
      // NOTE(review): the array size is never checked, so nothing here guarantees
      // that slices gid.z+1..gid.z+3 exist — confirm the input slice count is a
      // multiple of 4.
      if (gid.x >= inTexture.get_width() || gid.y >= inTexture.get_height() || gid.z%4!=0)
      {
        return;
      }

      // Output coordinates (x, y, slice) and output texels for the 2x2 block.
      ushort3 ita,itb,itc,itd;
      half4 ya,yb,yc,yd;

      // Read the same (x, y) texel from four consecutive input slices.
      const half4 sr = inTexture.read(gid.xy, gid.z);
      const half4 sg = inTexture.read(gid.xy, gid.z+1);
      const half4 sb = inTexture.read(gid.xy, gid.z+2);
      const half4 sa = inTexture.read(gid.xy, gid.z+3);
      ushort x2,y2,z;

      // Top-left corner of the 2x2 output block, and the output slice index
      // (gid.z is a multiple of 4 here, so z == gid.z / 4).
      x2 = gid.x*2;
      y2 = gid.y*2;
      z = gid.z>>2;


      ita=ushort3(x2,y2,z);
      itb=ushort3(x2+1,y2,z);
      itc=ushort3(x2,y2+1,z);
      itd=ushort3(x2+1,y2+1,z);
      // 1st location
      // NOTE(review): this barrier is only reached by threads that passed the
      // early return above, i.e. it sits in divergent control flow. The Metal
      // Shading Language spec appears to require every thread of a threadgroup
      // to reach a threadgroup_barrier — verify; this may be related to the
      // results changing when this line is removed.
      threadgroup_barrier(mem_flags::mem_none);



      // Transpose: y* takes one component (r, g, b, a respectively) from each of
      // the four source texels, in slice order.
      ya= half4(sr.r,sg.r,sb.r,sa.r);
      yb= half4(sr.g,sg.g,sb.g,sa.g);
      yc= half4(sr.b,sg.b,sb.b,sa.b);
      yd= half4(sr.a,sg.a,sb.a,sa.a);
      //threadgroup_barrier(mem_flags::mem_none);
      //threadgroup_barrier(mem_flags::mem_threadgroup);
      // 2nd location
      // Debug write: the single thread at (0, 0, 0) stores 0.0 into res[0].
      if(gid.x==0  && gid.y==0 && gid.z==0)
      {
        res[0]=0.0;//float(inTexture.get_array_size());
      }


      // Write the four transposed texels to the 2x2 block of output slice z.
      // NOTE(review): no check against outTexture's dimensions is made here.
      outTexture.write(ya, ita.xy, ita.z);
      outTexture.write(yb, itb.xy, itb.z);
      outTexture.write(yc, itc.xy, itc.z);
      outTexture.write(yd, itd.xy, itd.z);
      //threadgroup_barrier(mem_flags::mem_none);
      // 3rd location
      // Debug write of 0.0 into res[1].
      // NOTE(review): gid is ushort3, so `gid.z>=0` is always true; every
      // surviving thread with x == 0 and y == 0 (one per group of 4 slices)
      // performs this store.
      if(gid.x==0  && gid.y==0 && gid.z>=0)
      {
        res[1]=0.0;//float(gid.z);
      }

    }

上面是我的自定义层的代码。它可以得到正确的结果,其中第二、第三处(2nd/3rd location)的代码仅用于调试。但是,如果我像下面这样注释掉第一、第二、第三处中的任意一处或全部代码,

  // 1st location
  //threadgroup_barrier(mem_flags::mem_none);

  // 2nd location
  //if(gid.x==0  && gid.y==0 && gid.z==0)
  //{
  //  res[0]=0.0;//float(inTexture.get_array_size());
  //}


 // 3rd location
 //if(gid.x==0  && gid.y==0 && gid.z>=0)
 //{
 //res[1]=0.0;//float(gid.z);
 //}

outTexture 的结果就不正确了。为什么?在不使用 res 缓冲区的情况下,如何才能得到正确的结果?

0 个答案:

没有答案