更新所有像素

时间:2017-04-03 13:27:28

标签: halide

我正在尝试用Halide实现光流算法,但在更新u和v向量时遇到了一些问题。这是我的C++版本:

for(int i= 0; i<h; i++) {
for(int j= 0; j<bpl; j++) {
  float iix = Ix[i*bpl+j];
  float iiy = Iy[i*bpl+j];
  float iit = It[i*bpl+j];
  for(int k=0; k<40; k++) {
    float Uav     = (u[(i-1)*bpl+j] + u[(i+1)*bpl+j] + u[i*bpl+j-1] + u[i*bpl+j+1])/4;
    float Vav     = (v[(i-1)*bpl+j] + v[(i+1)*bpl+j] + v[i*bpl+j-1] + v[i*bpl+j+1])/4;
    float P       = iix*Uav + iiy*Vav + iit;
    float D       = iix*iix + iiy*iiy + lambda*lambda;
    float tmp     = P/D;
    float utmp    = Uav - iix*tmp;
    float vtmp    = Vav - iiy*tmp;
    u[i*bpl+j] = utmp;
    v[i*bpl+j] = vtmp;
  }
}

}

这是我的Halide实现:

// Halide attempt at the same update. u and v are the flow components; the
// remaining Funcs hold the intermediate terms of one relaxation step.
Func u("u"), v("v");
Func Uav("Uav"), Vav("Vav"), P("P"), D("D"), tmp("tmp"), utmp("utmp"), vtmp("vtmp");
// Reduction domain spanning input_1's width (first dimension) and height (second dimension).
RDom r_0(0, input_1.width(), 0, input_1.height());
// Pure (initial) definitions: the flow starts at zero everywhere.
u(x, y, c) = 0;
v(x, y, c) = 0;
// This is a plain C++ loop: every pass issues the definitions below again on
// the very same Func objects.
for(int k=0; k<40; k++) {
// Average of the four direct neighbours of u and of v.
Uav (x, y, c) = (u(x, y-1, c) + u(x, y+1, c) + u(x-1, y, c) + u(x+1, y, c))/4;
Vav (x, y, c) = (v(x, y-1, c) + v(x, y+1, c) + v(x-1, y, c) + v(x+1, y, c))/4;
// Numerator and denominator of the correction term.
P (x, y, c) = Ix(x, y, c) * Uav(x, y, c) + Iy(x, y, c) * Vav(x, y, c) + It(x, y, c);
D (x, y, c) = Ix(x, y, c) * Ix(x, y, c) + Iy(x, y, c) * Iy(x, y, c) + lambda * lambda;
tmp (x, y, c) = P(x, y, c)/D(x, y, c);
utmp(x, y, c) = Uav(x, y, c) - Ix(x, y, c) * tmp(x, y, c);
vtmp(x, y, c) = Vav(x, y, c) - Iy(x, y, c) * tmp(x, y, c);
// NOTE(review): u and v are already referenced by Uav/Vav above when these
// update definitions are added, which matches the reported error ("Func u
// cannot be given a new update definition ... already used in the definition
// of another Func").
// NOTE(review): the left-hand side is indexed by r_0 while the right-hand side
// still uses the pure variables x and y — confirm this is what was intended.
u(r_0.x, r_0.y, c) = utmp(x, y, c);
v(r_0.x, r_0.y, c) = vtmp(x, y, c);
}

当我运行程序时,出现以下运行时错误:

  

错误:Func u不能再添加新的更新定义,因为它已经被实现(realize),或者已在另一个Func的定义中被使用。已中止(核心已转储)

感谢您的回复。

正如AhiyaHiya所说,变量x、y、c被声明为:

// Halide variables used to index the Funcs: x and y are the image
// coordinates, c is the channel. (The typographic quotes of the original line
// are replaced by plain double quotes so the string literals are valid C++.)
Var x("x"), y("y"), c("c");

正如您所建议的,我使用extern C++函数来更新我的Halide函数。这是我的外部函数:

/// Extern stage that relaxes the optical-flow estimate at one pixel.
///
/// Reads the gradients from Ix, Iy and It at offset (bpl, h) from each
/// buffer's minimum coordinate, then runs 40 update steps that overwrite u and
/// v at that same offset, using the average of the four direct neighbours.
///
/// @param Ix,Iy,It  gradient buffers; only read.
/// @param u,v       flow buffers; the addressed element is updated in place.
/// @param bpl       x offset from min[0] of the addressed element.
/// @param h         y offset from min[1] of the addressed element.
/// @param lambda    weight that enters the denominator as lambda*lambda.
/// @param IsU       non-zero selects u as the result, zero selects v.
/// @return A copy of the selected buffer's descriptor. It shares host memory
///         with the buffer that was passed in and updated.
///
/// Fix: the original wrote through, and returned the pointee of, the
/// uninitialized pointers `uResult` / `vResult` (undefined behaviour). The
/// values they were meant to receive are exactly the ones already stored into
/// u / v, so the updated buffer itself is returned instead.
///
/// NOTE(review): the addressed element is (min + bpl, min + h). If bpl and h
/// are the buffer extents this lies one past the last valid element — confirm
/// what the caller passes.
/// NOTE(review): values are read and written with host's element type, so the
/// computed results are converted to that type on store — confirm it matches
/// the buffers' real element type.
/// NOTE(review): Halide's define_extern presumably expects an `int` return
/// value and the output buffer as a trailing parameter; this signature is kept
/// as-is for compatibility — verify against the Halide documentation.
extern "C" DLLEXPORT buffer_t compute_flow(buffer_t *Ix, buffer_t *Iy, buffer_t *It, buffer_t *u, buffer_t *v, 
                                       const int32_t bpl, const int32_t h, const float lambda, const uint8_t IsU) {
    // Address of the element at offset (dx, dy) from the buffer's minimum
    // coordinate, i.e. at absolute coordinate (min[0] + dx, min[1] + dy).
    const auto at = [](buffer_t *buf, const int32_t dx, const int32_t dy) {
        return buf->host + dx * buf->stride[0] + dy * buf->stride[1];
    };

    // Gradients at the addressed pixel.
    const auto iix = *at(Ix, bpl, h);
    const auto iiy = *at(Iy, bpl, h);
    const auto iit = *at(It, bpl, h);

    // The denominator does not change between iterations.
    const auto D = iix*iix + iiy*iiy + lambda*lambda;

    // The four neighbours are never written by this function, so every
    // iteration reads the same inputs and stores the same result.
    for(int k=0; k<40; k++) {
      const auto u0 = *at(u, bpl, h - 1); //u[(i-1)*bpl+j]
      const auto u1 = *at(u, bpl, h + 1); //u[(i+1)*bpl+j]
      const auto u2 = *at(u, bpl - 1, h); //u[i*bpl+j-1]
      const auto u3 = *at(u, bpl + 1, h); //u[i*bpl+j+1]

      const auto v0 = *at(v, bpl, h - 1); //v[(i-1)*bpl+j]
      const auto v1 = *at(v, bpl, h + 1); //v[(i+1)*bpl+j]
      const auto v2 = *at(v, bpl - 1, h); //v[i*bpl+j-1]
      const auto v3 = *at(v, bpl + 1, h); //v[i*bpl+j+1]

      const auto Uav     = (u0 + u1 + u2 + u3)/4;
      const auto Vav     = (v0 + v1 + v2 + v3)/4;
      const auto P       = iix*Uav + iiy*Vav + iit;
      const auto tmp     = P/D;
      const auto utmp    = Uav - iix*tmp;
      const auto vtmp    = Vav - iiy*tmp;

      *at(u, bpl, h) = utmp; //u[i*bpl+j]
      *at(v, bpl, h) = vtmp; //v[i*bpl+j]
    }

    // u and v were updated in place; hand back the descriptor the caller asked for.
    return IsU ? *u : *v;
}

在我的main函数中,我这样调用它:

    const float lambda = 0.05;

    Image<uint8_t> input_1 = load_image(argv[1]);
    Image<uint8_t> input_2 = load_image(argv[1]);

    Var x("x"); //image indice in x direction
    Var y("y"); //image indice in y direction
    Var c("c"); //image number of channel

    //clamp to edge
    Func clamped_1("clamped_1"), clamped_2("clamped_2");
    clamped_1 = BoundaryConditions::repeat_edge(input_1);
    clamped_2 = BoundaryConditions::repeat_edge(input_2);

    //convert rgb image to grayscale image
    Func f_1("f_1"), f_2("f_2");
    f_1(x,y,c) = min(0.299f * clamped_1(x,y,0) + 0.587f * clamped_1(x,y,1) + 0.114f * clamped_1(x,y,2), 255.0f);
    f_2(x,y,c) = min(0.299f * clamped_2(x,y,0) + 0.587f * clamped_2(x,y,1) + 0.114f * clamped_2(x,y,2), 255.0f);

    // Gaussian blurring: 5x5 kernel applied to both grayscale frames.
    // The 25 weights are symmetric and add up to 1.0.
    // Fix: the first assignment used to target kernel(0, 1) a second time, so
    // the corner kernel(0, 0) was never set; it now receives 0.000067 like the
    // other three corners.
    Image<float> kernel(5, 5);
    kernel(0, 0) = 0.000067; kernel(0, 1) = 0.001663; kernel(0, 2) = 0.004706; kernel(0, 3) = 0.001663; kernel(0, 4) = 0.000067;
    kernel(1, 0) = 0.001663; kernel(1, 1) = 0.041482; kernel(1, 2) = 0.117381; kernel(1, 3) = 0.041482; kernel(1, 4) = 0.001663;
    kernel(2, 0) = 0.004706; kernel(2, 1) = 0.117381; kernel(2, 2) = 0.332152; kernel(2, 3) = 0.117381; kernel(2, 4) = 0.004706;
    kernel(3, 0) = 0.001663; kernel(3, 1) = 0.041482; kernel(3, 2) = 0.117381; kernel(3, 3) = 0.041482; kernel(3, 4) = 0.001663;
    kernel(4, 0) = 0.000067; kernel(4, 1) = 0.001663; kernel(4, 2) = 0.004706; kernel(4, 3) = 0.001663; kernel(4, 4) = 0.000067;
    // Reduction domain covering the kernel.
    // NOTE(review): the taps are read at (x + r.x, y + r.y), so the window
    // presumably starts at (x, y) rather than being centred on it — confirm
    // that the resulting shift is intended.
    RDom r(kernel);
    Func I1("I1"), I2("I2");
    I1(x, y, c) = sum(f_1(x+r.x, y+r.y, c) * kernel(r.x, r.y));
    I2(x, y, c) = sum(f_2(x+r.x, y+r.y, c) * kernel(r.x, r.y));

    // Input derivatives: finite differences over the 2x2 neighbourhood
    // (x-1..x, y-1..y) of the two blurred frames I1 and I2.
    // NOTE(review): the sums are not divided by the number of terms — confirm
    // that the scale is accounted for through lambda.
    Func Ix("Ix"), Iy("Iy"), It("It");
    // Horizontal difference (right column minus left column), both frames.
    Ix(x, y, c) = (-I1(x-1, y-1, c) + I1(x, y-1, c) - I1(x-1, y, c) + I1(x, y, c)) + 
                  (-I2(x-1, y-1, c) + I2(x, y-1, c) - I2(x-1, y, c) + I2(x, y, c));
    // Vertical difference (bottom row minus top row), both frames.
    Iy(x, y, c) = (-I1(x-1, y-1, c) - I1(x, y-1, c) + I1(x-1, y, c) + I1(x, y, c)) + 
                  (-I2(x-1, y-1, c) - I2(x, y-1, c) + I2(x-1, y, c) + I2(x, y, c));
    // Temporal difference: frame 2 minus frame 1 over the same neighbourhood.
    // Fix: the frame-2 sum used to be subtracted as well, which produced the
    // negated sum of both frames instead of their difference.
    It(x, y, c) = (-I1(x-1, y-1, c) - I1(x, y-1, c) - I1(x-1, y, c) - I1(x, y, c)) + 
                  ( I2(x-1, y-1, c) + I2(x, y-1, c) + I2(x-1, y, c) + I2(x, y, c));

    // Flow components, defined as zero everywhere.
    Func u("u"), v("v");
    u(x, y, c) = 0; v(x, y, c) = 0;

    // Two extern Funcs bound to the same C function compute_flow; they differ
    // only in the last argument (1 for callU, 0 for callV).
    Func callU("callU"), callV("callV");
    // Arguments in compute_flow's parameter order:
    // Ix, Iy, It, u, v, bpl, h, lambda, IsU.
    vector<ExternFuncArgument> argsU(9);
    argsU[0] = Ix;                   argsU[1] = Iy;            argsU[2] = It;
    argsU[3] = u;                    argsU[4] = v;             argsU[5] = input_1.width();
    argsU[6] = input_1.height();     argsU[7] = lambda;        argsU[8] = 1;
    vector<ExternFuncArgument> argsV(9);
    argsV[0] = Ix;                   argsV[1] = Iy;            argsV[2] = It;
    argsV[3] = u;                    argsV[4] = v;             argsV[5] = input_1.width();
    argsV[6] = input_1.height();     argsV[7] = lambda;        argsV[8] = 0;
    // One type per argument of compute_flow.
    // NOTE(review): define_extern presumably expects the output type(s) of the
    // extern Func here rather than the argument types — verify against the
    // Halide documentation.
    vector<Type> types(9);
    types[0] = Ix.output_types()[0]; types[1] = Iy.output_types()[0]; types[2] = It.output_types()[0];
    types[3] = u.output_types()[0];  types[4] = v.output_types()[0];  types[5] = Int(32);
    types[6] = Int(32);              types[7] = Float(32);            types[8] = UInt(8);

    // NOTE(review): the last argument (1) looks like the dimensionality of the
    // extern Func, while callU / callV are called with three coordinates
    // below — possibly related to the BoundsInference failure
    // "b.size() == func_args.size()"; confirm.
    callU.define_extern("compute_flow", argsU, types, 1);
    callV.define_extern("compute_flow", argsV, types, 1);

    // Pure wrappers around the extern Funcs; these are what gets realized.
    Func outputU("outputU"), outputV("outputV");
    outputU(x, y, c) = callU(x, y, c);
    outputV(x, y, c) = callV(x, y, c);

    // Schedule the three derivative Funcs as compute_root, then JIT-compile
    // both outputs. u and v are left unscheduled here.
    Ix.compute_root();
    Iy.compute_root();
    It.compute_root();
    outputU.compile_jit();
    outputV.compile_jit();

    // Realize both outputs over the full width, height and channel count of input_1.
    Image<uint8_t> out_u = outputU.realize(input_1.width(), input_1.height(), input_1.channels());
    Image<uint8_t> out_v = outputV.realize(input_1.width(), input_1.height(), input_1.channels());

当我不对u和v进行调度时,编译一切正常,但我得到了这个运行时错误:

  

错误:Func u不能被调度为内联计算,因为它被外部计算的函数callU所使用。已中止(核心已转储)

但是,当我将u和v调度为:

    // Schedule u and v as compute_root instead of leaving them unscheduled
    // (the default schedule is what the "cannot be scheduled inline" error
    // complains about).
    u.compute_root();
    v.compute_root();

我收到以下运行时错误:

  

/home/rokiatou/Documents/Thèse/halide/Halide-master/src/BoundsInference.cpp:283 内部错误,条件失败:b.empty() || b.size() == func_args.size() 已中止(核心已转储)

我不确定我的外部函数compute_flow是否定义良好。我无法解决调度问题。

欢迎任何帮助。谢谢。

1 个答案:

答案 0 :(得分:2)

我假设变量x、y、c被声明为Halide::Var;如果是这种情况,那么上面列出的错误信息实际上是准确的。

您可以使用C++的for循环向Halide::Func添加更新定义,但至少要在其中一个坐标位置上使用普通的C或C++变量;上面的代码只是一遍又一遍地引用相同的Halide变量。

关于访问您列出的像素"(x-1, y, c)、(x+1, y, c)、(x, y, c)、(x, y-1, c)[...]",下面是一个在Halide外部函数中访问buffer_t*内部值的示例:

extern "C" void 
auto get_something_done_in_c(buffer_t* my_buffer, const int32_t dx, const int32_t dy)
{
    const auto min0       = my_buffer->min[0];
    const auto internal_x = min0;

    const auto min1       = my_buffer->min[1];
    const auto internal_y = min1;

    const auto stride0 = my_buffer->stride[0];
    const auto stride1 = my_buffer->stride[1];

    const auto x1 = dx + internal_x;
    const auto y1 = dy + internal_y;

    const auto value =  *(my_buffer->host + (x1 - min0) * stride0 + (y1 - min1) * stride1);

    return value;
}
HalideExtern_3(int32_t, get_something_done_in_c, buffer_t, int32_t, int32_t);

稍微解释一下如何得到value:我通过my_buffer变量访问名为host的数据指针。host给出的是您所关心的数据的起始地址。由于底层是一维缓冲区,因此要用x和y坐标分别乘以stride0和stride1,得到在数据指针中的地址偏移量,从而取得您关注的值。