Question

从 Python 代码调用该操作时，C++ 共享库（.so）中的以下代码会被执行：

// Registers the "Winograd2x2ImTrans" op: one float input, one float output.
//
// The shape function mirrors the shape the GPU kernel allocates for its
// output: a rank-4 input [d0, d1, d2, d3] yields
// [16, d0, (d1 + 1) / 2, (d2 + 1) / 2, d3].
REGISTER_OP("Winograd2x2ImTrans")
    .Input("input1: float")
    .Output("output: float")
    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
      using ::tensorflow::shape_inference::DimensionHandle;
      using ::tensorflow::shape_inference::ShapeHandle;

      // Debug trace showing that shape inference ran for this op.
      printf("This line is printed on the console\n");

      // The kernel reads dimensions 0..3, so reject any other rank up front.
      ShapeHandle input;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));

      // Patch counts are (dim + 1) / 2 with truncating division, matching the
      // integer arithmetic in the kernel.
      DimensionHandle height_plus_one;
      DimensionHandle patch_height;
      TF_RETURN_IF_ERROR(c->Add(c->Dim(input, 1), 1, &height_plus_one));
      TF_RETURN_IF_ERROR(c->Divide(height_plus_one, 2,
                                   /*evenly_divisible=*/false, &patch_height));

      DimensionHandle width_plus_one;
      DimensionHandle patch_width;
      TF_RETURN_IF_ERROR(c->Add(c->Dim(input, 2), 1, &width_plus_one));
      TF_RETURN_IF_ERROR(c->Divide(width_plus_one, 2,
                                   /*evenly_divisible=*/false, &patch_width));

      c->set_output(0, c->MakeShape({c->MakeDim(16), c->Dim(input, 0),
                                     patch_height, patch_width,
                                     c->Dim(input, 3)}));
      return Status::OK();
    });

但是，通过 REGISTER_KERNEL_BUILDER 注册的内核（其 Compute 方法）却没有被调用：

class Winograd2x2ImTransCudaOp : public OpKernel {
public:
  explicit Winograd2x2ImTransCudaOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
      printf("This line is not printed on the console\n");

      //Nor is the below exception thrown
      throw "THROW THIS";

    // Grab the input tensor
    const Tensor& I_tensor = context->input(0);
    auto Input = I_tensor.flat<float>();
    // OP_REQUIRES(context, iA_tensor.dims()==2 && iB_tensor.dims()==2);

    int B = I_tensor.dim_size(0);
    int H = I_tensor.dim_size(1);
    int W = I_tensor.dim_size(2);
    int C = I_tensor.dim_size(3);
    int n_patch_width = (W + 1) / 2;
    int n_patch_height = (H + 1) / 2;

    // Create an output tensor
    Tensor* O_tensor = NULL;
    OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{16, B, n_patch_height, n_patch_width, C}, &O_tensor));
    auto Output = O_tensor->template flat<float>();
    // Set all but the first element of the output tensor to 0.
    Winograd2x2ImTransComputeLauncher(Input.data(), Output.data(), C, B, H, W, 1, 1); 
  }
};

// Binds Winograd2x2ImTransCudaOp to the "Winograd2x2ImTrans" op for
// DEVICE_GPU. No registration for any other device appears in this snippet.
REGISTER_KERNEL_BUILDER(
    Name("Winograd2x2ImTrans").Device(DEVICE_GPU),
    Winograd2x2ImTransCudaOp);

我用来构建共享库（.so）文件的 Makefile 如下：

# Locations of the TensorFlow headers/library and the CUDA runtime libraries.
TF_INC=/usr/lib/python3.7/site-packages/tensorflow/include
TF_LIB=/usr/lib/python3.7/site-packages/tensorflow
# Deliberately not called LD_LIBRARY_PATH: assigning that name in a Makefile
# replaces the dynamic loader's search path seen by the compiler processes
# whenever the variable is already set in the environment.
CUDA_LIB=/opt/cuda/lib64

# These targets are commands/aliases, not files.
.PHONY: all forward backward clean

all: winograd2x2_conv_op.so winograd2x2_conv_grad_op.so

forward: winograd2x2_conv_op.so

backward: winograd2x2_conv_grad_op.so

# Forward op: host code compiled with g++ and linked with the CUDA object.
winograd2x2_conv_op.so: winograd2x2_conv_op.cc winograd2x2_conv_cuda.cu.o
	g++ -std=c++11 -shared -o winograd2x2_conv_op.so winograd2x2_conv_op.cc winograd2x2_conv_cuda.cu.o -I $(TF_INC) -fPIC -L$(CUDA_LIB) -lcudart -O2 -lcublas -L$(TF_LIB) -ltensorflow_framework -D_GLIBCXX_USE_CXX11_ABI=1

winograd2x2_conv_cuda.cu.o: winograd2x2_conv_cuda.cu.cc
	nvcc -std=c++11 -c -o winograd2x2_conv_cuda.cu.o winograd2x2_conv_cuda.cu.cc -I $(TF_INC) -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch sm_52 -L$(TF_LIB) -ltensorflow_framework -D_GLIBCXX_USE_CXX11_ABI=1 # --use_fast_math

# Gradient op: same recipe shape as the forward op.
winograd2x2_conv_grad_op.so: winograd2x2_conv_grad_op.cc winograd2x2_conv_grad_cuda.cu.o
	g++ -std=c++11 -shared -o winograd2x2_conv_grad_op.so winograd2x2_conv_grad_op.cc winograd2x2_conv_grad_cuda.cu.o -I $(TF_INC) -fPIC -L$(CUDA_LIB) -lcudart -O2 -lcublas -L$(TF_LIB) -ltensorflow_framework -D_GLIBCXX_USE_CXX11_ABI=1

winograd2x2_conv_grad_cuda.cu.o: winograd2x2_conv_grad_cuda.cu.cc
	nvcc -std=c++11 -c -o winograd2x2_conv_grad_cuda.cu.o winograd2x2_conv_grad_cuda.cu.cc -I $(TF_INC) -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch sm_52 -L$(TF_LIB) -ltensorflow_framework -D_GLIBCXX_USE_CXX11_ABI=1 # --use_fast_math

# -f keeps `make clean` from failing when there is nothing to remove.
clean:
	rm -f *.o *.so

如果有人能帮我定位或修复这个问题，我将不胜感激。

谢谢！

TensorFlow新操作：REGISTER_OP似乎不错，但REGISTER_KERNEL_BUILDER无法正常工作

0 个答案: