从 Python 代码调用该操作时，C++ 共享库（.so）中的以下代码会被执行：
// Registers the "Winograd2x2ImTrans" op: one float input, one float output.
//
// The shape function mirrors what the kernel registered for this op allocates:
// for a rank-4 input [B, H, W, C] the output is
// [16, B, (H + 1) / 2, (W + 1) / 2, C]. Declaring the output as a copy of the
// input shape (as before) contradicts the rank-5 tensor the kernel produces.
REGISTER_OP("Winograd2x2ImTrans")
    .Input("input1: float")
    .Output("output: float")
    .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) {
      namespace si = ::tensorflow::shape_inference;

      // The kernel reads dimensions 0..3, so require exactly rank 4.
      si::ShapeHandle input;
      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &input));

      // Number of 2x2 patches along H and W, rounded up: (dim + 1) / 2.
      // Unknown dimensions propagate as unknown.
      si::DimensionHandle padded_h;
      si::DimensionHandle padded_w;
      si::DimensionHandle patch_h;
      si::DimensionHandle patch_w;
      TF_RETURN_IF_ERROR(c->Add(c->Dim(input, 1), 1, &padded_h));
      TF_RETURN_IF_ERROR(
          c->Divide(padded_h, 2, /*evenly_divisible=*/false, &patch_h));
      TF_RETURN_IF_ERROR(c->Add(c->Dim(input, 2), 1, &padded_w));
      TF_RETURN_IF_ERROR(
          c->Divide(padded_w, 2, /*evenly_divisible=*/false, &patch_w));

      c->set_output(0, c->MakeShape({c->MakeDim(16), c->Dim(input, 0), patch_h,
                                     patch_w, c->Dim(input, 3)}));

      // Diagnostic marker kept from the original: shows the shape function ran.
      printf("This line is printed on the console\n");
      return Status::OK();
    });
但是，通过 REGISTER_KERNEL_BUILDER 注册的内核（OpKernel）的 Compute 方法却没有被调用：
class Winograd2x2ImTransCudaOp : public OpKernel {
public:
explicit Winograd2x2ImTransCudaOp(OpKernelConstruction* context) : OpKernel(context) {}
void Compute(OpKernelContext* context) override {
printf("This line is not printed on the console\n");
//Nor is the below exception thrown
throw "THROW THIS";
// Grab the input tensor
const Tensor& I_tensor = context->input(0);
auto Input = I_tensor.flat<float>();
// OP_REQUIRES(context, iA_tensor.dims()==2 && iB_tensor.dims()==2);
int B = I_tensor.dim_size(0);
int H = I_tensor.dim_size(1);
int W = I_tensor.dim_size(2);
int C = I_tensor.dim_size(3);
int n_patch_width = (W + 1) / 2;
int n_patch_height = (H + 1) / 2;
// Create an output tensor
Tensor* O_tensor = NULL;
OP_REQUIRES_OK(context, context->allocate_output(0, TensorShape{16, B, n_patch_height, n_patch_width, C}, &O_tensor));
auto Output = O_tensor->template flat<float>();
// Set all but the first element of the output tensor to 0.
Winograd2x2ImTransComputeLauncher(Input.data(), Output.data(), C, B, H, W, 1, 1);
}
};
// Registers Winograd2x2ImTransCudaOp as the kernel for the "Winograd2x2ImTrans"
// op on DEVICE_GPU.
// NOTE(review): no kernel for any other device is registered in the visible
// code — confirm the op is actually placed on a GPU when it is run.
REGISTER_KERNEL_BUILDER(Name("Winograd2x2ImTrans").Device(DEVICE_GPU), Winograd2x2ImTransCudaOp);
我用来构建共享库（.so）文件的 Makefile 如下：
# Builds the Winograd 2x2 custom TensorFlow ops (forward and gradient) as
# shared libraries. Each .so links a host-side .cc file with a CUDA object
# compiled by nvcc.

# TensorFlow headers and the directory containing libtensorflow_framework.
TF_INC=/usr/lib/python3.7/site-packages/tensorflow/include
TF_LIB=/usr/lib/python3.7/site-packages/tensorflow
# CUDA libraries (cudart, cublas). Deliberately NOT named LD_LIBRARY_PATH:
# a makefile variable with that name overrides the environment variable of the
# same name and is exported to every recipe, changing how g++/nvcc themselves
# resolve their shared libraries.
CUDA_LIB=/opt/cuda/lib64

# NOTE(review): _GLIBCXX_USE_CXX11_ABI must match the value TensorFlow itself
# was built with — confirm for the installed package.
ABI_FLAG=-D_GLIBCXX_USE_CXX11_ABI=1

# These targets are commands, not files.
.PHONY: all forward backward clean

all: winograd2x2_conv_op.so winograd2x2_conv_grad_op.so
forward: winograd2x2_conv_op.so
backward: winograd2x2_conv_grad_op.so

# Recipe lines must start with a TAB character.
winograd2x2_conv_op.so: winograd2x2_conv_op.cc winograd2x2_conv_cuda.cu.o
	g++ -std=c++11 -shared -o $@ $^ -I $(TF_INC) -fPIC -L$(CUDA_LIB) -lcudart -O2 -lcublas -L$(TF_LIB) -ltensorflow_framework $(ABI_FLAG)

# Compile-only step (-c): linker flags (-L/-l) have no effect here and are omitted.
winograd2x2_conv_cuda.cu.o: winograd2x2_conv_cuda.cu.cc
	nvcc -std=c++11 -c -o $@ $< -I $(TF_INC) -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch sm_52 $(ABI_FLAG) # --use_fast_math

winograd2x2_conv_grad_op.so: winograd2x2_conv_grad_op.cc winograd2x2_conv_grad_cuda.cu.o
	g++ -std=c++11 -shared -o $@ $^ -I $(TF_INC) -fPIC -L$(CUDA_LIB) -lcudart -O2 -lcublas -L$(TF_LIB) -ltensorflow_framework $(ABI_FLAG)

winograd2x2_conv_grad_cuda.cu.o: winograd2x2_conv_grad_cuda.cu.cc
	nvcc -std=c++11 -c -o $@ $< -I $(TF_INC) -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch sm_52 $(ABI_FLAG) # --use_fast_math

# -f: do not fail when there is nothing to remove.
clean:
	rm -f *.o *.so
如果您能帮我找出或解决这个问题，我将不胜感激。
谢谢!