我在Tensorflow(TF)中创建了一个新操作,并将其注册到GPU上。以下是名为pixel_selector.cc
的C++文件的一部分:
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
using namespace tensorflow;
// Op interface registration: an image tensor, a coordinate tensor and a
// stride, producing one float output.
//
// Fix 1: the builder chain must be terminated with ';' — as quoted, the
// chain ran straight into the following `class` declaration, which is a
// compile error.
// Fix 2: a shape-inference function is provided so graph construction can
// reason about the output; UnknownShape is a safe placeholder until the
// real output-shape mapping is written.
REGISTER_OP("PixelSelector")
    .Input("in: float32")
    .Input("coord: float32")
    .Input("stride: int16")
    .Output("out: float32")
    .SetShapeFn(tensorflow::shape_inference::UnknownShape);
// Host-side (plain C++) kernel body for the PixelSelector op.
//
// NOTE(review): this kernel is registered with Device(DEVICE_GPU) below, so
// TensorFlow allocates the input tensors in GPU device memory.  The elided
// code ("...") that derives batch/depth/width/height and feeds the couts
// below presumably dereferences the tensors' data buffers from the CPU —
// dereferencing a device pointer on the host is the likely cause of the
// segmentation fault, not concurrent access.  Either register a DEVICE_CPU
// kernel, add .HostMemory(...) constraints to the GPU registration, or
// launch a CUDA kernel to touch the data.
//
// NOTE(review): the interleaved output ("Batch size: Batch size: 1111")
// shows Compute() being entered from two threads at once; std::cout is not
// synchronized per statement, so prints interleave.  That concurrency is
// normal for TF kernels and is a symptom here, not the crash cause.
class PixelSelectorOp : public OpKernel {
public:
explicit PixelSelectorOp(OpKernelConstruction* context) : OpKernel(context) {}
// Called by the TF runtime for each execution of the op; may be invoked
// concurrently from multiple threads.
void Compute(OpKernelContext* context) override
{
// Grab the input tensor
const Tensor& input_tensor = context->input(0);   // "in"
const Tensor& input_tensor1 = context->input(1);  // "coord"
const Tensor& input_tensor2 = context->input(2);  // "stride"
...
std::cout << "Batch size: " << batch << std::endl;
std::cout << "Depth size: " << depth << std::endl;
std::cout << "Width size: " << width << std::endl;
std::cout << "Height size: " << height << std::endl;
...
std::cout << "Num Pixels " << pixels << std::endl;
std::cout << "Num Coord " << num_coord << std::endl;
...
}
};
// Kernel registration.  Compute() above reads the tensor contents with
// plain host code, so every tensor must live in CPU RAM: pin them with
// HostMemory.  Without these constraints a DEVICE_GPU registration places
// the buffers in GPU device memory, and dereferencing those device
// pointers from the host segfaults.  (Alternatively, register this kernel
// for DEVICE_CPU, or move the data access into a CUDA kernel.)
REGISTER_KERNEL_BUILDER(Name("PixelSelector")
                            .Device(DEVICE_GPU)
                            .HostMemory("in")
                            .HostMemory("coord")
                            .HostMemory("stride")
                            .HostMemory("out"),
                        PixelSelectorOp);
该文件已经编译
TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
nvcc -std=c++11 -c -o pixel_selector.o pixel_selector.cc -I $TF_INC -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC
g++ -std=c++11 -shared -o pixel_selector.so pixel_selector.o -I $TF_INC -fPIC -lcudart -L /usr/local/cuda-8.0/lib64/
但是当在TF会话中运行时,我得到以下输出:
Batch size: Batch size: 1111
Depth size: 141
Width size: 300
Height size: 300
Num Pixels 1
Num Coord 3
Segmentation fault (core dumped)
看起来是并发访问。有什么想法吗?