我想注册一个新的 Op(参见文档),为此编写了下面粘贴的文件;该 Op 只是把输入原样复制到输出。当它在 REGISTER_KERNEL_BUILDER 中以 .Device(DEVICE_CPU) 注册时,可以在 CPU 上成功运行。
但是,当我把注册改为 .Device(DEVICE_GPU) 时,程序出现了段错误(segmentation fault);到目前为止,我做的各种尝试都没能查明出错的原因。
有什么建议或线索吗?
我使用的是通过 pip 安装的 tensorflow-gpu==1.4。
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include <vector>
using namespace tensorflow;
using CPUDevice = Eigen::ThreadPoolDevice;
using GPUDevice = Eigen::GpuDevice;
REGISTER_OP("ForwardFaceIndexMap")
.Input( "face_index_map: int32")
.Output("out_face_index_map: int32")
;
class ForwardFaceIndexMapOp : public OpKernel {
public:
explicit ForwardFaceIndexMapOp(OpKernelConstruction* context) : OpKernel(context) {}
void Compute(OpKernelContext* context) override {
const Tensor& inn_face_index_map = context->input(0);
Tensor* out_face_index_map = NULL;
OP_REQUIRES_OK(context, context->allocate_output(0, inn_face_index_map.shape(), &out_face_index_map));
auto inn_face_index_map_flat = inn_face_index_map. flat<int >();
auto out_face_index_map_flat = out_face_index_map-> flat<int >();
for (int i = 0; i < inn_face_index_map_flat.size(); i++)
out_face_index_map_flat(i) = inn_face_index_map_flat(i);
}
};
// Binds ForwardFaceIndexMapOp as the DEVICE_CPU kernel for the
// "ForwardFaceIndexMap" op.
REGISTER_KERNEL_BUILDER(Name("ForwardFaceIndexMap").Device(DEVICE_CPU),
                        ForwardFaceIndexMapOp);