Question

我想注册一个新的Op（doc），为此，我开发了下面粘贴的文件，Op只是将输入复制到输出中。此Op在CPU上成功运行，即在REGISTER_KERNEL_BUILDER中注册（.Device(DEVICE_CPU)）。

但是，当我把注册改为 .Device(DEVICE_GPU) 时，程序出现了段错误（segmentation fault）。到目前为止，我所做的各种尝试都没能找出导致该问题的原因。

有什么建议或排查思路吗？

我使用的是通过 pip 安装的 tensorflow-gpu==1.4。

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"

#include <vector>

using namespace tensorflow;

using CPUDevice = Eigen::ThreadPoolDevice;
using GPUDevice = Eigen::GpuDevice;

REGISTER_OP("ForwardFaceIndexMap")
    .Input( "face_index_map:      int32")
    .Output("out_face_index_map:  int32")
;

class ForwardFaceIndexMapOp : public OpKernel {
    public:
        explicit ForwardFaceIndexMapOp(OpKernelConstruction* context) : OpKernel(context) {}

        void Compute(OpKernelContext* context) override {

            const Tensor& inn_face_index_map = context->input(0);

            Tensor* out_face_index_map = NULL;
            OP_REQUIRES_OK(context, context->allocate_output(0, inn_face_index_map.shape(), &out_face_index_map));

            auto inn_face_index_map_flat = inn_face_index_map.  flat<int  >();
            auto out_face_index_map_flat = out_face_index_map-> flat<int  >();

            for (int i = 0;             i  < inn_face_index_map_flat.size(); i++)
                out_face_index_map_flat(i) = inn_face_index_map_flat(i);

    }
};

// CPU registration: tensors live in host memory, which is what the kernel's
// element-wise copy loop accesses.
REGISTER_KERNEL_BUILDER(
    Name("ForwardFaceIndexMap")
    .Device(DEVICE_CPU),
    ForwardFaceIndexMapOp
);

// GPU registration: ForwardFaceIndexMapOp::Compute touches the tensor buffers
// from host code, so a plain .Device(DEVICE_GPU) registration would give it
// device pointers it cannot dereference. HostMemory keeps both the input and
// the output in host memory, so the same kernel stays valid when the op is
// placed on a GPU device.
REGISTER_KERNEL_BUILDER(
    Name("ForwardFaceIndexMap")
    .Device(DEVICE_GPU)
    .HostMemory("face_index_map")
    .HostMemory("out_face_index_map"),
    ForwardFaceIndexMapOp
);

TF new OP-在CPU上运行，在GPU上失败

0 个答案: