TensorFlow 新 Op:在 CPU 上运行正常,在 GPU 上失败

时间:2018-07-30 21:20:41

标签: python tensorflow operator-overloading

我想注册一个新的 Op(参见官方文档),为此编写了下面粘贴的文件;该 Op 只是将输入复制到输出。当在 REGISTER_KERNEL_BUILDER 中以 .Device(DEVICE_CPU) 注册时,此 Op 可以在 CPU 上成功运行。

但是,当我切换到 .Device(DEVICE_GPU) 时,遇到了段错误(segmentation fault),到目前为止,我的各种尝试都未能找出发生这种情况的原因。

有什么建议或线索吗?

我使用的是通过 pip 安装的 tensorflow-gpu==1.4。

#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"

#include <vector>

// Pull the tensorflow namespace into scope so OpKernel, Tensor, etc. can be
// named without qualification below.
using namespace tensorflow;

// Short aliases for the Eigen device types. Neither alias is referenced by
// the code visible in this file.
using CPUDevice = Eigen::ThreadPoolDevice;
using GPUDevice = Eigen::GpuDevice;

REGISTER_OP("ForwardFaceIndexMap")
    .Input( "face_index_map:      int32")
    .Output("out_face_index_map:  int32")
;

class ForwardFaceIndexMapOp : public OpKernel {
    public:
        explicit ForwardFaceIndexMapOp(OpKernelConstruction* context) : OpKernel(context) {}

        void Compute(OpKernelContext* context) override {

            const Tensor& inn_face_index_map = context->input(0);

            Tensor* out_face_index_map = NULL;
            OP_REQUIRES_OK(context, context->allocate_output(0, inn_face_index_map.shape(), &out_face_index_map));

            auto inn_face_index_map_flat = inn_face_index_map.  flat<int  >();
            auto out_face_index_map_flat = out_face_index_map-> flat<int  >();

            for (int i = 0;             i  < inn_face_index_map_flat.size(); i++)
                out_face_index_map_flat(i) = inn_face_index_map_flat(i);

    }
};

// Binds ForwardFaceIndexMapOp as the CPU implementation of the
// "ForwardFaceIndexMap" op.
// NOTE(review): the kernel copies data with a plain host-side loop, so simply
// switching this to DEVICE_GPU is presumably what triggers the reported
// crash; a GPU registration would likely need .HostMemory(...) on both
// tensors or a dedicated CUDA kernel -- confirm against the TensorFlow
// custom-op guide.
REGISTER_KERNEL_BUILDER(Name("ForwardFaceIndexMap").Device(DEVICE_CPU),
                        ForwardFaceIndexMapOp);

0 个答案:

没有答案