TensorFlow 自定义 op 的梯度

时间:2016-03-24 15:40:00

标签: tensorflow

我们想在 TensorFlow 中创建自定义层,因此决定先从一个简单的玩具示例入手:复制层(copy layer)。经过一番反复尝试,我们已经做到让梯度传回正确的值。但是,在第二次迭代时,特征(features)变成了 NaN。这可能只是一个简单的错误,但我目前还看不出问题出在哪里。

总的来说,我有两个问题:

  1. 有人能看出这里的问题出在哪里,以及该如何解决吗?
  2. 调试 TensorFlow 会话(session)有什么好方法?
copy_op.cc

    #include "tensorflow/core/framework/op.h"
    #include "tensorflow/core/framework/op_kernel.h"
    #include <stdio.h>
    
    namespace tensorflow {
    
    
    
    // Short device aliases used as the `Device` template argument of the kernels.
    using CPUDevice = Eigen::ThreadPoolDevice;
    using GPUDevice = Eigen::GpuDevice;
    
    /// Kernel for the "MyCopy" op: produces an output tensor with the same shape
    /// as input 0 and the same value in every element.
    ///
    /// @tparam Device  Device tag the kernel is instantiated for (not used by the body).
    /// @tparam T       Element type of the input and output tensors.
    template<typename Device, typename T>
    class MyCopyOp: public OpKernel {
    public:
        explicit MyCopyOp(OpKernelConstruction* context) :
                OpKernel(context) {
        }
    
        /// Allocates output 0 with the shape of input 0 and copies every element.
        /// Prints the input and the output tensor to stdout for debugging.
        void Compute(OpKernelContext* context) override {
            const Tensor& input = context->input(0);
            auto in_flat = input.flat<T>();
    
            printf("Debug MyCopyOp Features: %s \n",input.DebugString().c_str());
    
            Tensor* output = nullptr;
            OP_REQUIRES_OK(context,
                    context->allocate_output(0, input.shape(), &output));
    
            auto out_flat = output->flat<T>();
    
            // Copy over the flattened view so that *every* element is visited.
            // Indexing per dimension with d * dim_size(d) + i only reaches
            // sum(dim_size) slots (50 of 500 for a [50,10] tensor) and leaves
            // the rest at zero, which later turns log(output) into -inf/NaN.
            const long long num_elements = input.NumElements();
            for (long long i = 0; i < num_elements; ++i) {
                out_flat(i) = in_flat(i);
            }
    
            printf("Debug MyCopyOp Output: %s \n",output->DebugString().c_str());
        }
    
    };
    
    
    template<typename Device, typename T>
    class MyCopyGradOp: public OpKernel {
    public:
        explicit MyCopyGradOp(OpKernelConstruction* context) :
                OpKernel(context) {
    
        }
    
        void Compute(OpKernelContext* context) override {
            printf("called MyCopyGradOp.Compute() \n");
            const Tensor& gradients = context->input(0);
            const Tensor& features = context->input(1);
            printf("Debug MyCopyOpGrad Gradients: %s \n",gradients.DebugString().c_str());
            printf("Debug MyCopyOpGrad Features: %s \n",features.DebugString().c_str());
    
            TensorShape output_shape = features.shape();
    
            Tensor* output = nullptr;
            OP_REQUIRES_OK(context,
                    context->allocate_output(0, output_shape, &output));
            output->flat<T>().setZero();
    
            const T* btm_ptr = gradients.flat<T>().data();
            T* top_ptr = output->flat<T>().data();
    
            for (int i = 0; i < gradients.NumElements(); ++i) {
                top_ptr[i] = btm_ptr[i];
            }
    
            printf("Debug MyCopyOpGrad Output: %s \n",output->DebugString().c_str());
            printf("---------------------------------- \n");
        }
    
    };
    
    
    // Interface of the "MyCopy" op: one input `features` and one output
    // `output`, both of the element type selected by attr T (constrained to
    // `realnumbertype`).
    REGISTER_OP("MyCopy")
    .Input("features: T")
    .Output("output: T")
    .Attr("T: realnumbertype")
    .Doc(R"doc(
    Copies all input values to the output
    )doc");
    
    // Interface of the "MyCopyGrad" op: inputs `gradients` and `features`,
    // output `backprops`, all sharing the element type T.
    // NOTE(review): the .Doc text below is still a placeholder.
    REGISTER_OP("MyCopyGrad")
    .Input("gradients: T")
    .Input("features: T")
    .Output("backprops: T")
    .Attr("T: realnumbertype")
    .Doc(R"doc(
    TODO!!
    )doc");
    
    
    // Registers the CPU kernels of "MyCopy" and "MyCopyGrad" for element type
    // `type`. The macro body ends without a semicolon on purpose: each use
    // below supplies it, so no empty declarations are left behind.
    #define REGISTER_MYCOPY_KERNELS(type)                                           \
      REGISTER_KERNEL_BUILDER(                                                      \
          Name("MyCopy").Device(DEVICE_CPU).TypeConstraint<type>("T"),              \
          MyCopyOp<CPUDevice, type>);                                               \
      REGISTER_KERNEL_BUILDER(                                                      \
          Name("MyCopyGrad").Device(DEVICE_CPU).TypeConstraint<type>("T"),          \
          MyCopyGradOp<CPUDevice, type>)
    
    // GPU kernels are not registered. The disabled draft is kept here, outside
    // the macro: a `//` comment inside a backslash-continued macro swallows
    // every continued line after it, which is easy to break by accident.
    //
    //   REGISTER_KERNEL_BUILDER(
    //       Name("MyCopy").Device(DEVICE_GPU).TypeConstraint<type>("T"),
    //       MyCopyOp<Eigen::GpuDevice, type>);
    //   REGISTER_KERNEL_BUILDER(
    //       Name("MyCopyGrad").Device(DEVICE_GPU).TypeConstraint<type>("T"),
    //       MyCopyGradOp<Eigen::GpuDevice, type>);
    
    
    REGISTER_MYCOPY_KERNELS(float);
    REGISTER_MYCOPY_KERNELS(int);
    REGISTER_MYCOPY_KERNELS(double);
    
    
    }
    

    我们使用简单的MNIST示例作为基础:

    layer_test.py

    from tensorflow.examples.tutorials.mnist import input_data
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    
    import tensorflow as tf
    from tensorflow.python.framework import ops
    copy_op_module = tf.load_op_library('copy_op.so')
    
    @ops.RegisterGradient("MyCopy")
    def _CopyOpGrad(op, grad):
      """Gradient function registered for the MyCopy op.

      Args:
        op: the MyCopy operation being differentiated; its first input is
          forwarded to the gradient op.
        grad: gradient with respect to the MyCopy output.

      Returns:
        The result of copy_op_module.my_copy_grad(grad, op.inputs[0]).
      """
      forward_input = op.inputs[0]
      return copy_op_module.my_copy_grad(grad, forward_input)
    
    # Build the softmax-regression graph with the custom MyCopy op spliced in
    # between the softmax output and the loss.
    sess = tf.InteractiveSession()
    
    # Flattened 784-value inputs and 10-way one-hot labels.
    x = tf.placeholder(tf.float32, shape=[None, 784])
    y_ = tf.placeholder(tf.float32, shape=[None, 10])
    
    W = tf.Variable(tf.zeros([784,10]))
    b = tf.Variable(tf.zeros([10]))
    
    sess.run(tf.initialize_all_variables())
    
    y1 = tf.nn.softmax(tf.matmul(x,W) + b)
    y = copy_op_module.my_copy(y1)            # Here: MyCopy Layer is inserted
    
    # NOTE(review): tf.log(y) evaluates to -inf as soon as any entry of y is 0,
    # which then propagates NaN through the loss and its gradient.
    cross_entropy = -tf.reduce_sum(y_*tf.log(y))
    
    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)
    
    # Run two gradient-descent steps, each on a mini-batch of 50 examples.
    for _step in range(2):
      minibatch = mnist.train.next_batch(50)
      train_feed = {x: minibatch[0], y_: minibatch[1]}
      train_step.run(feed_dict=train_feed)
    
    # Fraction of test examples whose arg-max prediction matches the label.
    correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    test_feed = {x: mnist.test.images, y_: mnist.test.labels}
    print(accuracy.eval(feed_dict=test_feed))
    

    编译

    # Ask the installed TensorFlow package for its header and library directories.
    TF_INC=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_include())')
    TF_LIB=$(python -c 'import tensorflow as tf; print(tf.sysconfig.get_lib())')
    # Build the op as a shared library. The paths are quoted so directories
    # containing spaces survive, and the rpath is passed as a single -Wl argument.
    g++ -std=c++11 -shared copy_op.cc -o copy_op.so -I "$TF_INC" -L "$TF_LIB" -fPIC -Wl,-rpath,"$TF_LIB"
    

    输出:

    Debug MyCopyOp Features: Tensor<type: float shape: [50,10] values: 0.1 0.1 0.1...> 
    Debug MyCopyOp Output: Tensor<type: float shape: [50,10] values: 0.1 0.1 0.1...> 
    called MyCopyGradOp.Compute() 
    Debug MyCopyOpGrad Gradients: Tensor<type: float shape: [50,10] values: -0 -0 -0...> 
    Debug MyCopyOpGrad Features: Tensor<type: float shape: [50,10] values: 0.1 0.1 0.1...> 
    Debug MyCopyOpGrad Output: Tensor<type: float shape: [50,10] values: -0 -0 -0...> 
    ---------------------------------- 
    Debug MyCopyOp Features: Tensor<type: float shape: [50,10] values: nan nan nan...> 
    Debug MyCopyOp Output: Tensor<type: float shape: [50,10] values: nan nan nan...> 
    called MyCopyGradOp.Compute() 
    Debug MyCopyOpGrad Gradients: Tensor<type: float shape: [50,10] values: nan nan nan...> 
    Debug MyCopyOpGrad Features: Tensor<type: float shape: [50,10] values: nan nan nan...> 
    Debug MyCopyOpGrad Output: Tensor<type: float shape: [50,10] values: nan nan nan...> 
    ---------------------------------- 
    Debug MyCopyOp Features: Tensor<type: float shape: [10000,10] values: nan nan nan...> 
    Debug MyCopyOp Output: Tensor<type: float shape: [10000,10] values: nan nan nan...> 
    0.098
    

    提前多多感谢!

1 个答案:

答案 0 :(得分:0)

来自 mrry 的评论:使用 -tf.reduce_sum(y_ * tf.log(y)) 来计算交叉熵存在已知的数值稳定性问题(请改用 tf.nn.softmax_cross_entropy_with_logits(y, y_));此外,将变量 W 初始化为零,通常会得到比随机初始化更差的结果。这个回答(This answer)中有关于权重初始化问题的更多详细信息。