How do I correctly implement a custom loss function in Caffe?

Asked: 2018-01-11 22:30:43

Tags: c++ caffe euclidean-distance loss-function

I am a beginner with Caffe, and I am implementing a custom loss function for it. However, an error occurs when I run the tests.

My loss function is similar to the Euclidean loss. The original Euclidean loss equation is as follows:

E = \frac{1}{2N} \sum_{n=1}^{N} \left\| \hat{y}_n - y_n \right\|_2^2

I want to implement a 2-D distance loss instead, so I set up the following equation:

E = \frac{1}{N} \sum_{n} \left[ (x_n - \hat{x}_n)^2 + (y_n - \hat{y}_n)^2 \right]

where the sum runs over all (x, y) coordinate pairs, (x_n, y_n) comes from the first bottom blob, (\hat{x}_n, \hat{y}_n) from the second, and N is the batch size (num).

When I run the tests, the failure is in the backward function. I think my backpropagation is wrong, but I am not sure what the problem is. I simply modified the Euclidean loss to fit my loss function and wrote my loss function's gradient into the backward pass. Do you know why the error occurs?
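Concretely, the gradient I wrote into the backward pass (the alpha_0 / alpha_1 terms in the code below) is, for each coordinate pair of the first bottom blob:

\frac{\partial E}{\partial x_n} = \frac{2 (x_n - \hat{x}_n) + (y_n - \hat{y}_n)^2}{N}, \qquad \frac{\partial E}{\partial y_n} = \frac{(x_n - \hat{x}_n)^2 + 2 (y_n - \hat{y}_n)}{N}

with the sign of the factor-2 terms flipped for the second bottom blob.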

imgdist_loss_layer.cpp

#include <vector>

#include "caffe/layers/imgdist_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

    template <typename Dtype>
    void ImgdistLossLayer<Dtype>::Reshape(
        const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
        LossLayer<Dtype>::Reshape(bottom, top);
        CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
            << "Inputs must have the same dimension.";
        diff_.ReshapeLike(*bottom[0]);
    }

// forward propagation
// calculate loss
    template <typename Dtype>
    void ImgdistLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
        const vector<Blob<Dtype>*>& top) {
        int count = bottom[0]->count() / 2;

        Dtype loss = 0;
        for (int i = 0; i < count; ++i) {
            Dtype x_sub = bottom[0]->cpu_data()[2 * i] - bottom[1]->cpu_data()[2 * i];
            Dtype y_sub = bottom[0]->cpu_data()[2 * i + 1] - bottom[1]->cpu_data()[2 * i + 1];

            loss += x_sub*x_sub + y_sub*y_sub;
        }
        loss = loss / bottom[0]->num();
        top[0]->mutable_cpu_data()[0] = loss;
    }

// back propagation
// calculate gradient
    template <typename Dtype>
    void ImgdistLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
        const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {

        for (int i = 0; i < 2; ++i) {
            if (propagate_down[i]) {
                const Dtype* bottom_data_0 = bottom[0]->cpu_data();
                const Dtype* bottom_data_1 = bottom[1]->cpu_data();
                Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();


                const int count = bottom[0]->count() / 2;
                for (int j = 0; j < count; ++j) {

                    const Dtype x_sub = bottom_data_0[2 * j] - bottom_data_1[2 * j];
                    const Dtype y_sub = bottom_data_0[2 * j + 1] - bottom_data_1[2 * j + 1];

                    const Dtype sign = (i == 0) ? 1 : -1;
                    const Dtype alpha_0 = (sign * Dtype(2) * x_sub + y_sub * y_sub) / bottom[i]->num();
                    const Dtype alpha_1 = (x_sub * x_sub + sign * Dtype(2) * y_sub) / bottom[i]->num();


                    bottom_diff[2 * j] = top[0]->cpu_diff()[0] * alpha_0;
                    bottom_diff[2 * j + 1] = top[0]->cpu_diff()[0] * alpha_1;

                } // j
            }
        } // i
    }


#ifdef CPU_ONLY
    STUB_GPU(ImgdistLossLayer);
#endif

    INSTANTIATE_CLASS(ImgdistLossLayer);
    REGISTER_LAYER_CLASS(ImgdistLoss);

} // namespace caffe
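To make the intended computation concrete, here is a minimal standalone sketch of the same forward pass outside Caffe (hypothetical names; it assumes the blobs store interleaved (x, y) coordinates, as the indexing above does):

#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical stand-in for Forward_cpu: pred and target hold interleaved
// (x, y) pairs; num plays the role of bottom[0]->num().
double imgdist_loss(const std::vector<double>& pred,
                    const std::vector<double>& target, int num) {
  double loss = 0.0;
  for (std::size_t i = 0; i + 1 < pred.size(); i += 2) {
    const double x_sub = pred[i] - target[i];
    const double y_sub = pred[i + 1] - target[i + 1];
    loss += x_sub * x_sub + y_sub * y_sub;  // squared 2-D distance per pair
  }
  return loss / num;  // normalize by batch size, as the layer does
}

int main() {
  // One sample (num = 1) containing two (x, y) pairs.
  const std::vector<double> pred   = {1.0, 2.0, 3.0, 4.0};
  const std::vector<double> target = {0.0, 0.0, 3.0, 2.0};
  std::cout << imgdist_loss(pred, target, 1) << std::endl;  // prints 9
  return 0;
}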

imgdist_loss_layer.cu

#include <vector>

#include "caffe/layers/imgdist_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// forward propagation loop
template <typename Dtype>
__global__ void imgdistLossForwardGPU(const int nthreads, 
    const Dtype* input_data, const Dtype* target, Dtype* loss) {
        CUDA_KERNEL_LOOP(i, nthreads) {
            loss[i] = (input_data[2 * i] - target[2 * i]) * (input_data[2 * i] - target[2 * i])
                + (input_data[2 * i + 1] - target[2 * i + 1]) * (input_data[2 * i + 1] - target[2 * i + 1]);

        }

}


// forward propagation
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
        const int count = bottom[0]->count() / 2;

        const Dtype* input_data = bottom[0]->gpu_data();
        const Dtype* target = bottom[1]->gpu_data();
        Dtype* loss_data = bottom[0]->mutable_gpu_diff();

        imgdistLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
            count, input_data, target, loss_data);
        CUDA_POST_KERNEL_CHECK;


        Dtype loss;
        caffe_gpu_asum(count, loss_data, &loss);

        loss = loss / bottom[0]->num();
        top[0]->mutable_cpu_data()[0] = loss;

    }

// back propagation loop
template <typename Dtype>
__global__ void imgdistLossBackwardGPU(const int nthreads,
    const Dtype* input_data, const Dtype* target, Dtype* diff, 
    const Dtype sign, const Dtype toploss, const Dtype bottom_num) {
        CUDA_KERNEL_LOOP(i, nthreads) {

            const Dtype x_sub = input_data[2 * i] - target[2 * i];
            const Dtype y_sub = input_data[2 * i + 1] - target[2 * i + 1];
            const Dtype alpha_0 = (sign * Dtype(2) * x_sub + y_sub * y_sub) / bottom_num;
            const Dtype alpha_1 = (x_sub * x_sub + sign * Dtype(2) * y_sub) / bottom_num;

            diff[2 * i] = toploss * alpha_0;
            diff[2 * i + 1] = toploss * alpha_1;
        }


    }

// back propagation
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {

        for (int i = 0; i < 2; ++i) {
            if (propagate_down[i]) {
                const Dtype sign = (i == 0) ? 1 : -1;
                const int count = bottom[0]->count() / 2;
                const Dtype* input_data = bottom[0]->gpu_data();
                const Dtype* target = bottom[1]->gpu_data();
                const Dtype toploss = top[0]->cpu_diff()[0];
                const Dtype bottom_num = bottom[i]->num();
                Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();


                imgdistLossBackwardGPU<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
                    count, input_data, target, bottom_diff, sign, toploss, bottom_num);
                CUDA_POST_KERNEL_CHECK;
            }
        }


    }

INSTANTIATE_LAYER_GPU_FUNCS(ImgdistLossLayer);

}  // namespace caffe
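One note on the reduction in Forward_gpu: caffe_gpu_asum computes a sum of absolute values, which here equals the plain sum I want, because every per-pair value written by the forward kernel is non-negative:

\operatorname{asum}(l) = \sum_{i=1}^{n} |l_i| = \sum_{i=1}^{n} l_i \quad \text{since } l_i = x_{\mathrm{sub},i}^2 + y_{\mathrm{sub},i}^2 \ge 0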

imgdist_loss_layer.hpp (only the class name was changed)

#ifndef CAFFE_IMGDIST_LOSS_LAYER_HPP_
#define CAFFE_IMGDIST_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"


namespace caffe {

    template <typename Dtype>
    class ImgdistLossLayer : public LossLayer<Dtype> {
    public:
        explicit ImgdistLossLayer(const LayerParameter& param)
            : LossLayer<Dtype>(param), diff_() {}
        virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
            const vector<Blob<Dtype>*>& top);

        virtual inline const char* type() const { return "ImgdistLoss"; }

        virtual inline bool AllowForceBackward(const int bottom_index) const {
            return true;
        }


    protected:
        /// @copydoc EuclideanLossLayer
        virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
            const vector<Blob<Dtype>*>& top);
        virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
            const vector<Blob<Dtype>*>& top);


        virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
            const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
        virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
            const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

        Blob<Dtype> diff_;
    };

}  // namespace caffe

#endif  // CAFFE_IMGDIST_LOSS_LAYER_HPP_

test_imgdist_loss_layer.cpp

#include <cmath>
#include <vector>

#include "gtest/gtest.h"

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layers/imgdist_loss_layer.hpp"

#include "caffe/test/test_caffe_main.hpp"
#include "caffe/test/test_gradient_check_util.hpp"

namespace caffe {

    template<typename TypeParam>
    class ImgdistLossLayerTest : public MultiDeviceTest<TypeParam> {
        typedef typename TypeParam::Dtype Dtype;


    protected:
        ImgdistLossLayerTest()
            : blob_bottom_data_(new Blob<Dtype>(10, 5, 1, 1)),
            blob_bottom_label_(new Blob<Dtype>(10, 5, 1, 1)),
            blob_top_loss_(new Blob<Dtype>()) {
            // fill the values
            FillerParameter filler_param;
            GaussianFiller<Dtype> filler(filler_param);
            filler.Fill(this->blob_bottom_data_);
            blob_bottom_vec_.push_back(blob_bottom_data_);
            filler.Fill(this->blob_bottom_label_);
            blob_bottom_vec_.push_back(blob_bottom_label_);
            blob_top_vec_.push_back(blob_top_loss_);
        }
        virtual ~ImgdistLossLayerTest() {
            delete blob_bottom_data_;
            delete blob_bottom_label_;
            delete blob_top_loss_;
        }

        void TestForward() {
            // Get the loss without a specified objective weight -- should be
            // equivalent to explicitly specifying a weight of 1.
            LayerParameter layer_param;
            ImgdistLossLayer<Dtype> layer_weight_1(layer_param);
            layer_weight_1.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
            const Dtype loss_weight_1 =
                layer_weight_1.Forward(this->blob_bottom_vec_, this->blob_top_vec_);

            // Get the loss again with a different objective weight; check that it is
            // scaled appropriately.
            const Dtype kLossWeight = 3.7;
            layer_param.add_loss_weight(kLossWeight);
            ImgdistLossLayer<Dtype> layer_weight_2(layer_param);
            layer_weight_2.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
            const Dtype loss_weight_2 =
                layer_weight_2.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
            const Dtype kErrorMargin = 1e-5;
            EXPECT_NEAR(loss_weight_1 * kLossWeight, loss_weight_2, kErrorMargin);
            // Make sure the loss is non-trivial.
            const Dtype kNonTrivialAbsThresh = 1e-1;
            EXPECT_GE(fabs(loss_weight_1), kNonTrivialAbsThresh);
        }

        Blob<Dtype>* const blob_bottom_data_;
        Blob<Dtype>* const blob_bottom_label_;
        Blob<Dtype>* const blob_top_loss_;
        vector<Blob<Dtype>*> blob_bottom_vec_;
        vector<Blob<Dtype>*> blob_top_vec_;
    };

    TYPED_TEST_CASE(ImgdistLossLayerTest, TestDtypesAndDevices);

    TYPED_TEST(ImgdistLossLayerTest, TestForward) {
        this->TestForward();
    }

    TYPED_TEST(ImgdistLossLayerTest, TestGradient) {
        typedef typename TypeParam::Dtype Dtype;
        LayerParameter layer_param;
        const Dtype kLossWeight = 3.7;
        layer_param.add_loss_weight(kLossWeight);
        ImgdistLossLayer<Dtype> layer(layer_param);
        layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
        GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
        checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
            this->blob_top_vec_);
    }

}
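For reference, the GradientChecker(1e-2, 1e-2, 1701) used above (stepsize, threshold, seed) compares the analytic gradient produced by Backward against a numeric central-difference estimate of the loss, element by element. The following is a minimal standalone sketch of that idea, not Caffe's actual implementation (all names hypothetical):

#include <cmath>
#include <cstdio>
#include <functional>
#include <vector>

// Sketch of a central-difference gradient check: compare an analytic
// gradient against (E(x + eps) - E(x - eps)) / (2 * eps) per element.
bool check_gradient(const std::function<double(const std::vector<double>&)>& E,
                    std::vector<double> x,
                    const std::vector<double>& analytic_grad,
                    double eps, double threshold) {
  for (std::size_t i = 0; i < x.size(); ++i) {
    const double orig = x[i];
    x[i] = orig + eps;
    const double plus = E(x);
    x[i] = orig - eps;
    const double minus = E(x);
    x[i] = orig;  // restore the perturbed element
    const double numeric = (plus - minus) / (2.0 * eps);
    if (std::fabs(numeric - analytic_grad[i]) > threshold) {
      std::printf("mismatch at %zu: numeric %g vs analytic %g\n",
                  i, numeric, analytic_grad[i]);
      return false;  // the kind of failure runtest reports
    }
  }
  return true;
}

int main() {
  // Toy check: E(v) = v0^2 + v1^2 with analytic gradient (2*v0, 2*v1).
  auto E = [](const std::vector<double>& v) {
    return v[0] * v[0] + v[1] * v[1];
  };
  const std::vector<double> x = {1.0, -2.0};
  const std::vector<double> g = {2.0 * x[0], 2.0 * x[1]};
  std::printf("check passed: %d\n",
              static_cast<int>(check_gradient(E, x, g, 1e-2, 1e-2)));
  return 0;
}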

0 Answers