I am a beginner with Caffe, and I am implementing a custom loss function in Caffe, but an error occurs when I run the test.
My loss function is similar to the Euclidean loss. The original Euclidean loss equation is as follows.
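(This is the loss computed by Caffe's EuclideanLossLayer, where $\hat{y}_n$ is the prediction, $y_n$ is the target, and $N$ is the batch size.)

$$E = \frac{1}{2N} \sum_{n=1}^{N} \lVert \hat{y}_n - y_n \rVert_2^2$$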
I want to implement a 2D distance loss, so I came up with the equation below.
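As implemented in Forward_cpu / Forward_gpu below, the bottom blobs hold interleaved $(x, y)$ coordinate pairs and the loss is

$$L = \frac{1}{N} \sum_{i} \left[ (x_i - x_i')^2 + (y_i - y_i')^2 \right]$$

where $(x_i, y_i)$ comes from bottom[0], $(x_i', y_i')$ comes from bottom[1], and $N$ is bottom[0]->num().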
Then, when I run the test, it fails in the backward function. I think my back-propagation is wrong, but I am not sure what the problem is. I simply modified the Euclidean loss to fit my loss function and wrote the gradient of my loss function in the backward pass. Do you know why the error occurs?
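For reference, the gradient I wrote in Backward_cpu / Backward_gpu for bottom[0] is the following (the sign of the linear term is flipped for bottom[1], and everything is scaled by the top diff top[0]->cpu_diff()[0]):

$$\frac{\partial L}{\partial x_i} = \frac{2 (x_i - x_i') + (y_i - y_i')^2}{N}, \qquad \frac{\partial L}{\partial y_i} = \frac{(x_i - x_i')^2 + 2 (y_i - y_i')}{N}$$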
imgdist_loss_layer.cpp
#include <vector>

#include "caffe/layers/imgdist_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void ImgdistLossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  CHECK_EQ(bottom[0]->count(1), bottom[1]->count(1))
      << "Inputs must have the same dimension.";
  diff_.ReshapeLike(*bottom[0]);
}

// forward propagation
// calculate loss
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count() / 2;
  Dtype loss = 0;
  for (int i = 0; i < count; ++i) {
    Dtype x_sub = bottom[0]->cpu_data()[2 * i] - bottom[1]->cpu_data()[2 * i];
    Dtype y_sub = bottom[0]->cpu_data()[2 * i + 1] - bottom[1]->cpu_data()[2 * i + 1];
    loss += x_sub * x_sub + y_sub * y_sub;
  }
  loss = loss / bottom[0]->num();
  top[0]->mutable_cpu_data()[0] = loss;
}

// back propagation
// calculate gradient
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  for (int i = 0; i < 2; ++i) {
    if (propagate_down[i]) {
      const Dtype* bottom_data_0 = bottom[0]->cpu_data();
      const Dtype* bottom_data_1 = bottom[1]->cpu_data();
      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
      const int count = bottom[0]->count() / 2;
      for (int j = 0; j < count; ++j) {
        const Dtype x_sub = bottom_data_0[2 * j] - bottom_data_1[2 * j];
        const Dtype y_sub = bottom_data_0[2 * j + 1] - bottom_data_1[2 * j + 1];
        const Dtype sign = (i == 0) ? 1 : -1;
        const Dtype alpha_0 = (sign * Dtype(2) * x_sub + y_sub * y_sub) / bottom[i]->num();
        const Dtype alpha_1 = (x_sub * x_sub + sign * Dtype(2) * y_sub) / bottom[i]->num();
        bottom_diff[2 * j] = top[0]->cpu_diff()[0] * alpha_0;
        bottom_diff[2 * j + 1] = top[0]->cpu_diff()[0] * alpha_1;
      }  // j
    }
  }  // i
}

#ifdef CPU_ONLY
STUB_GPU(ImgdistLossLayer);
#endif
INSTANTIATE_CLASS(ImgdistLossLayer);
REGISTER_LAYER_CLASS(ImgdistLoss);
} // namespace caffe
imgdist_loss_layer.cu
#include <vector>

#include "caffe/layers/imgdist_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

// forward propagation loop
template <typename Dtype>
__global__ void imgdistLossForwardGPU(const int nthreads,
    const Dtype* input_data, const Dtype* target, Dtype* loss) {
  CUDA_KERNEL_LOOP(i, nthreads) {
    loss[i] = (input_data[2 * i] - target[2 * i]) * (input_data[2 * i] - target[2 * i])
        + (input_data[2 * i + 1] - target[2 * i + 1]) * (input_data[2 * i + 1] - target[2 * i + 1]);
  }
}

// forward propagation
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const int count = bottom[0]->count() / 2;
  const Dtype* input_data = bottom[0]->gpu_data();
  const Dtype* target = bottom[1]->gpu_data();
  Dtype* loss_data = bottom[0]->mutable_gpu_diff();
  imgdistLossForwardGPU<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, input_data, target, loss_data);
  CUDA_POST_KERNEL_CHECK;
  Dtype loss;
  caffe_gpu_asum(count, loss_data, &loss);
  loss = loss / bottom[0]->num();
  top[0]->mutable_cpu_data()[0] = loss;
}

// back propagation loop
template <typename Dtype>
__global__ void imgdistLossBackwardGPU(const int nthreads,
    const Dtype* input_data, const Dtype* target, Dtype* diff,
    const Dtype sign, const Dtype toploss, const Dtype bottom_num) {
  CUDA_KERNEL_LOOP(i, nthreads) {
    const Dtype x_sub = input_data[2 * i] - target[2 * i];
    const Dtype y_sub = input_data[2 * i + 1] - target[2 * i + 1];
    const Dtype alpha_0 = (sign * Dtype(2) * x_sub + y_sub * y_sub) / bottom_num;
    const Dtype alpha_1 = (x_sub * x_sub + sign * Dtype(2) * y_sub) / bottom_num;
    diff[2 * i] = toploss * alpha_0;
    diff[2 * i + 1] = toploss * alpha_1;
  }
}

// back propagation
template <typename Dtype>
void ImgdistLossLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  for (int i = 0; i < 2; ++i) {
    if (propagate_down[i]) {
      const Dtype sign = (i == 0) ? 1 : -1;
      const int count = bottom[0]->count() / 2;
      const Dtype* input_data = bottom[0]->gpu_data();
      const Dtype* target = bottom[1]->gpu_data();
      const Dtype toploss = top[0]->cpu_diff()[0];
      const Dtype bottom_num = bottom[i]->num();
      Dtype* bottom_diff = bottom[i]->mutable_gpu_diff();
      imgdistLossBackwardGPU<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
          count, input_data, target, bottom_diff, sign, toploss, bottom_num);
      CUDA_POST_KERNEL_CHECK;
    }
  }
}

INSTANTIATE_LAYER_GPU_FUNCS(ImgdistLossLayer);

}  // namespace caffe
imgdist_loss_layer.hpp (only the class name is changed)
#ifndef CAFFE_IMGDIST_LOSS_LAYER_HPP_
#define CAFFE_IMGDIST_LOSS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/loss_layer.hpp"

namespace caffe {

template <typename Dtype>
class ImgdistLossLayer : public LossLayer<Dtype> {
 public:
  explicit ImgdistLossLayer(const LayerParameter& param)
      : LossLayer<Dtype>(param), diff_() {}
  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);

  virtual inline const char* type() const { return "ImgdistLoss"; }

  virtual inline bool AllowForceBackward(const int bottom_index) const {
    return true;
  }

 protected:
  /// @copydoc EuclideanLossLayer
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  Blob<Dtype> diff_;
};

}  // namespace caffe
#endif  // CAFFE_IMGDIST_LOSS_LAYER_HPP_
test_imgdist_loss_layer.cpp
#include <cmath>
#include <vector>

#include "gtest/gtest.h"

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layers/imgdist_loss_layer.hpp"

#include "caffe/test/test_caffe_main.hpp"
#include "caffe/test/test_gradient_check_util.hpp"

namespace caffe {

template <typename TypeParam>
class ImgdistLossLayerTest : public MultiDeviceTest<TypeParam> {
  typedef typename TypeParam::Dtype Dtype;

 protected:
  ImgdistLossLayerTest()
      : blob_bottom_data_(new Blob<Dtype>(10, 5, 1, 1)),
        blob_bottom_label_(new Blob<Dtype>(10, 5, 1, 1)),
        blob_top_loss_(new Blob<Dtype>()) {
    // fill the values
    FillerParameter filler_param;
    GaussianFiller<Dtype> filler(filler_param);
    filler.Fill(this->blob_bottom_data_);
    blob_bottom_vec_.push_back(blob_bottom_data_);
    filler.Fill(this->blob_bottom_label_);
    blob_bottom_vec_.push_back(blob_bottom_label_);
    blob_top_vec_.push_back(blob_top_loss_);
  }
  virtual ~ImgdistLossLayerTest() {
    delete blob_bottom_data_;
    delete blob_bottom_label_;
    delete blob_top_loss_;
  }

  void TestForward() {
    // Get the loss without a specified objective weight -- should be
    // equivalent to explicitly specifying a weight of 1.
    LayerParameter layer_param;
    ImgdistLossLayer<Dtype> layer_weight_1(layer_param);
    layer_weight_1.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
    const Dtype loss_weight_1 =
        layer_weight_1.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
    // Get the loss again with a different objective weight; check that it is
    // scaled appropriately.
    const Dtype kLossWeight = 3.7;
    layer_param.add_loss_weight(kLossWeight);
    ImgdistLossLayer<Dtype> layer_weight_2(layer_param);
    layer_weight_2.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
    const Dtype loss_weight_2 =
        layer_weight_2.Forward(this->blob_bottom_vec_, this->blob_top_vec_);
    const Dtype kErrorMargin = 1e-5;
    EXPECT_NEAR(loss_weight_1 * kLossWeight, loss_weight_2, kErrorMargin);
    // Make sure the loss is non-trivial.
    const Dtype kNonTrivialAbsThresh = 1e-1;
    EXPECT_GE(fabs(loss_weight_1), kNonTrivialAbsThresh);
  }

  Blob<Dtype>* const blob_bottom_data_;
  Blob<Dtype>* const blob_bottom_label_;
  Blob<Dtype>* const blob_top_loss_;
  vector<Blob<Dtype>*> blob_bottom_vec_;
  vector<Blob<Dtype>*> blob_top_vec_;
};

TYPED_TEST_CASE(ImgdistLossLayerTest, TestDtypesAndDevices);

TYPED_TEST(ImgdistLossLayerTest, TestForward) {
  this->TestForward();
}

TYPED_TEST(ImgdistLossLayerTest, TestGradient) {
  typedef typename TypeParam::Dtype Dtype;
  LayerParameter layer_param;
  const Dtype kLossWeight = 3.7;
  layer_param.add_loss_weight(kLossWeight);
  ImgdistLossLayer<Dtype> layer(layer_param);
  layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
  GradientChecker<Dtype> checker(1e-2, 1e-2, 1701);
  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
      this->blob_top_vec_);
}

}  // namespace caffe
The error log is as follows.