每次运行 Caffe 时,我的模型在图像数据集上计算出的初始损失都完全相同。这种行为难道不奇怪吗?按理说,每次运行模型时,初始损失应该(至少略微)有所不同。
如果我在损失层中使用 SoftmaxWithLoss,则损失值在后续迭代中保持不变。此外,如果我在反卷积层中设置 lr_mult = 0,损失在各次迭代中同样保持恒定;如果设置 lr_mult = 1,损失值会发生变化,但在几千次迭代之后最终又回到初始损失值。
对此有什么想法吗?
非常感谢任何帮助。
以下是我的求解器(solver)文件:
# ---- Optimisation ----
# "fixed" keeps the learning rate at base_lr for the whole run.
lr_policy: "fixed"
base_lr: 1e-7
momentum: 0.5
# Gradients are accumulated over iter_size forward/backward passes per update.
iter_size: 1
max_iter: 1000000
# Alternative settings that were tried and are currently disabled:
# base_lr: 1e-9
# momentum: 0.99
# iter_size: 1
#weight_decay: 0.0005

# ---- Evaluation (py solving tests) ----
# Every test_interval training iterations, run test_iter test batches.
test_interval: 100
test_iter: 100
#test_initialization: false

# ---- Logging ----
display: 100
#average_loss: 100

# ---- Snapshots ----
snapshot: 1000
# Also store the gradients (diffs) in each snapshot.
snapshot_diff: true

# ---- Hardware ----
solver_mode: GPU
以下是我的卷积神经网络的架构。
# Input 128 640 3
# Conv1 (kernel=3) 126 638 64
# ReLU
# Conv2 (kernel=3) 124 636 128
# ReLU
# Conv3 (kernel=3) 122 634 128
# ReLU
# Pool1 (kernel=2) 61 317 128
# Conv4 (kernel=4) 58 314 128
# ReLU
# Conv5 (kernel=4) 55 311 256
# ReLU
# Conv6 (kernel=4) 52 308 128
# ReLU
# Pool2 (kernel=2) 26 154 128
# Conv7 (kernel=4,stride=2,pad=3) 15 79 5
# ReLU
# Decon1 (kernel=16,stride=8,pad=0) 128 640 5
# ReLU
# Loss
name: "Conv-Deconv-Net"

# Training inputs: images and labels are read from two separate LMDB
# databases, both in batches of 4.
# NOTE(review): presumably both LMDBs store their records in the same order so
# that each image is paired with its own label - confirm against the script
# that generated them.
layer {
  name: "data"
  type: "Data"
  include { phase: TRAIN }
  top: "data"
  data_param {
    backend: LMDB
    batch_size: 4
    source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/training_lmdb_files/training_files_orig_IMG_LMDB_olpywm"
  }
}
layer {
  name: "label"
  type: "Data"
  include { phase: TRAIN }
  top: "label"
  data_param {
    backend: LMDB
    batch_size: 4
    source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/training_lmdb_files/training_files_orig_LBL_LMDB_olpywm"
  }
}
# Test-phase inputs: same layer names and tops as the TRAIN-phase input
# layers, but reading from the testing LMDB databases (batches of 4).
layer {
  name: "data"
  type: "Data"
  include { phase: TEST }
  top: "data"
  data_param {
    backend: LMDB
    batch_size: 4
    source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/testing_lmdb_files/testing_files_IMG_LMDB_olpywm"
  }
}
layer {
  name: "label"
  type: "Data"
  include { phase: TEST }
  top: "label"
  data_param {
    backend: LMDB
    batch_size: 4
    source: "F:/shripati/v9/Models/3_Conv_Deconv_Arch_SoftmaxWithLoss/Data/testing_lmdb_files/testing_files_LBL_LMDB_olpywm"
  }
}
# conv1: 64 filters, 3x3, stride 1, no padding (each spatial dimension
# shrinks by 2), followed by ReLU written to a separate blob "relu1".
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # First param block: weights. Second param block: bias (2x learning rate,
  # no weight decay).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 64
    kernel_size: 3
    stride: 1
    pad: 0
    weight_filler { type: "xavier" }
    # No explicit value given for the constant bias filler.
    bias_filler { type: "constant" }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "relu1"
}
# conv2: 128 filters, 3x3, no padding, stride left unspecified; followed by
# ReLU written to a separate blob "relu2".
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "relu1"
  top: "conv2"
  # Weights, then bias (2x learning rate, no weight decay).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 0
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "relu2"
}
# conv3: 128 filters, 3x3, stride 1, no padding; biases start at 0.001.
# Followed by ReLU written to a separate blob "relu3".
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "relu2"
  top: "conv3"
  # Weights, then bias (2x learning rate, no weight decay).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128
    kernel_size: 3
    stride: 1
    pad: 0
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.001 }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "relu3"
}
# pool1: non-overlapping 2x2 max pooling (stride 2) over relu3.
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "relu3"
  top: "pool1"
  pooling_param { pool: MAX kernel_size: 2 stride: 2 }
}
# conv4: 128 filters, 4x4, stride 1, no padding (each spatial dimension
# shrinks by 3); biases start at 0.001. Followed by ReLU -> "relu4".
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "pool1"
  top: "conv4"
  # Weights, then bias (2x learning rate, no weight decay).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128
    kernel_size: 4
    stride: 1
    pad: 0
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.001 }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "relu4"
}
# conv5: 256 filters, 4x4, stride 1, no padding; biases start at 0.001.
# Followed by ReLU written to a separate blob "relu5".
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "relu4"
  top: "conv5"
  # Weights, then bias (2x learning rate, no weight decay).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    kernel_size: 4
    stride: 1
    pad: 0
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.001 }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "relu5"
}
# conv6: 128 filters, 4x4, stride 1, no padding; biases start at 0.001.
# Followed by ReLU written to a separate blob "relu6".
layer {
  name: "conv6"
  type: "Convolution"
  bottom: "relu5"
  top: "conv6"
  # Weights, then bias (2x learning rate, no weight decay).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 128
    kernel_size: 4
    stride: 1
    pad: 0
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.001 }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "conv6"
  top: "relu6"
}
# pool2: non-overlapping 2x2 max pooling (stride 2) over relu6.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "relu6"
  top: "pool2"
  pooling_param { pool: MAX kernel_size: 2 stride: 2 }
}
# conv7: reduces to 5 output channels with a 4x4 kernel, stride 2, padding 3;
# biases start at 0.001. Followed by ReLU written to a separate blob "relu7".
# NOTE(review): the 5 channels look like per-class scores (they feed the
# upsampling layer and then the softmax loss), so the ReLU here discards
# negative scores - confirm this is intended.
layer {
  name: "conv7"
  type: "Convolution"
  bottom: "pool2"
  top: "conv7"
  # Weights, then bias (2x learning rate, no weight decay).
  param { lr_mult: 1 decay_mult: 1 }
  param { lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 5
    kernel_size: 4
    stride: 2
    pad: 3
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.001 }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "conv7"
  top: "relu7"
}
# deconv1: 8x upsampling of the 5-channel score map with a transposed
# convolution whose weights are initialised as a bilinear interpolation kernel.
# With kernel 16, stride 8 and pad 0 the output size is (in - 1) * 8 + 16 per
# spatial dimension.
layer {
  name: "deconv1"
  type: "Deconvolution"
  bottom: "relu7"
  top: "deconv1"
  # Single param block (weights only; the layer has no bias term).
  param {
    lr_mult: 1
  }
  convolution_param {
    num_output: 5
    # One group per channel, as required for the "bilinear" filler.
    # The filler writes the SAME interpolation kernel into every
    # (input, output) channel pair. Without grouping, each output channel is
    # therefore the upsampled sum of all input channels, so all 5 outputs are
    # identical, the softmax is uniform, and the loss is a constant (ln 5 per
    # pixel) that does not depend on any earlier weights. With group: 5 each
    # channel is upsampled independently.
    group: 5
    bias_term: false
    kernel_size: 16
    stride: 8
    pad: 0
    weight_filler {
      type: "bilinear"
    }
  }
}
# relu8: rectifies the upsampled output of deconv1 into a separate blob.
# NOTE(review): this blob is the score input of the SoftmaxWithLoss layer, so
# negative class scores are clipped to zero before the softmax - confirm this
# is intended.
layer {
  name: "relu8"
  type: "ReLU"
  top: "relu8"
  bottom: "deconv1"
}
#layer {
# name: "crop"
# type: "Crop"
# bottom: "deconv3"
# bottom: "data"
# top: "score"
#}
# Softmax + multinomial logistic loss.
# Bottom order matters: scores ("relu8") first, ground-truth labels second.
layer {
  name: "prob"
  type: "SoftmaxWithLoss"
  bottom: "relu8"
  bottom: "label"
  top: "loss"
  loss_param {
    # Normalisation over labelled locations is switched off, so the reported
    # loss scales with the number of pixels.
    normalize: false
    # Disabled alternatives:
    # ignore_label: 255
    # normalize: true
  }
}
答案 0 :(得分:0)
您的 base_lr 似乎太小了。可以先从 0.00001 开始,并尝试将 momentum 设为 0.9。如果学习率太小,收敛会很慢;如果太大,梯度下降会越过局部最小值(此时您会看到损失飙升)。因此必须通过反复试验逐步找到最佳值,没有万能的数值。