我调整了bvlc_reference_caffenet中的train_val.prototxt,以便在Caffe中实现一个VGG-16克隆,并且能够在GTX 1050上使用batch_size: 6和base_lr: 0.0648(≈ 0.01 * sqrt(256/6) ≈ 0.01 * sqrt(42))进行训练。但是,我想将输入数据从[0; 255]缩放到[0; 1],因为此CNN的目标平台精度有限。为了缩放数据,我引入了scale: 0.00390625参数(取自在目标平台上运行良好的Caffe LeNet示例)。但是使用scale参数后,准确率不再提高(超过40000次迭代),训练期间损失也不发生变化。
如何在使用scale参数的情况下训练此CNN?
train_val.prototxt
# Network definition for the "ES VGG" model.
name: "ES VGG"

# Training-phase input layer: produces the "data" and "label" blobs from the
# ILSVRC12 training LMDB.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include { phase: TRAIN }
  transform_param {
    # 0.00390625 == 1/256; per the accompanying question this is meant to bring
    # the [0; 255] input range down to roughly [0; 1].
    scale: 0.00390625
    mirror: true
    crop_size: 224
    mean_file: "/local/datasets/imagenet/ilsvrc12/imagenet_mean.binaryproto"
  }
  data_param {
    source: "/local/datasets/imagenet/ilsvrc12_train_lmdb"
    batch_size: 6
    backend: LMDB
  }
}
# Test-phase input layer: same blobs and preprocessing as the training input,
# but reads the validation LMDB and disables mirroring.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include { phase: TEST }
  transform_param {
    # Same 1/256 input scaling as in the TRAIN phase.
    scale: 0.00390625
    mirror: false
    crop_size: 224
    mean_file: "/local/datasets/imagenet/ilsvrc12/imagenet_mean.binaryproto"
  }
  data_param {
    source: "/local/datasets/imagenet/ilsvrc12_val_lmdb"
    batch_size: 6
    backend: LMDB
  }
}
# ---- Stage 1: two 3x3 convolutions (64 outputs each) followed by 2x2 max pooling ----
# conv1_1 is the only layer that consumes the (scaled) "data" blob directly.
layer {
  name: "conv1_1"
  type: "Convolution"
  bottom: "data"
  top: "conv1_1"
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# ReLU applied in place (bottom and top name the same blob).
layer {
  name: "relu1_1"
  type: "ReLU"
  bottom: "conv1_1"
  top: "conv1_1"
}
layer {
  name: "conv1_2"
  type: "Convolution"
  bottom: "conv1_1"
  top: "conv1_2"
  convolution_param {
    num_output: 64
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu1_2"
  type: "ReLU"
  bottom: "conv1_2"
  top: "conv1_2"
}
# 2x2 max pooling with stride 2.
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1_2"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# ---- Stage 2: two 3x3 convolutions (128 outputs each) followed by 2x2 max pooling ----
layer {
  name: "conv2_1"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2_1"
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# ReLU applied in place (bottom and top name the same blob).
layer {
  name: "relu2_1"
  type: "ReLU"
  bottom: "conv2_1"
  top: "conv2_1"
}
layer {
  name: "conv2_2"
  type: "Convolution"
  bottom: "conv2_1"
  top: "conv2_2"
  convolution_param {
    num_output: 128
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu2_2"
  type: "ReLU"
  bottom: "conv2_2"
  top: "conv2_2"
}
# 2x2 max pooling with stride 2.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2_2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# ---- Stage 3: three 3x3 convolutions (256 outputs each) followed by 2x2 max pooling ----
layer {
  name: "conv3_1"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3_1"
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# ReLU applied in place (bottom and top name the same blob).
layer {
  name: "relu3_1"
  type: "ReLU"
  bottom: "conv3_1"
  top: "conv3_1"
}
layer {
  name: "conv3_2"
  type: "Convolution"
  bottom: "conv3_1"
  top: "conv3_2"
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu3_2"
  type: "ReLU"
  bottom: "conv3_2"
  top: "conv3_2"
}
layer {
  name: "conv3_3"
  type: "Convolution"
  bottom: "conv3_2"
  top: "conv3_3"
  convolution_param {
    num_output: 256
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu3_3"
  type: "ReLU"
  bottom: "conv3_3"
  top: "conv3_3"
}
# 2x2 max pooling with stride 2.
layer {
  name: "pool3"
  type: "Pooling"
  bottom: "conv3_3"
  top: "pool3"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# ---- Stage 4: three 3x3 convolutions (512 outputs each) followed by 2x2 max pooling ----
layer {
  name: "conv4_1"
  type: "Convolution"
  bottom: "pool3"
  top: "conv4_1"
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# ReLU applied in place (bottom and top name the same blob).
layer {
  name: "relu4_1"
  type: "ReLU"
  bottom: "conv4_1"
  top: "conv4_1"
}
layer {
  name: "conv4_2"
  type: "Convolution"
  bottom: "conv4_1"
  top: "conv4_2"
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu4_2"
  type: "ReLU"
  bottom: "conv4_2"
  top: "conv4_2"
}
layer {
  name: "conv4_3"
  type: "Convolution"
  bottom: "conv4_2"
  top: "conv4_3"
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu4_3"
  type: "ReLU"
  bottom: "conv4_3"
  top: "conv4_3"
}
# 2x2 max pooling with stride 2.
layer {
  name: "pool4"
  type: "Pooling"
  bottom: "conv4_3"
  top: "pool4"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# ---- Stage 5: three 3x3 convolutions (512 outputs each) followed by 2x2 max pooling ----
layer {
  name: "conv5_1"
  type: "Convolution"
  bottom: "pool4"
  top: "conv5_1"
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# ReLU applied in place (bottom and top name the same blob).
layer {
  name: "relu5_1"
  type: "ReLU"
  bottom: "conv5_1"
  top: "conv5_1"
}
layer {
  name: "conv5_2"
  type: "Convolution"
  bottom: "conv5_1"
  top: "conv5_2"
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu5_2"
  type: "ReLU"
  bottom: "conv5_2"
  top: "conv5_2"
}
layer {
  name: "conv5_3"
  type: "Convolution"
  bottom: "conv5_2"
  top: "conv5_3"
  convolution_param {
    num_output: 512
    kernel_size: 3
    pad: 1
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0 }
  }
}
# Added: conv5_3 was the only convolution in this net without a ReLU, so pool5
# received un-rectified activations. Every other conv here is followed by an
# in-place ReLU, and a VGG-16 clone needs one at this position as well.
layer {
  name: "relu5_3"
  type: "ReLU"
  bottom: "conv5_3"
  top: "conv5_3"
}
# 2x2 max pooling with stride 2.
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5_3"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
# ---- Classifier: fc6 (4096) -> fc7 (4096) -> fc8 (1000) ----
# fc6 and fc7 are each followed by an in-place ReLU and in-place dropout (ratio 0.5).
# The fully connected layers use a constant bias of 0.01 (the conv layers use 0).
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  inner_product_param {
    num_output: 4096
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.01 }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  inner_product_param {
    num_output: 4096
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.01 }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param { dropout_ratio: 0.5 }
}
# Final layer: 1000 outputs, fed to the loss and accuracy layers as "fc8".
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  inner_product_param {
    num_output: 1000
    weight_filler { type: "xavier" }
    bias_filler { type: "constant" value: 0.01 }
  }
}
# Training objective: softmax loss between the fc8 outputs and the labels.
# This layer has no phase restriction.
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc8"
  bottom: "label"
  top: "loss"
}
# Top-1 accuracy, included in the TEST phase only.
layer {
  name: "accuracytop1"
  type: "Accuracy"
  bottom: "fc8"
  bottom: "label"
  top: "accuracytop1"
  accuracy_param { top_k: 1 }
  include { phase: TEST }
}
# Top-5 accuracy, included in the TEST phase only.
layer {
  name: "accuracytop5"
  type: "Accuracy"
  bottom: "fc8"
  bottom: "label"
  top: "accuracytop5"
  accuracy_param { top_k: 5 }
  include { phase: TEST }
}
solver.prototxt
# Network definition to train, and the device to train on.
net: "models/es_vgg/train_val.prototxt"
solver_mode: GPU

# Learning-rate schedule: start at base_lr and multiply by gamma every
# `stepsize` iterations. The accompanying question derives base_lr as
# ~0.01 * sqrt(256/6) for batch_size 6.
base_lr: 0.0648
lr_policy: "step"
gamma: 0.1
stepsize: 100000
max_iter: 18900000

# Optimizer hyper-parameters.
momentum: 0.9
weight_decay: 0.0005

# Evaluation: every 1000 training iterations, run 1000 test batches.
test_iter: 1000
test_interval: 1000

# Logging and checkpointing.
display: 20
snapshot: 10000
snapshot_prefix: "models/es_vgg/es_vgg_train"
答案 0 :(得分:1)
如果您将输入除以255,则需要将第一个卷积层"conv1_1"的权重乘以255,以补偿此更改。
请查看net surgery,了解如何做到这一点。
例如(在python中):
import caffe
# Build the net from its definition only; no .caffemodel is supplied, so the
# weights are randomly initialized.
net = caffe.Net('models/es_vgg/train_val.prototxt', caffe.TEST)
# Compensate for the 1/255 input scaling: enlarge the kernels of the first
# conv layer by a factor of 255.
conv1_1_kernels = net.params['conv1_1'][0]
conv1_1_kernels.data[...] = conv1_1_kernels.data * 255.
# Persist the rescaled initialization.
net.save('models/es_vgg/init_scaled.caffemodel')
现在您需要使用保存的models/es_vgg/init_scaled.caffemodel作为初始权重开始训练(例如通过caffe train的-weights选项)。