我正在从事一些有关图像标记的项目,并且对caffe还是陌生的。我从我读过的有关图像标记的论文之一中获得了Caffe代码,用于该数据的数据集是CIFAR100。 根据代码和层的结构,我必须将LMDB格式的图像特征和HDF5格式的标签分别输入代码中。我将特征转换为lmdb格式,并将标签转换为hdf5格式,但是我无法运行代码。我认为应该先对标签进行一些预处理,然后再将标签送入网络,但我不知道该怎么做!我给论文的作者写了一封电子邮件,但不幸的是他们还没有回复。
正如论文作者告诉我的那样,数据集应采用分层结构,因此我尝试使用具有两级分层结构的CIFAR100数据集:每个图像都必须具有两个标签——粗略标签和精细标签。 我将代码放在这里,因此,如果有人可以使用CIFAR100数据集运行它,请教我如何操作。
这是proto.txt文件:
name: "res_to_lstm"
# TRAIN-phase input: precomputed image features (ResNet global-pool output,
# per the source path) read from an LMDB database, 100 samples per batch.
# NOTE(review): this layer's batch_size (100) differs from the HDF5 label
# layer's batch_size (10) below — confirm the two inputs stay aligned.
layer {
name: "image_feature"
type: "Data"
top: "global_pool"
include { phase: TRAIN }
data_param {
source: "./examples/coarse-to-fine/Imagenet/ResNet/feature/global_pool_train"
batch_size: 100
backend: LMDB
}
}
# TRAIN-phase input: sequence data read from the HDF5 files listed in the
# shuffled chunk-list text file. Produces three blobs consumed below:
#   cont_sentence   - sequence-continuation indicators (fed to the LSTM)
#   input_sentence  - token ids (fed to the Embed layer)
#   target_sentence - ground-truth ids (fed to loss/accuracy; -1 = ignore)
layer {
name: "data"
type: "HDF5Data"
top: "cont_sentence"
top: "input_sentence"
top: "target_sentence"
include { phase: TRAIN }
hdf5_data_param {
source: "/home/destiny/Datasets/Imagenet/train_h5_caffe/hdf5_chunk_list_shuffle.txt"
batch_size: 10
}
}
# TEST-phase counterpart of the image-feature input: same LMDB feature
# pipeline, but reading from the validation database.
layer {
name: "image_feature"
type: "Data"
top: "global_pool"
include { phase: TEST }
data_param {
source: "./examples/coarse-to-fine/Imagenet/ResNet/feature/global_pool_val"
batch_size: 100
backend: LMDB
}
}
# TEST-phase counterpart of the HDF5 sequence input: same three blobs,
# read from the validation chunk list (unshuffled).
layer {
name: "data"
type: "HDF5Data"
top: "cont_sentence"
top: "input_sentence"
top: "target_sentence"
include { phase: TEST }
hdf5_data_param {
source: "/home/destiny/Datasets/Imagenet/val_h5_caffe/hdf5_chunk_list.txt"
batch_size: 10
}
}
# Token embedding: maps each id in input_sentence (vocabulary of 1861,
# matching the predict layer's num_output) to a 1000-d learned vector.
# No bias; weights initialized uniformly in [-0.08, 0.08].
layer {
name: "embedding"
type: "Embed"
bottom: "input_sentence"
top: "embedded_input_sentence"
param {
lr_mult: 1
}
embed_param {
bias_term: false
input_dim: 1861
num_output: 1000
weight_filler {
type: "uniform"
min: -0.08
max: 0.08
}
}
}
# Recurrent core: LSTM over the embedded token sequence, gated by
# cont_sentence; the third bottom (global_pool image feature) is the
# LSTM's static/per-sequence input. Hidden size 1000.
layer {
name: "lstm1"
type: "LSTM"
bottom: "embedded_input_sentence"
bottom: "cont_sentence"
bottom: "global_pool"
top: "lstm1"
recurrent_param {
num_output: 1000
weight_filler {
type: "uniform"
min: -0.08
max: 0.08
}
bias_filler {
type: "constant"
value: 0
}
}
}
# Output projection: per-timestep InnerProduct (axis: 2 applies it along
# the feature axis of the time-major LSTM output) producing 1861 scores,
# one per vocabulary entry. Bias learns at 2x rate with no weight decay.
layer {
name: "predict"
type: "InnerProduct"
bottom: "lstm1"
top: "predict"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1861
weight_filler {
type: "uniform"
min: -0.08
max: 0.08
}
bias_filler {
type: "constant"
value: 0
}
axis: 2
}
}
# Per-timestep softmax cross-entropy against target_sentence (axis: 2 to
# match the predict layer). Targets labeled -1 (padding) are ignored;
# the loss is scaled by 10 via loss_weight.
layer {
name: "cross_entropy_loss"
type: "SoftmaxWithLoss"
bottom: "predict"
bottom: "target_sentence"
top: "cross_entropy_loss"
loss_weight: 10
loss_param {
ignore_label: -1
}
softmax_param {
axis: 2
}
}
# Per-timestep top-1 accuracy (axis: 2, matching the predict layer),
# reported only at TEST time; -1 (padding) targets are excluded.
# FIX: the original prototxt was missing the closing brace of this layer
# block (the final "}" closed only accuracy_param), which makes the file
# unparseable by Caffe's protobuf text-format reader.
layer {
name: "accuracy"
type: "Accuracy"
bottom: "predict"
bottom: "target_sentence"
top: "accuracy"
include { phase: TEST }
accuracy_param {
axis: 2
ignore_label: -1
}
}
这是Solver.prototxt文件:
# Network to train/evaluate.
# NOTE(review): this path ("ResNet-BN/ResNet_train_lstm.prototxt") differs
# from the ResNet feature/model paths used above — confirm it is the file
# containing the net definition shown earlier in this post.
net: "./examples/coarse-to-fine/Imagenet/ResNet-BN/ResNet_train_lstm.prototxt"
# Evaluation: 500 test batches per test pass, every 1000 iterations,
# with one test pass before training starts.
test_iter: 500
test_interval: 1000
test_initialization: true
# Learning-rate schedule: start at 0.1, halve (gamma 0.5) every 60000 steps.
base_lr: 0.1
lr_policy: "step"
gamma: 0.5
stepsize: 60000
display: 200
max_iter: 260000
# SGD with momentum; weight decay is disabled (0.0).
momentum: 0.9
weight_decay: 0.0000
# Snapshot every 10000 iterations under the given prefix.
snapshot: 10000
snapshot_prefix: "./examples/coarse-to-fine/Imagenet/ResNet/models/global_pool_lstm"
solver_mode: GPU
# Fixed seed for reproducibility; displayed loss is averaged over 100 iters;
# gradients are L2-clipped at 10 (common for LSTM training stability).
random_seed: 1701
average_loss: 100
clip_gradients: 10