我正在从事一些有关图像标记的项目,并且对caffe还是陌生的。我从我读过的有关图像标记的论文之一中获得了Caffe代码,用于该数据的数据集是CIFAR100。 根据代码和层的结构,我必须将LMDB格式的图像特征和HDF5格式的标签分别输入代码中。我将特征转换为lmdb格式,并将标签转换为hdf5格式,但是我无法运行代码。我认为应该先对标签进行一些预处理,然后再将标签送入网络,但我不知道该怎么做!我给论文的作者写了一封电子邮件,但不幸的是他们还没有回复。
正如论文作者告诉我的那样,数据集应采用分层结构,因此我尝试使用具有两级分层结构的CIFAR100数据集:每个图像都必须具有两个标签——粗略标签和精细标签。 我将代码放在这里,因此,如果有人可以使用CIFAR100数据集运行它,请教我如何操作。
这是proto.txt文件:
name: "res_to_lstm"
# TRAIN-phase input: precomputed image features (ResNet global-pool output,
# per the source path) read from an LMDB database, 100 samples per batch.
# NOTE(review): this layer's batch_size (100) differs from the HDF5 label
# layer's batch_size (10) below — confirm the two inputs stay aligned.
layer {
name: "image_feature"
type: "Data"
top: "global_pool"
include { phase: TRAIN }
data_param {
source: "./examples/coarse-to-fine/Imagenet/ResNet/feature/global_pool_train"
batch_size: 100
backend: LMDB
}
}
# TRAIN-phase input: sequence data read from the HDF5 files listed in the
# shuffled chunk-list text file. Produces three blobs consumed below:
#   cont_sentence   - sequence-continuation indicators (fed to the LSTM)
#   input_sentence  - token ids (fed to the Embed layer)
#   target_sentence - ground-truth ids (fed to loss/accuracy; -1 = ignore)
layer {
name: "data"
type: "HDF5Data"
top: "cont_sentence"
top: "input_sentence"
top: "target_sentence"
include { phase: TRAIN }
hdf5_data_param {
source: "/home/destiny/Datasets/Imagenet/train_h5_caffe/hdf5_chunk_list_shuffle.txt"
batch_size: 10
}
}
# TEST-phase counterpart of the image-feature input: same LMDB feature
# pipeline, but reading from the validation database.
layer {
name: "image_feature"
type: "Data"
top: "global_pool"
include { phase: TEST }
data_param {
source: "./examples/coarse-to-fine/Imagenet/ResNet/feature/global_pool_val"
batch_size: 100
backend: LMDB
}
}
# TEST-phase counterpart of the HDF5 sequence input: same three blobs,
# read from the validation chunk list (unshuffled).
layer {
name: "data"
type: "HDF5Data"
top: "cont_sentence"
top: "input_sentence"
top: "target_sentence"
include { phase: TEST }
hdf5_data_param {
source: "/home/destiny/Datasets/Imagenet/val_h5_caffe/hdf5_chunk_list.txt"
batch_size: 10
}
}
# Token embedding: maps each id in input_sentence (vocabulary of 1861,
# matching the predict layer's num_output) to a 1000-d learned vector.
# No bias; weights initialized uniformly in [-0.08, 0.08].
layer {
name: "embedding"
type: "Embed"
bottom: "input_sentence"
top: "embedded_input_sentence"
param {
lr_mult: 1
}
embed_param {
bias_term: false
input_dim: 1861
num_output: 1000
weight_filler {
type: "uniform"
min: -0.08
max: 0.08
}
}
}
# Recurrent core: LSTM over the embedded token sequence, gated by
# cont_sentence; the third bottom (global_pool image feature) is the
# LSTM's static/per-sequence input. Hidden size 1000.
layer {
name: "lstm1"
type: "LSTM"
bottom: "embedded_input_sentence"
bottom: "cont_sentence"
bottom: "global_pool"
top: "lstm1"
recurrent_param {
num_output: 1000
weight_filler {
type: "uniform"
min: -0.08
max: 0.08
}
bias_filler {
type: "constant"
value: 0
}
}
}
# Output projection: per-timestep InnerProduct (axis: 2 applies it along
# the feature axis of the time-major LSTM output) producing 1861 scores,
# one per vocabulary entry. Bias learns at 2x rate with no weight decay.
layer {
name: "predict"
type: "InnerProduct"
bottom: "lstm1"
top: "predict"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1861
weight_filler {
type: "uniform"
min: -0.08
max: 0.08
}
bias_filler {
type: "constant"
value: 0
}
axis: 2
}
}
# Per-timestep softmax cross-entropy against target_sentence (axis: 2 to
# match the predict layer). Targets labeled -1 (padding) are ignored;
# the loss is scaled by 10 via loss_weight.
layer {
name: "cross_entropy_loss"
type: "SoftmaxWithLoss"
bottom: "predict"
bottom: "target_sentence"
top: "cross_entropy_loss"
loss_weight: 10
loss_param {
ignore_label: -1
}
softmax_param {
axis: 2
}
}
# Per-timestep top-1 accuracy (axis: 2, matching the predict layer),
# reported only at TEST time; -1 (padding) targets are excluded.
# FIX: the original prototxt was missing the closing brace of this layer
# block (the final "}" closed only accuracy_param), which makes the file
# unparseable by Caffe's protobuf text-format reader.
layer {
name: "accuracy"
type: "Accuracy"
bottom: "predict"
bottom: "target_sentence"
top: "accuracy"
include { phase: TEST }
accuracy_param {
axis: 2
ignore_label: -1
}
}
这是Solver.prototxt文件:
# Network to train/evaluate.
# NOTE(review): this path ("ResNet-BN/ResNet_train_lstm.prototxt") differs
# from the ResNet feature/model paths used above — confirm it is the file
# containing the net definition shown earlier in this post.
net: "./examples/coarse-to-fine/Imagenet/ResNet-BN/ResNet_train_lstm.prototxt"
# Evaluation: 500 test batches per test pass, every 1000 iterations,
# with one test pass before training starts.
test_iter: 500
test_interval: 1000
test_initialization: true
# Learning-rate schedule: start at 0.1, halve (gamma 0.5) every 60000 steps.
base_lr: 0.1
lr_policy: "step"
gamma: 0.5
stepsize: 60000
display: 200
max_iter: 260000
# SGD with momentum; weight decay is disabled (0.0).
momentum: 0.9
weight_decay: 0.0000
# Snapshot every 10000 iterations under the given prefix.
snapshot: 10000
snapshot_prefix: "./examples/coarse-to-fine/Imagenet/ResNet/models/global_pool_lstm"
solver_mode: GPU
# Fixed seed for reproducibility; displayed loss is averaged over 100 iters;
# gradients are L2-clipped at 10 (common for LSTM training stability).
random_seed: 1701
average_loss: 100
clip_gradients: 10