使用给定数据集实现深度学习体系结构

时间:2016-11-01 10:15:55

标签: machine-learning neural-network deep-learning caffe protocol-buffers

我很喜欢咖啡和深度学习。我只是想实现深度学习架构。 Architecture

这是我想要实现的架构。架构和Parse27k数据集由亚琛工业大学视觉计算研究所的 计算机视觉小组创建和构建

下面你可以看到我需要改进的模型:

Train_val.prototxt

name: "Parse27"
layer {
  name: "data"
  type: "HDF5Data"
  top: "crops"
  top: "labels"
  include {
    phase: TRAIN
  }

  hdf5_data_param {
    source: "/home/nail/caffe/caffe/examples/hdf5_classification/data/train.txt"
    batch_size: 256
  }
}
layer {
  name: "data"
  type: "HDF5Data"
  top: "crops"
  top: "labels"
  include {
    phase: TEST
  }
  hdf5_data_param {
    source: "/home/nail/caffe/caffe/examples/hdf5_classification/data/test.txt"
    batch_size: 256
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "crops"
  top: "conv1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm1"
  type: "LRN"
  bottom: "pool1"
  top: "norm1"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "norm1"
  top: "conv2"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "norm2"
  type: "LRN"
  bottom: "pool2"
  top: "norm2"
  lrn_param {
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "norm2"
  top: "conv3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler {
      type: "gaussian"
      std: 0.005
    }
    bias_filler {
      type: "constant"
      value: 1
    }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5
  }
}
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 1000
    weight_filler {
      type: "gaussian"
      std: 0.01
    }
    bias_filler {
      type: "constant"
      value: 0
    }
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc8"
  bottom: "labels"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc8"
  bottom: "labels"  
  top: "loss"
}

Solver.prototxt

net: "models/Parse27/train_val.prototxt"
test_iter: 1000
test_interval: 1000
base_lr: 0.01
lr_policy: "step"
gamma: 0.1
stepsize: 100000
display: 20
max_iter: 450000
momentum: 0.9
weight_decay: 0.0005
snapshot: 10000
snapshot_prefix: "models/Parse27/Parse27_train"
solver_mode: GPU

我在实现这种架构方面有两个主要的困难。

  1. 如上所示,我的模型不包含自定义丢失图层。我的模型几乎是caffeNet架构。但我应该用自定义损失图层(绿色框)替换红色框内的最后一层。

  2. 我的火车数据集有以下结构。

  3. crops       Dataset {27482, 3, 128, 192}
    labels      Dataset {27482, 12}
    mean        Dataset {3, 128, 192}
    pids        Dataset {27482}
    

    如此处所示,作物和标签中的行数(示例)相同27482.但我的标签数据集中有12列。当只有一个标签时我的模型就可以工作了。我如何能够为所有标签进行培训?

    我在Train_val.prototxt中的模型现在看起来像这样:

    enter image description here

    任何形式的帮助或建议都将受到高度赞赏。

1 个答案:

答案 0 :(得分:2)

如果我理解正确,您正在尝试为每个输入示例预测12个离散标签(属性)。在这种情况下,您应该"Slice"标签:

layer {
  type: "Slice"
  name: "slice_labels"
  bottom: "label"
  top: "attr_00"
  top: "attr_01"
  top: "attr_02"
  top: "attr_03"
  top: "attr_04"
  top: "attr_05"
  top: "attr_06"
  top: "attr_07"
  top: "attr_08"
  top: "attr_09"
  top: "attr_10"
  top: "attr_11"
  slice_param {
    axis: -1 # slice the last dimension
    slice_point: 1
    slice_point: 2
    slice_point: 3
    slice_point: 4
    slice_point: 5
    slice_point: 6
    slice_point: 7
    slice_point: 8
    slice_point: 9
    slice_point: 10
    slice_point: 11
  }
}

现在,每个属性都有一个“标量”标签。我相信你可以从这里拿走它。