Question

在浏览 GoogLeNet 的 prototxt 文件时，我发现 inception 模块的末尾有一个 Concat（拼接）层，它接收多个 bottom 输入。

例如：

layer {
  name: "inception_3a/output"
  type: "Concat"
  bottom: "inception_3a/1x1"
  bottom: "inception_3a/3x3"
  bottom: "inception_3a/5x5"
  bottom: "inception_3a/pool_proj"
  top: "inception_3a/output"
}

可以看出，其中有一个 1x1 卷积层、一个 3x3 卷积层、一个 5x5 卷积层，最后还有一个池化层。这些层的定义如下：

layer {
  name: "inception_3a/1x1"
  type: "Convolution"
  bottom: "pool2/3x3_s2"
  top: "inception_3a/1x1"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 64
    kernel_size: 1
    weight_filler {
      type: "xavier"
      std: 0.03
    }
    bias_filler {
      type: "constant"
      value: 0.2
    }
  }
}
layer {
  name: "inception_3a/relu_1x1"
  type: "ReLU"
  bottom: "inception_3a/1x1"
  top: "inception_3a/1x1"
}
layer {
  name: "inception_3a/3x3_reduce"
  type: "Convolution"
  bottom: "pool2/3x3_s2"
  top: "inception_3a/3x3_reduce"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 96
    kernel_size: 1
    weight_filler {
      type: "xavier"
      std: 0.09
    }
    bias_filler {
      type: "constant"
      value: 0.2
    }
  }
}
layer {
  name: "inception_3a/relu_3x3_reduce"
  type: "ReLU"
  bottom: "inception_3a/3x3_reduce"
  top: "inception_3a/3x3_reduce"
}
layer {
  name: "inception_3a/3x3"
  type: "Convolution"
  bottom: "inception_3a/3x3_reduce"
  top: "inception_3a/3x3"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 128
    pad: 1
    kernel_size: 3
    weight_filler {
      type: "xavier"
      std: 0.03
    }
    bias_filler {
      type: "constant"
      value: 0.2
    }
  }
}
layer {
  name: "inception_3a/relu_3x3"
  type: "ReLU"
  bottom: "inception_3a/3x3"
  top: "inception_3a/3x3"
}
layer {
  name: "inception_3a/5x5_reduce"
  type: "Convolution"
  bottom: "pool2/3x3_s2"
  top: "inception_3a/5x5_reduce"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 16
    kernel_size: 1
    weight_filler {
      type: "xavier"
      std: 0.2
    }
    bias_filler {
      type: "constant"
      value: 0.2
    }
  }
}
layer {
  name: "inception_3a/relu_5x5_reduce"
  type: "ReLU"
  bottom: "inception_3a/5x5_reduce"
  top: "inception_3a/5x5_reduce"
}
layer {
  name: "inception_3a/5x5"
  type: "Convolution"
  bottom: "inception_3a/5x5_reduce"
  top: "inception_3a/5x5"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    pad: 2
    kernel_size: 5
    weight_filler {
      type: "xavier"
      std: 0.03
    }
    bias_filler {
      type: "constant"
      value: 0.2
    }
  }
}
layer {
  name: "inception_3a/relu_5x5"
  type: "ReLU"
  bottom: "inception_3a/5x5"
  top: "inception_3a/5x5"
}
layer {
  name: "inception_3a/pool"
  type: "Pooling"
  bottom: "pool2/3x3_s2"
  top: "inception_3a/pool"
  pooling_param {
    pool: MAX
    kernel_size: 3
    stride: 1
    pad: 1
  }
}
layer {
  name: "inception_3a/pool_proj"
  type: "Convolution"
  bottom: "inception_3a/pool"
  top: "inception_3a/pool_proj"
  param {
    lr_mult: 1
    decay_mult: 1
  }
  param {
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 32
    kernel_size: 1
    weight_filler {
      type: "xavier"
      std: 0.1
    }
    bias_filler {
      type: "constant"
      value: 0.2
    }
  }
}

可以看出，它们具有不同的输出通道数和不同的卷积核大小。Concat 层的文档说明如下：

输入：

n_i * c_i * h * w for each input blob i from 1 to K.

输出：

如果 axis = 0：(n_1 + n_2 + ... + n_K) * c_1 * h * w，并且所有输入的 c_i 应该相同。

如果 axis = 1：n_1 * (c_1 + c_2 + ... + c_K) * h * w，并且所有输入的 n_i 应该相同。

首先，我不确定 axis 的默认值是什么；其次，我不确定输出张量（output volume）的维度会是什么，因为宽度和高度应该保持不变，但这几个卷积层都会产生不同的输出。如能指点，不胜感激。

Answer 1

"Concat" 层 axis 参数的默认值为 1，因此是沿通道维度进行拼接。为此，所有被拼接的层必须具有相同的高度和宽度。查看日志，各层的输出尺寸为（假设批次大小为 32）：

inception_3a/1x1 -> [32,64,28,28]
inception_3a/3x3 -> [32,128,28,28]
inception_3a/5x5 -> [32,32,28,28]
inception_3a/pool_proj -> [32,32,28,28]

因此最终输出将具有维度：
inception_3a/output -> [32,(64+128+32+32),28,28] -> [32,256,28,28]

这与 Caffe 日志中的输出一致：

Creating Layer inception_3a/output
inception_3a/output <- inception_3a/1x1
inception_3a/output <- inception_3a/3x3
inception_3a/output <- inception_3a/5x5
inception_3a/output <- inception_3a/pool_proj
inception_3a/output -> inception_3a/output
Setting up inception_3a/output
Top shape: 32 256 28 28 (6422528)

在 Concat 层之后，GoogLeNet 中的输出维度是多少？

1 个答案: