Understanding Torch and implementing it in TensorFlow

Time: 2018-09-18 03:48:39

Tags: tensorflow torch

I am implementing a TensorFlow version of LBCNN, whose original code is here: https://github.com/juefeix/lbcnn.torch. The problem is that when I rewrite it in TensorFlow, the cost function stays high and keeps fluctuating. This has been troubling me for two weeks; even though I have debugged everything, I still cannot figure out where I went wrong.

Torch code:

-- resnet.lua

local function createModel(opt)
   local function basicblock(nChIn,nChOut,sz)
     local s = nn.Sequential()
     local shareConv = Convolution(nChIn,nChOut,sz,sz,1,
                       1,(sz-1)/2,(sz-1)/2)
     s:add(SBatchNorm(nChIn))      
     s:add(shareConv)
     s:add(ReLU())
     s:add(Convolution(nChOut,nChIn,1,1))
     local identity = nn.Identity()
     local output = nn.Sequential():add(nn.ConcatTable()
         :add(s):add(identity)):add(nn.CAddTable(true))
     return output
   end

   local sz = opt.convSize
   local nInputPlane = opt.nInputPlane
   local nChIn = opt.numChannels
   local nChOut = opt.numWeights

   -- define model to train
   model = nn.Sequential()
   model:add(Convolution(nInputPlane,nChIn,sz,sz,1,1,1,1))
   model:add(SBatchNorm(nChIn))
   model:add(ReLU(true))

   for stages = 1, opt.depth do
      model:add(basicblock(nChIn,nChOut,sz))
   end
   model:add(Avg(5,5,5,5))

   -- stage 3 : standard 2-layer neural network
   model:add(nn.Reshape(nChIn*opt.view))
   model:add(nn.Dropout(0.5))
   model:add(nn.Linear(nChIn*opt.view, 
   math.max(opt.nClasses,opt.full)))
   model:add(cudnn.ReLU())
   model:add(nn.Dropout(0.5))

   model:add(nn.Linear(math.max(opt.full,opt.nClasses), opt.nClasses))
   model:cuda()

   return model
end

return createModel

TensorFlow code:

import tensorflow as tf


def cnn(prev_input, lbc_size, lbc_channels, output_channels):
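   # Residual block: BN -> 3x3 conv -> ReLU -> 1x1 conv, then add the
   # identity shortcut back (mirrors basicblock() in the Torch code).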

   shortcut = tf.identity(prev_input)
   B = tf.contrib.layers.batch_norm(prev_input)

   Z = tf.contrib.layers.conv2d(inputs = B, num_outputs = lbc_channels,
   kernel_size = 3, stride = 1, padding = "SAME", activation_fn = None)

   A1 = tf.nn.relu(Z)

   A2 = tf.contrib.layers.conv2d(inputs = A1, 
        num_outputs=output_channels, 
        kernel_size = 1, stride = 1, 
        padding = "SAME", activation_fn = None)

   A3 = tf.add(A2, shortcut)

   return A3

def model(X, Keep_probability):

  with tf.name_scope("Pre-Conv"):
    X1 = tf.contrib.layers.conv2d(inputs = X, num_outputs = 
          output_channels,kernel_size = lbc_size, stride = 1, 
          padding = "SAME", activation_fn = None)
    X2 = tf.contrib.layers.batch_norm(X1)
    X3 = tf.nn.relu(X2)

  X_in = X3

  for i in range(conv_layers):
    with tf.name_scope("conv"):
       X_out = cnn(X_in, lbc_size, lbc_channels, out_channels)
       X_in = X_out

  with tf.name_scope("AvgPool"):
    Z = tf.nn.avg_pool(value = X_in, ksize = [1, 5, 5, 1], 
       strides = [1, 5, 5, 1], padding = "VALID")

  with tf.name_scope("Flatter"):
    P = tf.contrib.layers.flatten(Z)

  with tf.name_scope("Dropout"):
    F1 = tf.nn.dropout(x = P, keep_prob = 0.5)

  with tf.name_scope("Fully"):
    F2 = tf.contrib.layers.fully_connected(inputs = F1, 
       num_outputs = fc_hidden_units, activation_fn = tf.nn.relu)

  with tf.name_scope("Dropout"):
    F3 = tf.nn.dropout(x = F2, keep_prob = 0.5)

  with tf.name_scope("Fully"):
    F4 = tf.contrib.layers.fully_connected(inputs = F3, 
      num_outputs = output_classes, activation_fn = None)

  return F4

Assuming I have passed all the parameters correctly, I just want to ask whether the two architectures are the same. One more thing: I use AdamOptimizer, while their code uses SGD with momentum and weight_decay; will that make a difference? Thank you very much.
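
For reference, here is a minimal sketch of how such a cost and Adam training step can be wired up; the input shape, label placeholder, and learning rate below are assumed example values, not taken from the original script:

import tensorflow as tf

# Example values; the real shapes come from the dataset being used.
output_classes = 10
X = tf.placeholder(tf.float32, [None, 32, 32, 3], name="X")
Y = tf.placeholder(tf.int64, [None], name="Y")
keep_prob = tf.placeholder(tf.float32, name="keep_prob")

logits = model(X, keep_prob)  # model() as defined above
cost = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=Y, logits=logits))
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost)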

1 answer:

Answer 0: (score: 0)

I have not gone through the whole code to check whether the two are really the same, but when you change the optimizer from momentum to Adam, models usually do differ in how they converge. You will also need to re-tune the hyperparameters to get good performance.
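
If you want to test the optimizer difference, below is a rough sketch of mirroring an SGD + momentum + weight_decay setup in TF 1.x; the learning rate, momentum, and decay values are placeholders, not the values used in the original Torch repo:

import tensorflow as tf

def momentum_train_op(cost, learning_rate=0.1, momentum=0.9, weight_decay=1e-4):
    # MomentumOptimizer has no built-in weight decay, so the usual TF 1.x
    # equivalent is an explicit L2 penalty over all trainable variables.
    l2_loss = weight_decay * tf.add_n(
        [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
    return optimizer.minimize(cost + l2_loss)

Also note that a learning rate tuned for Adam is usually not the right one for SGD with momentum (and vice versa), which is why the hyperparameters need to be re-tuned after switching.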