我使用完全卷积的神经网络进行图像像素分类。
输入:3080张大小为2048x2048的图像,每张裁剪块的输出:8x512x512(我有8个类)。我通过将每张图像裁剪成16个大小为512x512的图像块来构建数据集,然后按批次大小16进行训练(这样一次正好送入一整张原图的所有裁剪块)。以下是我正在使用的神经网络:
--nn graph: U-Net style encoder/decoder for 8-class per-pixel classification.
-- Input: 1x512x512, output: 8x512x512 logits (no final activation; the
-- criterion applied later works on raw scores).
-- FIX(review): the original graph had no nonlinearities at all, so the whole
-- network collapsed to an affine map (up to pooling). A ReLU is now applied
-- after every 3x3 convolution, as in the standard U-Net; the skip-connection
-- nodes (C2, C4, C6, C8) therefore carry activated features. All node names
-- are kept because x1/C19 are used below to build the gModule.
x1=nn.Identity()()
P1=nn.SpatialReflectionPadding(1,1,1,1)(x1) --reflection padding keeps 3x3 convs size-preserving
C1=nn.ReLU(true)(nn.SpatialConvolution(1, 64, 3, 3)(P1))
P2=nn.SpatialReflectionPadding(1,1,1,1)(C1)
C2=nn.ReLU(true)(nn.SpatialConvolution(64, 64, 3, 3)(P2))
M1=nn.SpatialMaxPooling(2,2,2,2)(C2)        --512 -> 256
P3=nn.SpatialReflectionPadding(1,1,1,1)(M1)
C3=nn.ReLU(true)(nn.SpatialConvolution(64, 128, 3, 3)(P3))
P4=nn.SpatialReflectionPadding(1,1,1,1)(C3)
C4=nn.ReLU(true)(nn.SpatialConvolution(128,128, 3, 3)(P4))
M2=nn.SpatialMaxPooling(2,2,2,2)(C4)        --256 -> 128
P5=nn.SpatialReflectionPadding(1,1,1,1)(M2)
C5=nn.ReLU(true)(nn.SpatialConvolution(128, 256, 3, 3)(P5))
P6=nn.SpatialReflectionPadding(1,1,1,1)(C5)
C6=nn.ReLU(true)(nn.SpatialConvolution(256, 256, 3, 3)(P6))
M3=nn.SpatialMaxPooling(2,2,2,2)(C6)        --128 -> 64
P7=nn.SpatialReflectionPadding(1,1,1,1)(M3)
C7=nn.ReLU(true)(nn.SpatialConvolution(256, 512, 3, 3)(P7))
P8=nn.SpatialReflectionPadding(1,1,1,1)(C7)
C8=nn.ReLU(true)(nn.SpatialConvolution(512, 512, 3, 3)(P8)) --512x32x32
M4=nn.SpatialMaxPooling(2,2,2,2)(C8)        --32 -> 16 (bottleneck)
P9=nn.SpatialReflectionPadding(1,1,1,1)(M4)
C9=nn.ReLU(true)(nn.SpatialConvolution(512, 1024, 3, 3)(P9))
P10=nn.SpatialReflectionPadding(1,1,1,1)(C9)
C10=nn.ReLU(true)(nn.SpatialConvolution(1024, 1024, 3, 3)(P10))
-- Decoder: each stage 2x-upsamples via transposed conv, concatenates the
-- matching encoder feature map along the channel dim (dim 2 of NCHW), then
-- applies two 3x3 conv+ReLU layers.
U1=nn.SpatialFullConvolution(1024, 512, 2, 2, 2, 2)(C10) -- up convolve --512x32x32
P11=nn.SpatialReflectionPadding(1,1,1,1)(nn.JoinTable(2)({C8,U1})) --concat + padding
C11=nn.ReLU(true)(nn.SpatialConvolution(1024, 512, 3, 3)(P11))
P12=nn.SpatialReflectionPadding(1,1,1,1)(C11)
C12=nn.ReLU(true)(nn.SpatialConvolution(512, 512, 3, 3)(P12))
U2=nn.SpatialFullConvolution(512, 256, 2, 2, 2, 2)(C12) -- up convolve
P13=nn.SpatialReflectionPadding(1,1,1,1)(nn.JoinTable(2)({C6,U2})) --concat + padding
C13=nn.ReLU(true)(nn.SpatialConvolution(512, 256, 3, 3)(P13))
P14=nn.SpatialReflectionPadding(1,1,1,1)(C13)
C14=nn.ReLU(true)(nn.SpatialConvolution(256, 256, 3, 3)(P14))
U3=nn.SpatialFullConvolution(256, 128, 2, 2, 2 ,2)(C14) --upconvolve
P15=nn.SpatialReflectionPadding(1,1,1,1)(nn.JoinTable(2)({C4,U3})) --concat + padding
C15=nn.ReLU(true)(nn.SpatialConvolution(256, 128, 3, 3)(P15))
P16=nn.SpatialReflectionPadding(1,1,1,1)(C15)
C16=nn.ReLU(true)(nn.SpatialConvolution(128, 128, 3, 3)(P16))
U4=nn.SpatialFullConvolution(128, 64, 2, 2, 2, 2)(C16) -- up convolve
P17=nn.SpatialReflectionPadding(1,1,1,1)(nn.JoinTable(2)({C2,U4})) --concat + padding
C17=nn.ReLU(true)(nn.SpatialConvolution(128, 64, 3, 3)(P17))
P18=nn.SpatialReflectionPadding(1,1,1,1)(C17)
C18=nn.ReLU(true)(nn.SpatialConvolution(64, 64, 3, 3)(P18))
C19=nn.SpatialConvolution(64, 8, 1, 1)(C18) --output: 8 raw class scores per pixel (no ReLU)
--Training setup: wrap the graph nodes into a single trainable module,
--convert eligible layers to cudnn, and move everything to the GPU.
print("Starting training and validation")
unet = nn.gModule({x1}, {C19})
-- Predicate for cudnn.convert: skip max-pooling layers so they stay as
-- plain nn modules (their cudnn counterparts are excluded on purpose).
local keep_nn_pooling = function(m)
   return torch.type(m):find('MaxPooling')
end
cudnn.convert(unet, cudnn, keep_nn_pooling)
unet = unet:cuda()
-- Per-pixel multi-label soft-margin loss, also on the GPU.
criterion = nn.MultiLabelSoftMarginCriterion():cuda()
训练和验证按以下方式进行:
-- Training / validation driver.
-- Relies on names defined elsewhere in this file: k, k_v (sample counts),
-- dataset, dataset_v, error_v, matrix_v, table_of_tensors_to_batch,
-- update_confusion_matrix. Batch tensors are left as globals on purpose —
-- code outside this view may read them.
-- FIX(review): removed the dead `prediction1:cuda()` / `prediction1_v:cuda()`
-- statements — their return value was discarded and the forward output is
-- already a CudaTensor, so they only burned time every iteration. Also
-- hoisted the repeated N*H*W size product and renamed feval's parameter so
-- it no longer shadows the outer `params`.
learningRate_init=0.01
params, gradParams = unet:getParameters()
optimState = {learningRate=learningRate_init}
epochs=100
error1=torch.CudaTensor(epochs):zero()  -- per-epoch accumulated training loss
batch=16
for epoch=1,epochs do
  print("epoch=",epoch)
  -- if epoch - math.floor(epoch/5)*5==0 then -- optional LR decay: halve every 5 epochs
  --   optimState={learningRate=0.5*learningRate_init}
  --   learningRate_init=0.5*learningRate_init
  -- end
  --training pass
  for i=1,((k-1)/batch) do
    print("image_num=",i)
    collectgarbage()
    input1=table_of_tensors_to_batch(dataset,i,batch,1)
    output1=table_of_tensors_to_batch(dataset,i,batch,2)
    input1=input1:cuda()
    output1=output1:cuda()
    -- Closure for optim.sgd: returns (loss, dloss/dparams).
    -- The parameter is unused (unet's flattened params are the same storage
    -- as `params`); named `_p` to avoid shadowing the outer `params`.
    local function feval(_p)
      gradParams:zero()
      prediction1 = unet:forward(input1)
      -- Rows for the criterion: N*H*W 8-element vectors.
      -- NOTE(review): this view does NOT transpose the (N,8,H,W) layout, so
      -- a row is not one pixel's 8 class scores. That happens to be harmless
      -- here only because MultiLabelSoftMarginCriterion is elementwise;
      -- transpose before viewing if you ever switch criterion.
      local rows = input1:size(1)*input1:size(4)*input1:size(3)
      local loss = criterion:forward(prediction1:view(rows,8), output1:view(rows,8))
      error1[epoch]=loss+error1[epoch]
      local dloss_doutput = criterion:backward(prediction1:view(rows,8), output1:view(rows,8))
      unet:backward(input1, dloss_doutput:view(input1:size(1),8,input1:size(3),input1:size(4)))
      collectgarbage()
      return loss,gradParams
    end
    optim.sgd(feval, params, optimState)
  end
  --validation pass (forward only; no parameter update)
  for i_v=1,((k_v-1)/batch) do
    print("validation_num=",i_v)
    collectgarbage()
    input1_v=table_of_tensors_to_batch(dataset_v,i_v,batch,1)
    output1_v=table_of_tensors_to_batch(dataset_v,i_v,batch,2)
    input1_v=input1_v:cuda()
    output1_v=output1_v:cuda()
    prediction1_v = unet:forward(input1_v)
    local rows_v = input1_v:size(1)*input1_v:size(4)*input1_v:size(3)
    loss_v = criterion:forward(prediction1_v:view(rows_v,8), output1_v:view(rows_v,8))
    error_v[epoch]=loss_v+error_v[epoch]
    collectgarbage()
    matrix_v[epoch]=update_confusion_matrix(matrix_v[epoch],output1_v,prediction1_v,8,batch)
  end
end
我想知道这里的哪一步可能是缓慢训练的原因。我能够在2000ms内训练一批16张图像(每张图像大小为128x128)。