I want to build an autoencoder with a fully convolutional network that reconstructs 50x50 images, but the loss does not decrease. Note that I don't have a large dataset yet, so I'm not trying to get it to generalize properly; however, even overfitting on just a single image does not work.
I normalize the input/output images to [0,1] and use MSE loss with SGD optimization. Changing the learning rate does not change anything at all. I am probably missing something...
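To be concrete about the preprocessing, this is essentially what I do per image (a minimal sketch; the helper name and the uint8 HWC input are assumptions for illustration):

import numpy as np
import torch

def to_normalized_tensor(img_uint8):
    # img_uint8: (50, 50, 3) uint8 array with values in [0, 255]
    t = torch.from_numpy(img_uint8.astype(np.float32) / 255.0)  # scale to [0, 1]
    return t.permute(2, 0, 1)  # HWC -> CHW, i.e. (3, 50, 50)

Here is the model and training code: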
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        """ fully convolutional autoencoder architecture:
            - "decode" reverses all convolution and maxpool steps of "encode"
            - the input tensor (3,50,50) is also the output tensor (3,50,50)
        """
        # model structure:
        """ encode (3,50,50) image to (30,3,3) features """
        self.conv1 = nn.Conv2d(3, 10, kernel_size=(3,3), stride=(1,1))
        self.conv2 = nn.Conv2d(10, 15, kernel_size=(3,3), stride=(1,1))
        self.maxpool1 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(15, 20, kernel_size=(3,3), stride=(1,1))
        self.conv4 = nn.Conv2d(20, 25, kernel_size=(4,4), stride=(1,1))
        self.maxpool2 = nn.MaxPool2d(2, 2)
        self.conv5 = nn.Conv2d(25, 30, kernel_size=(4,4), stride=(1,1))
        self.maxpool3 = nn.MaxPool2d(2, 2)
        """ decode (30,3,3) features back to a (3,50,50) image """
        self.upsample1 = nn.Upsample(scale_factor=2, mode='nearest')
        self.deconv1 = nn.ConvTranspose2d(30, 25, kernel_size=(3,3), stride=(1,1), padding=1, dilation=2, output_padding=1)
        self.upsample2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.deconv2 = nn.ConvTranspose2d(25, 20, kernel_size=(3,3), stride=(1,1), padding=1, dilation=2, output_padding=1)
        self.deconv3 = nn.ConvTranspose2d(20, 15, kernel_size=(5,5), stride=(1,1), padding=1, dilation=1, output_padding=0)
        self.upsample3 = nn.Upsample(scale_factor=2, mode='nearest')
        self.deconv4 = nn.ConvTranspose2d(15, 10, kernel_size=(3,3), stride=(1,1), padding=0, dilation=1, output_padding=0)
        self.deconv5 = nn.ConvTranspose2d(10, 3, kernel_size=(3,3), stride=(1,1), padding=0, dilation=1, output_padding=0)
    def forward(self, x):
        # ------------- encode ------------------
        x = F.relu(self.conv1(x))    # 10, 48, 48
        x = F.relu(self.conv2(x))    # 15, 46, 46
        x = m1 = self.maxpool1(x)    # 15, 23, 23
        x = F.relu(self.conv3(x))    # 20, 21, 21
        x = F.relu(self.conv4(x))    # 25, 18, 18
        x = m2 = self.maxpool2(x)    # 25, 9, 9
        x = F.relu(self.conv5(x))    # 30, 6, 6
        x = self.maxpool3(x)         # 30, 3, 3
        # ------------- decode ------------------
        x = self.upsample1(x)        # 30, 6, 6
        x = F.relu(self.deconv1(x))  # 25, 9, 9
        x = self.upsample2(x)        # 25, 18, 18
        x = F.relu(self.deconv2(x))  # 20, 21, 21
        x = F.relu(self.deconv3(x))  # 15, 23, 23
        x = self.upsample3(x)        # 15, 46, 46
        x = F.relu(self.deconv4(x))  # 10, 48, 48
        x = F.relu(self.deconv5(x))  # 3, 50, 50
        return x
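The shapes can be sanity-checked with a dummy forward pass (a minimal sketch using the class above; the random tensor is only for shape testing):

model = Model()
dummy = torch.randn(1, 3, 50, 50)  # one fake RGB image, batch size 1
out = model(dummy)
print(out.shape)  # torch.Size([1, 3, 50, 50]) -- matches the input shape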
def train(train_set, validation_set, plot_training=True):
    model = Model().cuda()
    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
    # gather data to plot the loss curve afterwards
    loss_data = []
    for epoch in range(EPOCHS):
        epoch_loss = []
        for _, data in enumerate(train_set, 0):
            # load batch; the sample is used as its own target, labels are ignored
            samples, targets = data
            # move data to the GPU
            samples = samples.float().cuda()
            targets = targets.float().cuda()
            # reset gradients
            optimizer.zero_grad()
            # predict
            predictions = model.train()(samples)
            loss = criterion(predictions, targets)
            loss.backward()
            optimizer.step()
            epoch_loss.append(loss.item())
            if epoch == 5000:
                # CHW -> HWC for plotting
                plt.imshow((samples[0] * 255).cpu().detach().numpy().transpose(1, 2, 0).astype(np.uint8))
                plt.show()
                plt.imshow((predictions[0] * 255).cpu().detach().numpy().transpose(1, 2, 0).astype(np.uint8))
                plt.show()
        # info
        current_loss = np.mean(epoch_loss)
        # current_accuracy = accuracy(model, validation_set)
        print("loss after epoch [", (epoch + 1), "|", EPOCHS, "] :", current_loss.round(5))
        loss_data.append(current_loss)
    return model, loss_data
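For completeness, this is roughly how I run the single-image overfitting test (a sketch; the constants and the DataLoader wiring here are placeholders, not necessarily the exact values I use):

from torch.utils.data import DataLoader, TensorDataset

LEARNING_RATE = 0.1  # illustrative; I tried several values
EPOCHS = 10000

# one random (3, 50, 50) "image", used as its own reconstruction target
img = torch.rand(1, 3, 50, 50)
loader = DataLoader(TensorDataset(img, img), batch_size=1)
model, loss_data = train(loader, loader)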
I would expect that, at least when training the network on only a single image, the loss would change and decrease. But, as I said, it stays constant:
loss after epoch [ 1 | 10000 ] : 0.35558
loss after epoch [ 2 | 10000 ] : 0.35558
loss after epoch [ 3 | 10000 ] : 0.35558
...
loss after epoch [ 467 | 10000 ] : 0.35558
loss after epoch [ 468 | 10000 ] : 0.35558
loss after epoch [ 469 | 10000 ] : 0.35558
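One way to narrow this down is checking whether gradients reach the parameters at all (a quick diagnostic sketch, meant to run right after loss.backward() inside the training loop):

for name, param in model.named_parameters():
    # a mean absolute gradient of exactly 0 would indicate a dead path
    print(name, None if param.grad is None else param.grad.abs().mean().item())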
Thanks for any help!