Question

我想创建一个具有完全卷积网络的自动编码器，以重新创建50,50张图像，但是错误不会减少。请注意，我还没有庞大的数据集，因此我没有试图让它正确学习，但是即使仅对一张图像进行过拟合也不起作用。

因此，我正在使用MSE损失和SGD优化将输入/输出图像标准化为[0，1]。改变学习速度根本不会改变任何东西。我可能会想念一些东西...但这是代码：

class Model(nn.Module):

    def __init__(self):
        super(Model, self).__init__()

        """ fully convolutional network autoencoder architecture:

            - "decode" reverses all convolutional and maxpool instructions of "decode"
            - input image-matrix (3,50,50) is also output input-matrix (3,50,50)

        """

        # model structur:

        """ encode (50,50,3) image to (30,3,3) features """
        self.conv1 = nn.Conv2d(3, 10, kernel_size=(3,3), stride=(1,1))
        self.conv2 = nn.Conv2d(10, 15, kernel_size=(3,3), stride=(1,1))
        self.maxpool1 = nn.MaxPool2d(2,2)
        self.conv3 = nn.Conv2d(15, 20, kernel_size=(3,3), stride=(1,1))
        self.conv4 = nn.Conv2d(20, 25, kernel_size=(4,4), stride=(1,1))
        self.maxpool2 = nn.MaxPool2d(2,2)
        self.conv5 = nn.Conv2d(25, 30, kernel_size=(4,4), stride=(1,1))
        self.maxpool3 = nn.MaxPool2d(2,2)

        """ (30,3,3) features to (50,50,3) image """
        self.upsample1 = nn.Upsample(scale_factor=2, mode='nearest')
        self.deconv1 = nn.ConvTranspose2d(30, 25, kernel_size=(3,3), stride=(1,1), padding=1, dilation=2, output_padding=1)
        self.upsample2 = nn.Upsample(scale_factor=2, mode='nearest')
        self.deconv2 = nn.ConvTranspose2d(25, 20, kernel_size=(3,3), stride=(1,1), padding=1, dilation=2, output_padding=1)
        self.deconv3 = nn.ConvTranspose2d(20, 15, kernel_size=(5,5), stride=(1,1), padding=1, dilation=1, output_padding=0)
        self.upsample3 = nn.Upsample(scale_factor=2, mode='nearest')
        self.deconv4 = nn.ConvTranspose2d(15, 10, kernel_size=(3,3), stride=(1,1), padding=0, dilation=1, output_padding=0)
        self.deconv5 = nn.ConvTranspose2d(10, 3, kernel_size=(3,3), stride=(1,1), padding=0, dilation=1, output_padding=0)


    def forward(self, x):

        #-------------encode------------------
        x = F.relu(self.conv1(x))   # 10, 48
        x = F.relu(self.conv2(x))   # 15, 46
        x = m1 = self.maxpool1(x)   # 15, 23

        x = F.relu(self.conv3(x))   # 20, 21
        x = F.relu(self.conv4(x))   # 25, 18
        x = m2 = self.maxpool2(x)   # 25, 9

        x = F.relu(self.conv5(x))   # 30, 6, 6
        x = self.maxpool3(x)        # 30, 3, 3

        #-------------decode------------------
        x = self.upsample1(x)       # 6, 6
        x = F.relu(self.deconv1(x)) # 9, 9

        x = self.upsample2(x)       # 18, 18
        x = F.relu(self.deconv2(x)) # 21, 21
        x = F.relu(self.deconv3(x)) # 23, 23

        x = self.upsample3(x)       # 46, 46
        x = F.relu(self.deconv4(x)) # 48, 48
        x = F.relu(self.deconv5(x)) # 50, 50

        return x

def train(train_set, validation_set, plot_training=True):

    model = Model().cuda()

    criterion = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

    # gather data to plot loss and accuracy curve afterwards
    loss_data = []

    for epoch in range(EPOCHS):

        epoch_loss = []
        for _, data in enumerate(train_set, 0):

            # load batch
            # ignore labels
            samples, targets = data

            # set data as cuda varibale
            # use sample as label
            samples = Variable(samples.float().cuda())
            targets = Variable(targets.float().cuda())

            # initialize optimizer
            optimizer.zero_grad()

            # predict
            predictions = model.train()(samples)
            #->|

            loss = criterion(predictions, targets)

            loss.backward()
            optimizer.step()

            epoch_loss.append(loss.item())

        if epoch == 5000:
            plt.imshow((samples[0]*255).cpu().detach().numpy().reshape(50,50,3).astype(np.uint8))
            plt.show()
            plt.imshow((predictions[0]*255).cpu().detach().numpy().reshape(50,50,3).astype(np.uint8))
            plt.show()

        # info
        current_loss = np.mean(epoch_loss)
        # current_accuracy = accuracy(model, validation_set)

        print("loss after epoch [", (epoch + 1) , "|", EPOCHS, "] :", current_loss.round(5))

        loss_data.append(current_loss)

除了我仅在一张图像上训练网络时，损失至少在变化并且在减少，我就不同了。但是，正如我所说，它保持不变：

loss after epoch [ 1 | 10000 ] : 0.35558
loss after epoch [ 2 | 10000 ] : 0.35558
loss after epoch [ 3 | 10000 ] : 0.35558
...
loss after epoch [ 467 | 10000 ] : 0.35558
loss after epoch [ 468 | 10000 ] : 0.35558
loss after epoch [ 469 | 10000 ] : 0.35558

感谢您的帮助！

我的CNN自动编码器的损失没有减少

0 个答案: