Convolutional autoencoder error - 'RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.FloatTensor) should be the same'

Asked: 2019-03-12 11:50:39

Tags: deep-learning conv-neural-network pytorch autoencoder

For the model below I was getting the error "Expected stride to be a single integer value or a list". I followed the suggested answer at https://discuss.pytorch.org/t/expected-stride-to-be-a-single-integer-value-or-a-list/17612/2 and added

img.unsqueeze_(0)

I now receive the error:

RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.FloatTensor) should be the same

In the code below I generate three example images and try to learn a representation of them with an autoencoder:

%reset -f

import torch.utils.data as data_utils
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import metrics
import datetime
from sklearn.preprocessing import MultiLabelBinarizer, scale
import seaborn as sns
sns.set_style("darkgrid")
from ast import literal_eval
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from os import listdir
import cv2
from numpy.polynomial.polynomial import polyfit


number_channels = 3

%matplotlib inline

x = np.arange(10)
m = 1
b = 2
y = x * x
plt.plot(x, y)
plt.axis('off')
plt.savefig('1-increasing.jpg')

x = np.arange(10)
m = 0.01
b = 2
y = x * x * x
plt.plot(x, y)
plt.axis('off')
plt.savefig('2-increasing.jpg')

x = np.arange(10)
m = 0
b = 2
y = (m*x)+b
plt.plot(x, y)
plt.axis('off')
plt.savefig('constant.jpg')

batch_size_value = 2

train_image = []

train_image.append(cv2.imread('1-increasing.jpg', cv2.IMREAD_UNCHANGED).reshape(3, 288, 432))
train_image.append(cv2.imread('2-increasing.jpg', cv2.IMREAD_UNCHANGED).reshape(3, 288, 432))
train_image.append(cv2.imread('constant.jpg', cv2.IMREAD_UNCHANGED).reshape(3, 288, 432))

data_loader = data_utils.DataLoader(train_image, batch_size=batch_size_value, shuffle=False,drop_last=True)

import torch
import torchvision
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.utils import save_image
from torchvision.datasets import MNIST
import os

if not os.path.exists('./dc_img'):
    os.mkdir('./dc_img')


def to_img(x):
    x = 0.5 * (x + 1)
    x = x.clamp(0, 1)
    x = x.view(x.size(0), 1, 28, 28)
    return x


num_epochs = 100
# batch_size = 128
batch_size = 2

learning_rate = 1e-3
dataloader = data_loader

class autoencoder(nn.Module):
    def __init__(self):
        super(autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 16, 3, stride=3, padding=1),  # b, 16, 10, 10
            nn.ReLU(True),
            nn.MaxPool2d(2, stride=2),  # b, 16, 5, 5
            nn.Conv2d(16, 8, 3, stride=2, padding=1),  # b, 8, 3, 3
            nn.ReLU(True),
            nn.MaxPool3d(3, stride=1)  # b, 8, 2, 2
        )
        self.decoder = nn.Sequential(
            nn.ConvTranspose3d(8, 16, 3, stride=2),  # b, 16, 5, 5
            nn.ReLU(True),
            nn.ConvTranspose3d(16, 8, 5, stride=3, padding=1),  # b, 8, 15, 15
            nn.ReLU(True),
            nn.ConvTranspose3d(8, 1, 2, stride=2, padding=1),  # b, 1, 28, 28
            nn.Tanh()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


model = autoencoder()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                             weight_decay=1e-5)

for epoch in range(num_epochs):
    for data in dataloader:
        img, _ = data
        img.unsqueeze_(0)
#         img.unsqueeze_(0)
#         print(img)
#         img.unsqueeze_(0)
        img = Variable(img).cuda()
        # ===================forward=====================
        output = model(img)
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log=================to_img=======
    print('epoch [{}/{}], loss:{:.4f}'
          .format(epoch+1, num_epochs, loss.data[0]))
    if epoch % 10 == 0:
        pic = to_img(output.cpu().data)
        save_image(pic, './dc_img/image_{}.png'.format(epoch))

torch.save(model.state_dict(), './conv_autoencoder.pth')

But, as mentioned above, this results in the error:

    299     def forward(self, input):
    300         return F.conv2d(input, self.weight, self.bias, self.stride,
--> 301                         self.padding, self.dilation, self.groups)
    302
    303

RuntimeError: Input type (torch.cuda.ByteTensor) and weight type (torch.FloatTensor) should be the same

Is the problem related to img.unsqueeze_(0)?

How can I train the autoencoder on these images?

1 Answer:

Answer 0 (score: 0)


If you want to train the model with CUDA, make sure that both the model and the model's inputs are moved to the GPU. Moving only the model, or only the inputs, raises exactly this kind of error.
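A minimal sketch of that change against the training loop in the question (the .float() cast is an extra assumption on my part: cv2.imread returns uint8 arrays, and the traceback complains that the input is a ByteTensor rather than a FloatTensor, so the batch also needs to be converted to float):

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# move the model's weights to the GPU (when one is available)
model = autoencoder().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate,
                             weight_decay=1e-5)

for epoch in range(num_epochs):
    for data in dataloader:
        img, _ = data
        img.unsqueeze_(0)
        # cast the uint8 batch to float and move it to the same device as the model
        img = img.float().to(device)
        # ===================forward=====================
        output = model(img)
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

On a CUDA-only setup, model.cuda() together with img = img.float().cuda() does the same thing.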