Question

我有图像的数据集，看起来像这样：

array([[[[0.35980392, 0.26078431, 0.14313725],
     [0.38137255, 0.26470588, 0.15196078],
     [0.51960784, 0.3745098 , 0.26176471],
     ...,
     [0.34313725, 0.22352941, 0.15      ],
     [0.30784314, 0.2254902 , 0.15686275],
     [0.28823529, 0.22843137, 0.16862745]],

    [[0.38627451, 0.28235294, 0.16764706],
     [0.45098039, 0.32843137, 0.21666667],
     [0.62254902, 0.47254902, 0.36470588],
     ...,
     [0.34607843, 0.22745098, 0.15490196],
     [0.30686275, 0.2245098 , 0.15588235],
     [0.27843137, 0.21960784, 0.16176471]],

    [[0.41568627, 0.30098039, 0.18431373],
     [0.51862745, 0.38529412, 0.27352941],
     [0.67745098, 0.52058824, 0.40980392],
     ...,
     [0.34901961, 0.22941176, 0.15588235],
     [0.29901961, 0.21666667, 0.14901961],
     [0.26078431, 0.20098039, 0.14313725]],

    ...,

这是我如何下载：

data, attrs = fetch_dataset()

这是如何fetch_dataset()功能作品：

import numpy as np
import os
from skimage.transform import resize 
import skimage.io
import pandas as pd

def fetch_dataset(attrs_name = "lfw_attributes.txt",
                  images_name = "lfw-deepfunneled",
                  dx=80,dy=80,
                  dimx=45,dimy=45
):

#download if not exists
if not os.path.exists(images_name):
    print("images not found, donwloading...")
    os.system("wget http://vis-www.cs.umass.edu/lfw/lfw-deepfunneled.tgz -O tmp.tgz")
    print("extracting...")
    os.system("tar xvzf tmp.tgz && rm tmp.tgz")
    print("done")
    assert os.path.exists(images_name)

if not os.path.exists(attrs_name):
    print("attributes not found, downloading...")
    os.system("wget http://www.cs.columbia.edu/CAVE/databases/pubfig/download/%s" % attrs_name)
    print("done")

#read attrs
df_attrs = pd.read_csv("lfw_attributes.txt",sep='\t',skiprows=1,) 
df_attrs = pd.DataFrame(df_attrs.iloc[:,:-1].values, columns = df_attrs.columns[1:])


#read photos
photo_ids = []
for dirpath, dirnames, filenames in os.walk(images_name):
    for fname in filenames:
        if fname.endswith(".jpg"):
            fpath = os.path.join(dirpath,fname)
            photo_id = fname[:-4].replace('_',' ').split()
            person_id = ' '.join(photo_id[:-1])
            photo_number = int(photo_id[-1])
            photo_ids.append({'person':person_id,'imagenum':photo_number,'photo_path':fpath})

photo_ids = pd.DataFrame(photo_ids)
# print(photo_ids)
#mass-merge
#(photos now have same order as attributes)
df = pd.merge(df_attrs,photo_ids,on=('person','imagenum'))

assert len(df)==len(df_attrs),"lost some data when merging dataframes"

# print(df.shape)
#image preprocessing
all_photos =df['photo_path'].apply(skimage.io.imread)\
                            .apply(lambda img:img[dy:-dy,dx:-dx])\
                            .apply(lambda img: resize(img,[dimx,dimy]))

all_photos = np.stack(all_photos.values)#.astype('uint8')
all_attrs = df.drop(["photo_path","person","imagenum"],axis=1)

return all_photos,all_attrs

接下来，我想我的数据集转换为张量：

import torch
import torchvision.transforms
from torchvision import transforms as transforms

class MyDataset(torch.utils.data.Dataset):
def __init__(self, data):
        self.dataset = data

def __getitem__(self, idx):
    sample = self.dataset[idx]
    data, label = sample[0], sample[1]
    
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    return transform(data), torch.tensor(label)

def __len__(self):
    return len(self.dataset)

那么：

from sklearn.model_selection import train_test_split

train, val = train_test_split(data, test_size=0.25, random_state=42)

train_dataset = MyDataset(train)
val_dataset = MyDataset(val)

自动编码类：

from copy import deepcopy

class Autoencoder(nn.Module):
def __init__(self):
    #<определите архитектуры encoder и decoder>
      super(Autoencoder,self).__init__()

      self.encoder = nn.Sequential(
        nn.Conv2d(3, 6, kernel_size=5),
        nn.ReLU(True),
        nn.Conv2d(6,16,kernel_size=5),
        nn.ReLU(True))
      self.decoder = nn.Sequential(
        nn.Conv2d(16, 6, kernel_size=5),
        nn.ReLU(True),
        nn.Conv2d(6,3,kernel_size=5),
        nn.ReLU(True))
    
def forward(self, x):

    latent_code = self.encoder(x)
    reconstruction = self.decoder(latent_code)
    
    return reconstruction, latent_code

那么：

criterion = nn.BCELoss()

autoencoder = Autoencoder()

optimizer = torch.optim.Adam(autoencoder.parameters())

在训练本身：

#<тут Ваш код тренировки автоэнкодера>
num_epochs = 5
for epoch in range(num_epochs):
for x in train_dataset:
    img, _ = x
    img = Variable(img).cpu()
    
    output = autoencoder(img)
    loss = criterion(output, img)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
print('epoch [{}/{}], loss:{:.4f}'.format(epoch+1, num_epochs, loss.data()))

我收到此错误：

RuntimeError: output with shape [1, 45, 3] doesn't match the broadcast shape [3, 45, 3]

我觉得我在准备我的数据出错的地方。可以someome请解释我如何正确地做到这一点在我的情况？感谢您的帮助。

如何准备数据并执行自动编码器的培训？

0 个答案: