python: Reading an RGB image for deep learning

Date: 2017-12-01 17:57:36

Tags: python deep-learning rgb reshape cv2

On a deep-learning tutorial site, I found the following code for reading an image.

cv2.resize(cv2.imread(folder + name, 1), (100, 200)).reshape(3, 100, 200)

This code changes the shape of the image from (100, 200, 3) to (3, 100, 200). I tried to see how the function changes the shape of the matrix, and I got strange output. Please assume a 2x4 RGB image (d) as shown below.

d = array([[[ 1,  2,  3],[ 4,  5,  6],[ 7,  8,  9], [10, 11, 12]],[[13, 14, 15],[16, 17, 18], [19, 20, 21],[22, 23, 24]]])
d.shape: (2, 4, 3) 
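For reference, the example array above can be rebuilt with a small NumPy sketch (assuming numpy is installed):

import numpy as np

d = np.arange(1, 25).reshape(2, 4, 3)  # the same 2x4 RGB example, values 1..24
print(d.shape)  # (2, 4, 3)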

After applying reshape, it shows

d.reshape(3,2,4)
array([[[ 1,  2,  3,  4],
        [ 5,  6,  7,  8]],

       [[ 9, 10, 11, 12],
        [13, 14, 15, 16]],

       [[17, 18, 19, 20],
        [21, 22, 23, 24]]])

But I don't think this is the correct representation, because we want to represent the image as shown below.

[image: the image laid out as separate R, G, and B channel planes] So I think we should convert the image into the following.

d.reshape(3,2,4)
array([[[ 1,  4,  7, 10],
        [13, 16, 19, 22]],  # R layer

       [[ 2,  5,  8, 11],
        [14, 17, 20, 23]],  # G layer

       [[ 3,  6,  9, 12],
        [15, 18, 21, 24]]]) # B layer
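For comparison, here is a minimal NumPy sketch of the same 2x4 example with the channel axis moved to the front via transpose:

import numpy as np

d = np.arange(1, 25).reshape(2, 4, 3)  # the 2x4 RGB example above

# (H, W, C) -> (C, H, W): each pixel stays intact, only the channel axis moves
print(d.transpose(2, 0, 1))
# [[[ 1  4  7 10]
#   [13 16 19 22]]   R layer
#
#  [[ 2  5  8 11]
#   [14 17 20 23]]   G layer
#
#  [[ 3  6  9 12]
#   [15 18 21 24]]]  B layer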

Is my understanding wrong? If you know anything about this, please help me.

I have put the whole code below.

    from torch import nn
    from torch.nn import functional as F
    from torch.autograd import Variable
    from sklearn.model_selection import train_test_split
    import numpy as np
    from collections import Counter
    import os
    import cv2
    import torch.optim as optim
    import torch.utils.data


    def read_labels(file):
      dic = {}
      with open(file) as f:
        reader = f
        for row in reader:
            dic[row.split(",")[0]]  = row.split(",")[1].rstrip() #rstrip(): eliminate "\n"
      return dic

    image_names= os.listdir("../train")
    label_dic = read_labels("../labels.csv")

    labels = []
    images =[]

    for name in image_names:
        images.append(cv2.resize(cv2.imread("../train/"+name,1), (100, 200)).reshape(3,100,200))
        labels.append(label_dic[os.path.splitext(name)[0]])

    images = np.asarray(images)


    """
    Assign numbers for each labels
    """

    tmp_labels = labels
    uniq_labels = set(tmp_labels) # eliminate duplication
    num_breeds = len(Counter(labels)) # number of breeds
    uniqu_labels_index = dict((label, i) for i, label in enumerate(uniq_labels)) #create dictionary and assign number for each labels

    labels_num = [uniqu_labels_index[label] for i,label in enumerate(labels)]
    labels_num = np.array(labels_num)

    """
    Data distribution
    """
    N = len(images)
    N_train = int(N * 0.7)
    N_test = int(N*0.2)

    X_train, X_tmp, Y_train, Y_tmp = train_test_split(images, labels_num, train_size=N_train)
    X_validation, X_test, Y_validation, Y_test = train_test_split(X_tmp, Y_tmp, test_size=N_test)

    """
    Model Definition
    """


    # CNN Model (2 conv layer)
    class CNN(nn.Module):
        def __init__(self):
            super(CNN, self).__init__()
            self.layer1 = nn.Sequential(
                nn.Conv2d(3,34, kernel_size=5,padding= 2),
                nn.Dropout2d(),
                nn.BatchNorm2d(34),
                nn.ReLU(),
                nn.MaxPool2d(2))
            self.layer2 = nn.Sequential(
                nn.Conv2d(34, 68, kernel_size=5,padding= 2),
                nn.BatchNorm2d(68),
                nn.ReLU(),
                nn.MaxPool2d(2))
            self.fc1 = nn.Linear(1700,300)
            self.fc2 = nn.Linear(300,num_breeds)

        def forward(self, x):
            out = self.layer1(x)
            #print out.data.shape
            out = self.layer2(out)
            #print out.data.shape
            out = out.view(out.size(0), -1)
            #print out.data.shape
            out =self.fc1(out)
            #out = F.dropout(out)
            #out = self.fc2(out)
            return F.log_softmax(out)

        def accuracy(self,outputs,labels):
            #for i, (images_val, labels_val) in enumerate(val_loader):

                # print images.shape
             #   images_val = Variable(images_val).float()
              #  labels_val = Variable(labels_val).float().type(torch.LongTensor)
              #  outputs_val = CNN(images_val)

            inference =  np.argmax(outputs.data.numpy(),axis=1)
            answers = labels.data.numpy()
            correction =  np.equal(inference,answers)
            return  np.sum(correction)/float(len(correction))

    CNN = CNN()

    """
    Training
    """
    batch_size = 100
    learning_rate =0.01
    # Data Loader (Input Pipeline)
    train = torch.utils.data.TensorDataset(torch.from_numpy(X_train), torch.from_numpy(Y_train))
    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)

    val = torch.utils.data.TensorDataset(torch.from_numpy(X_validation), torch.from_numpy(Y_validation))
    val_loader = torch.utils.data.DataLoader(val, batch_size=len(X_validation), shuffle=True)

    test = torch.utils.data.TensorDataset(torch.from_numpy(X_test), torch.from_numpy(Y_test))
    test_loader = torch.utils.data.DataLoader(test, batch_size=len(X_test), shuffle=True)


    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(CNN.parameters(), lr=learning_rate)

    for epoch in range(250):  # loop over the dataset multiple times
        running_loss = 0.0
        for i, (images, labels) in enumerate(train_loader):

            images = Variable(images).float()
            labels = Variable(labels).float().type(torch.LongTensor)
            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs = CNN(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.data[0]


        accuracy = CNN.accuracy(outputs,labels)
        print()
        print("epoch :", epoch)
        print('loss:', float(running_loss) / 2000)
        print("accuracy :", accuracy)
        running_loss = 0.0

    print('Finished Training')
    for i, (images, labels) in enumerate(test_loader):

            images = Variable(images).float()
            labels = Variable(labels).float().type(torch.LongTensor)
            optimizer.zero_grad()
            outputs = CNN(images)
    inference =  np.argmax(outputs.data.numpy(),axis=1)
    answers = labels.data.numpy()
    correction =  np.equal(inference,answers)
    print(np.sum(correction)/float(len(correction)))

1 Answer:

Answer 0: (score: 0)

The reshape function comes from NumPy.

The shape attribute tells you how many elements there are in the array at each layer. So, in your example:

d = array([
            [[ 1,  2,  3],[ 4,  5,  6],[ 7,  8,  9],[10, 11, 12]], #1st layer 1st element (4 lists inside with 3 numbers each)
            [[13, 14, 15],[16, 17, 18], [19, 20, 21],[22, 23, 24]] #1st layer 2nd element (4 lists inside with 3 numbers each)
         ])

The first layer has two lists, the second layer has 4 lists, and the third layer has three numbers.

When you call reshape(3,2,4), you get 3 lists at the first layer, 2 lists at the second layer, and 4 numbers at the third layer, keeping the same elements you provided.

It does not change the order of the elements, only the shape. In your example, if you try to view the modified image with imshow, you will see that the reshape command has messed up the image.
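One way to verify this (a small sketch with a deterministic stand-in array in OpenCV's (height, width, channels) order): transposing back after a transpose recovers the original pixels, while transposing back after a reshape does not.

import numpy as np

# Deterministic stand-in for an image in (height, width, channels) order
img = np.arange(200 * 100 * 3).reshape(200, 100, 3)

reshaped = img.reshape(3, 200, 100)   # same bytes, pixel layout scrambled
transposed = img.transpose(2, 0, 1)   # channel axis moved to the front

print(np.array_equal(reshaped.transpose(1, 2, 0), img))    # False
print(np.array_equal(transposed.transpose(1, 2, 0), img))  # True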

Try:

image = cv2.imread(folder + name, 1)
cv2.imshow('image',image)
cv2.waitKey(0)
cv2.destroyAllWindows()

Then:

reshapedimage =cv2.resize(cv2.imread(folder + name, 1), (100, 200))
cv2.imshow('image',reshapedimage)
cv2.waitKey(0)
cv2.destroyAllWindows()

You will be able to see what each command does to your image.
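If the goal is the channels-first (3, height, width) layout that PyTorch convolutions expect, a minimal sketch of the conversion would use np.transpose on the resized image instead of reshape (folder and name as in the snippets above; note that cv2.imread returns pixels in BGR order):

import cv2

resizedimage = cv2.resize(cv2.imread(folder + name, 1), (100, 200))  # (height, width, 3), BGR
channels_first = resizedimage.transpose(2, 0, 1)                     # (3, height, width)

# To display it again with OpenCV, move the channel axis back to the end
cv2.imshow('image', channels_first.transpose(1, 2, 0))
cv2.waitKey(0)
cv2.destroyAllWindows()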