On a deep learning tutorial site, I found the following code for reading images.
cv2.resize(cv2.imread(folder + name, 1), (100, 200)).reshape(3, 100, 200)
This code changes the shape of the image from (100, 200, 3) to (3, 100, 200). I tried to see how the function changes the shape of the matrix, and I got strange output. Please assume a 2x4 RGB image d as shown below.
d = array([[[ 1, 2, 3],[ 4, 5, 6],[ 7, 8, 9], [10, 11, 12]],[[13, 14, 15],[16, 17, 18], [19, 20, 21],[22, 23, 24]]])
d.shape: (2, 4, 3)
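(For reference, the values are just 1 through 24 in order, so the same example array can be rebuilt with NumPy as below; this snippet is only here to make the example easy to reproduce.)
import numpy as np

# 2x4 RGB example: values 1..24 arranged as (rows=2, columns=4, channels=3)
d = np.arange(1, 25).reshape(2, 4, 3)
print(d.shape)  # (2, 4, 3)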
After applying reshape, it shows:
d.reshape(3,2,4)
array([[[ 1, 2, 3, 4],
[ 5, 6, 7, 8]],
[[ 9, 10, 11, 12],
[ 13, 14, 15, 16]],
[[ 17, 18, 19, 20],
[ 21, 22, 23, 24]]])
But I don't think this is the correct representation, because we want to represent the image like this:
d.reshape(3,2,4)
array([[[ 1, 4, 7, 10],
[ 13, 16, 19, 22]],#R layer
[[ 2, 5, 8, 11],
[ 14, 17, 20, 23]],#G layer
[[ 3, 6, 9, 12],
[ 15, 18, 21, 24]]])#B layer
Is my understanding wrong? If you know anything about this, please help me.
I have put the whole code below.
from torch import nn
from torch.nn import functional as F
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
import numpy as np
from collections import Counter
import os
import cv2
import torch.optim as optim
import torch.utils.data
def read_labels(file):
    dic = {}
    with open(file) as f:
        reader = f
        for row in reader:
            dic[row.split(",")[0]] = row.split(",")[1].rstrip()  # rstrip(): eliminate "\n"
    return dic
image_names= os.listdir("../train")
label_dic = read_labels("../labels.csv")
labels = []
images =[]
for name in image_names:
    images.append(cv2.resize(cv2.imread("../train/" + name, 1), (100, 200)).reshape(3, 100, 200))
    labels.append(label_dic[os.path.splitext(name)[0]])
images = np.asarray(images)
"""
Assign a number to each label
"""
tmp_labels = labels
uniq_labels = set(tmp_labels) # eliminate duplication
num_breeds = len(Counter(labels)) # number of breeds
uniqu_labels_index = dict((label, i) for i, label in enumerate(uniq_labels)) #create dictionary and assign number for each labels
labels_num = [uniqu_labels_index[label] for i,label in enumerate(labels)]
labels_num = np.array(labels_num)
"""
Data distribution
"""
N = len(images)
N_train = int(N * 0.7)
N_test = int(N*0.2)
X_train, X_tmp, Y_train, Y_tmp = train_test_split(images, labels_num, train_size=N_train)
X_validation, X_test, Y_validation, Y_test = train_test_split(X_tmp, Y_tmp, test_size=N_test)
"""
Model Definition
"""
# CNN Model (2 conv layer)
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 34, kernel_size=5, padding=2),
            nn.Dropout2d(),
            nn.BatchNorm2d(34),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.layer2 = nn.Sequential(
            nn.Conv2d(34, 68, kernel_size=5, padding=2),
            nn.BatchNorm2d(68),
            nn.ReLU(),
            nn.MaxPool2d(2))
        self.fc1 = nn.Linear(1700, 300)
        self.fc2 = nn.Linear(300, num_breeds)

    def forward(self, x):
        out = self.layer1(x)
        #print out.data.shape
        out = self.layer2(out)
        #print out.data.shape
        out = out.view(out.size(0), -1)
        #print out.data.shape
        out = self.fc1(out)
        #out = F.dropout(out)
        #out = self.fc2(out)
        return F.log_softmax(out)

    def accuracy(self, outputs, labels):
        #for i, (images_val, labels_val) in enumerate(val_loader):
        #    print images.shape
        #    images_val = Variable(images_val).float()
        #    labels_val = Variable(labels_val).float().type(torch.LongTensor)
        #    outputs_val = CNN(images_val)
        inference = np.argmax(outputs.data.numpy(), axis=1)
        answers = labels.data.numpy()
        correction = np.equal(inference, answers)
        return np.sum(correction) / float(len(correction))
CNN = CNN()
"""
Training
"""
batch_size = 100
learning_rate =0.01
# Data Loader (Input Pipeline)
train = torch.utils.data.TensorDataset(torch.from_numpy(X_train), torch.from_numpy(Y_train))
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
val = torch.utils.data.TensorDataset(torch.from_numpy(X_validation), torch.from_numpy(Y_validation))
val_loader = torch.utils.data.DataLoader(val, batch_size=len(X_validation), shuffle=True)
test = torch.utils.data.TensorDataset(torch.from_numpy(X_test), torch.from_numpy(Y_test))
test_loader = torch.utils.data.DataLoader(test, batch_size=len(X_test), shuffle=True)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(CNN.parameters(), lr=learning_rate)
for epoch in range(250):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images).float()
        labels = Variable(labels).float().type(torch.LongTensor)
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = CNN(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.data[0]
    accuracy = CNN.accuracy(outputs, labels)
    print
    print "epoch :", epoch
    print 'loss:', float(running_loss) / 2000
    print "accuracy :", accuracy
    running_loss = 0.0
print('Finished Training')
for i, (images, labels) in enumerate(test_loader):
    images = Variable(images).float()
    labels = Variable(labels).float().type(torch.LongTensor)
    optimizer.zero_grad()
    outputs = CNN(images)
    inference = np.argmax(outputs.data.numpy(), axis=1)
    answers = labels.data.numpy()
    correction = np.equal(inference, answers)
    print np.sum(correction) / float(len(correction))
Answer 0 (score: 0):
The reshape function comes from NumPy. The shape attribute tells you how many elements the array holds at each level of nesting.
So, in your example:
d = array([
[[ 1, 2, 3],[ 4, 5, 6],[ 7, 8, 9],[10, 11, 12]], #1st layer 1st element (4 lists inside with 3 numbers each)
[[13, 14, 15],[16, 17, 18], [19, 20, 21],[22, 23, 24]] #1st layer 2nd element (4 lists inside with 3 numbers each)
])
The first level contains two lists, each of those contains 4 lists, and each of those contains three numbers. When you call reshape(3,2,4), you get 3 lists at the first level, 2 lists at the second level, and 4 numbers at the third level, keeping the same elements you provided.
It does not change the order of the elements, only the shape. In your example, if you try to view the modified image with imshow, you will see that the reshape command has scrambled the image.
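To see this concretely on the 2x4 example from the question (a small sketch using plain NumPy; d is the same array defined above): reshape only regroups the flat sequence 1..24, while transposing the axes is what actually puts each colour channel into its own 2x4 plane.
import numpy as np

d = np.arange(1, 25).reshape(2, 4, 3)  # the 2x4 RGB example from the question

# reshape keeps the flat element order 1..24 and only regroups it
print(d.reshape(3, 2, 4))    # first plane is [[1, 2, 3, 4], [5, 6, 7, 8]]

# transpose reorders the axes, which separates the channels
print(d.transpose(2, 0, 1))  # first plane is [[1, 4, 7, 10], [13, 16, 19, 22]] -> the R layer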
Try:
image = cv2.imread(folder + name, 1)
cv2.imshow('image',image)
cv2.waitKey(0)
cv2.destroyAllWindows()
Then:
reshapedimage = cv2.resize(cv2.imread(folder + name, 1), (100, 200))
cv2.imshow('image',reshapedimage)
cv2.waitKey(0)
cv2.destroyAllWindows()
You will be able to see what each command does to your image.
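If the goal is the channels-first (3, H, W) layout that PyTorch expects, a minimal sketch (using the same folder and name placeholders as above) would swap the axes with transpose instead of calling reshape:
import cv2

image = cv2.resize(cv2.imread(folder + name, 1), (100, 200))  # shape (H, W, 3)
chw = image.transpose(2, 0, 1)                                # shape (3, H, W), each plane is one colour channel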