我使用 PyTorch 创建了一个字母分类的 CNN 模型,然后想用该模型测试一张它从未见过的图像。我使用 OpenCV 在手写图像中提取了边界框,但不知道如何把提取出的图像输入到模型中。
这是自定义数据集
class CustomDatasetFromCSV(Dataset):
    """Dataset of flattened grayscale images stored one per CSV row.

    Column 0 of every row is the label; the remaining columns are the pixel
    values of one image, reshaped to ``height`` x ``width`` on access.
    """

    def __init__(self, csv_path, height, width, transforms=None):
        """
        Args:
            csv_path (string): path to csv file
            height (int): image height
            width (int): image width
            transforms: optional callable applied to the PIL image
                (e.g. a tensor conversion); ``None`` returns the PIL image
        """
        self.data = pd.read_csv(csv_path)
        self.labels = np.asarray(self.data.iloc[:, 0])
        self.height = height
        self.width = width
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``.

        ``image`` is the output of ``self.transforms`` when one was given,
        otherwise the grayscale (mode 'L') PIL image itself.
        """
        single_image_label = self.labels[index]
        # Everything after the label column is pixel data. Use the configured
        # size instead of a hard-coded 28x28 so other image sizes work too.
        pixels = np.asarray(self.data.iloc[index, 1:])
        img_as_np = pixels.reshape(self.height, self.width).astype('uint8')
        # Convert image from numpy array to PIL image, mode 'L' is for grayscale
        img_as_img = Image.fromarray(img_as_np).convert('L')
        if self.transforms is None:
            # Without a transform there is no tensor to return; hand back the
            # PIL image rather than failing on an unbound local.
            return (img_as_img, single_image_label)
        return (self.transforms(img_as_img), single_image_label)

    def __len__(self):
        """Return the number of rows (samples) in the CSV."""
        return len(self.data.index)
# The only per-image transform is the tensor conversion.
transformations = transforms.Compose([transforms.ToTensor()])

alphabet_from_csv = CustomDatasetFromCSV(
    "/content/drive/My Drive/A_Z Handwritten Data.csv",
    28,
    28,
    transformations,
)

# Shuffle the sample indices with a fixed seed, then hold out the first 20%
# of the shuffled indices for testing and keep the rest for training.
random_seed = 50
data_size = len(alphabet_from_csv)
indices = list(range(data_size))
split = int(np.floor(0.2 * data_size))
np.random.seed(random_seed)
np.random.shuffle(indices)
train_indices, test_indices = indices[split:], indices[:split]

# NOTE: despite their names these two objects are samplers over the index
# subsets, not datasets; both loaders read from the same underlying dataset.
train_dataset = SubsetRandomSampler(train_indices)
test_dataset = SubsetRandomSampler(test_indices)

train_loader = torch.utils.data.DataLoader(
    dataset=alphabet_from_csv,
    batch_size=batch_size,
    sampler=train_dataset,
)
test_loader = torch.utils.data.DataLoader(
    dataset=alphabet_from_csv,
    batch_size=batch_size,
    sampler=test_dataset,
)
这是我的模型
class ConvNet3(nn.Module):
    """Two-stage convolutional classifier for 1-channel 28x28 images.

    Each convolutional stage is Conv -> BatchNorm -> ReLU -> 2x2 max-pool, so
    a 28x28 input is reduced to 14x14 and then 7x7 before the fully connected
    head produces one raw score (logit) per class.

    Args:
        num_classes (int): number of output classes (size of the last layer).
    """

    def __init__(self, num_classes=26):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 28, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(28),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(28, 56, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(56),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc = nn.Sequential(
            nn.Dropout(p=0.5),
            # 56 channels of 7x7 feature maps after the two pooling stages.
            nn.Linear(56 * 7 * 7, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            # Honor the constructor argument instead of a hard-coded 26.
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for ``x``.

        ``x`` is expected to have shape ``(batch, 1, 28, 28)``.
        """
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension for the linear head.
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out
# Build the network on the target device, then its loss and optimizer.
model = ConvNet3(num_classes)
model = model.to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
def train():
    """Run one pass over ``train_loader``, updating ``model`` in place.

    Relies on the module-level ``model``, ``loss_func``, ``optimizer``,
    ``device``, ``train_loader`` and ``train_dataset``. The progress monitor
    is fed the batch size and the mean loss of all batches seen so far.
    """
    model.train()
    losses = []
    progress = ProgressMonitor(length=len(train_dataset))
    for inputs, labels in train_loader:
        # Put the batch on the same device as the model.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass and loss.
        loss = loss_func(model(inputs), labels)

        # Drop stale gradients, backpropagate, then take an optimizer step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the running mean loss for this epoch.
        losses.append(loss.item())
        mean_loss = sum(losses) / len(losses)
        progress.update(inputs.shape[0], mean_loss)
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy.

    Relies on the module-level ``model``, ``device``, ``test_loader`` and
    ``test_dataset``; the length of ``test_dataset`` is the denominator.
    """
    model.eval()
    hits = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            # The predicted class is the index of the highest score per row.
            predicted = model(images).argmax(dim=1)
            hits += (predicted == labels).sum().item()
    total = len(test_dataset)
    acc = 100 * float(hits) / total
    print('Test accuracy: {}/{} ({:.2f}%)'.format(hits, total, acc))
# One training pass followed by one evaluation pass per epoch.
for epoch in range(num_epochs):
    # Epochs are reported 1-based.
    print("{}'s try".format(epoch + 1))
    train()
    test()
    print("-----------------------------------------------------------------------------")
这是我要提取边界框的图片
import cv2
import matplotlib.image as mpimg
# Locate handwritten characters in a photo, save an annotated copy, and
# preview every cropped character resized to 28x28.
im = cv2.imread('/content/drive/My Drive/my_handwritten.jpg')
if im is None:
    # cv2.imread reports an unreadable/missing file by returning None.
    raise FileNotFoundError(
        "could not read image: /content/drive/My Drive/my_handwritten.jpg")

gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (5, 5), 0)
# Named constants replace the original magic numbers (both equal to 1).
thresh = cv2.adaptiveThreshold(
    blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2)
# findContours returns (image, contours, hierarchy) in OpenCV 3.x but
# (contours, hierarchy) in 4.x; index -2 is the contour list in both.
contours = cv2.findContours(
    thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

# Copy before drawing so the crops used for classification do not contain
# the red annotation rectangles.
img_for_class = im.copy()

red = (0, 0, 255)
rects = []
for cnt in contours:
    x, y, w, h = cv2.boundingRect(cnt)
    # Skip small contours (noise, dots) that are unlikely to be letters.
    if h < 20:
        continue
    cv2.rectangle(im, (x, y), (x + w, y + h), red, 2)
    rects.append((x, y, w, h))
cv2.imwrite('my_handwritten_bounding.png', im)

img_result = []
margin_pixel = 60
for rect in rects:
    # Clamp the top/left edge at 0: a negative slice start would wrap around
    # to the other side of the image and yield an empty or wrong crop.
    top = max(rect[1] - margin_pixel, 0)
    left = max(rect[0] - margin_pixel, 0)
    bottom = rect[1] + rect[3] + margin_pixel
    right = rect[0] + rect[2] + margin_pixel
    # Slice ends beyond the image are clipped by numpy, so no clamp is needed.
    img_result.append(img_for_class[top:bottom, left:right])

ncols = 7
# At least 4 rows as before, but grow the grid when more than 28 crops were
# found so plt.subplot never receives an out-of-range index.
nrows = max(4, (len(img_result) + ncols - 1) // ncols)
plt.figure(figsize=(12, 8))
count = 0
for n in img_result:
    count += 1
    plt.subplot(nrows, ncols, count)
    # NOTE: the crops are 3-channel images, for which imshow ignores cmap.
    plt.imshow(cv2.resize(n, (28, 28)), cmap='Greys', interpolation='nearest')
plt.tight_layout()
plt.show()
答案 0 :(得分:0)
您已经编写了函数 test 来测试您的网络。您唯一需要做的,就是用一张图像构造一个批次(batch),并对这张图像做与数据集中图像相同的预处理。
def test_one_image(I, model):
    '''
    Classify a single grayscale image with ``model``.

    I - 28x28 uint8 numpy array (2-D, height x width, values 0..255); it
        must be preprocessed the same way as the training images
    model - trained network taking input of shape (batch, 1, height, width)

    Returns a 1-element tensor holding the predicted class index.
    Raises ValueError if ``I`` is not 2-D.
    '''
    # test phase
    model.eval()
    # Scale to [0, 1] as float32: dividing a numpy array by 255 yields
    # float64, which does not match the float32 weights of the model.
    pixels = torch.as_tensor(I, dtype=torch.float32) / 255
    if pixels.dim() != 2:
        raise ValueError(
            'expected a 2-D grayscale image, got shape {}'.format(
                tuple(pixels.shape)))
    # Add both the batch and the channel dimension: (H, W) -> (1, 1, H, W).
    batch = pixels.unsqueeze(0).unsqueeze(0)
    # Run on whatever device the model lives on instead of a global.
    try:
        target_device = next(model.parameters()).device
    except StopIteration:
        target_device = torch.device('cpu')
    # We don't need gradients for test, so wrap in
    # no_grad to save memory
    with torch.no_grad():
        batch = batch.to(target_device)
        # forward propagation
        output = model(batch)
        # get prediction
        output = torch.argmax(output, 1)
    return output


# The ``test_`` prefix would otherwise make pytest collect this helper as a
# test case and fail looking for fixtures named ``I`` and ``model``.
test_one_image.__test__ = False