Question

我正在开发用于图像识别的CNN。我有一组不同的图像，其中每个图像中都有一组不同的边界框（每个图像至少有3个边界框）。

我想自动在边界框内提取该部分，然后对其进行裁剪以获得一组与每个边界框的内容相对应的裁剪图像。我创建了一个 voc xml 和一个累积的 .csv文件，其中包含每个图像的所有详细信息，此处为摘录：

,filepath,x1,x2,y1,y2,class_name
0,71.jpeg,81,118,98,122,os
1,71.jpeg,120,156,83,110,od
2,71.jpeg,107,161,136,154,m

基本上，我在专用文件夹（\ train_images）和注释文件中具有.jpeg格式的上述图像。您是否有一个可以解决此问题的快速实施方案？

谢谢

Answer 1

好吧，我找到了使用以下代码提取裁剪图像的方法：

#crop images
import numpy as np # linear algebra
import xml.etree.ElementTree as ET # for parsing XML
import matplotlib.pyplot as plt # to show images
from PIL import Image # to read images
import os
import glob

root_images="/content/images"
root_annots="/content/annotation"

all_images=os.listdir("/content/images/")
print(f"Total images : {len(all_images)}")

breeds = glob.glob('/content/annotation/')
annotation=[]
for b in breeds:
    annotation+=glob.glob(b+"/*")
print(f"Total annotation : {len(annotation)}")

breed_map={}
for annot in annotation:
    breed=annot.split("/")[-2]
    index=breed.split("-")[0]
    breed_map.setdefault(index,breed)
    
print(f"Total Breeds : {len(breed_map)}")

def bounding_box(image):
    #bpath=root_annots+str(breed_map[image.split("_")[0]])+"/"+str(image.split(".")[0])
    #print (bpath)
    #print(root_annots)
    #print (str(breed_map[image.split("_")[0]]))
    #print (str(image.split(".")[0]))
    bpath=root_annots+"/"+str(image.split(".")[0]+".xml")
    tree = ET.parse(bpath)
    root = tree.getroot()
    objects = root.findall('object')
    
    for o in objects:
        bndbox = o.find('bndbox') # reading bound box
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)
        
        
    return (xmin,ymin,xmax,ymax)
    
plt.figure(figsize=(10,10))
bbox=[]
for i,image in enumerate(all_images):
    bbox=bounding_box(image) 
    print(bbox)  
    im=Image.open(os.path.join(root_images,image))
    im=im.crop(bbox)           
    im.save('/content/results_imgs/{}.jpeg'.format(i,im))

但是，如果运行此代码，它将仅从每个xml中的多个边界框中提取一张图像。 我应该如何修改它以便从每个xml中带有注释的多个边界框中获取所有图像？

Answer 2

我假设您要裁剪边框的图像。您可以简单地使用一个numpy数组：

请在此处找到一个有效的示例。

import matplotlib.pyplot as plt

mydic = {
  "annotations": [
  {
    "class": "rect",
    "height": 98,
    "width": 113,
    "x": 177,
    "y": 12
  },
  {
    "class": "rect",
    "height": 80,
    "width": 87,
    "x": 373,
    "y": 43
  }
 ],
   "class": "image",
   "filename": "https://i.stack.imgur.com/9qe6z.png"
}


def crop(dic, i):
    image = plt.imread(dic["filename"])
    x0 = dic["annotations"][i]["x"]
    y0 = dic["annotations"][i]["y"]
    width = dic["annotations"][i]["width"]
    height = dic["annotations"][i]["height"]
    return image[y0:y0+height , x0:x0+width, :]


fig = plt.figure()
ax = fig.add_subplot(121)
ax.imshow(plt.imread(mydic["filename"]))

ax1 = fig.add_subplot(222)
ax1.imshow(crop(mydic, 0))

ax2 = fig.add_subplot(224)
ax2.imshow(crop(mydic, 1))

plt.show()

注意：这不是我的代码，但我是在搜索同一问题时发现的。

Answer 3

我找到了一个 git 存储库，可以从 Pascal VOC 图像（带有由 LabelImg 生成的边界框的图像）中检测到的对象的所有边界框创建裁剪图像： https://github.com/giovannicimolin/PascalVOC-to-Images

代码运行良好。希望它能帮助您解决您的问题。

Answer 4

您可以只返回一个元组数组，然后按如下方式迭代它。

map

Answer 5

如果有人仍在寻找答案，您可以看到这些脚本：

此脚本将裁剪每个边界框并自动将它们保存到相应的 class 文件夹

from PIL import Image
import ast
import os
import cv2
import os
import glob
import xml.etree.ElementTree as ET

original_file = './images/' #you images directory
dst = './save/'


def check_folder_exists(path):
        if not os.path.exists(path):
            try:
                os.makedirs(path)
                print ('create ' + path)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise




seed_arr = []
for xml_file in glob.glob('./labels/*.xml'): #your xml directory 
    root = ET.parse(xml_file).getroot()
    filename = root.find('filename').text

    for type_tag in root.findall('size'):
        #file_name = type_tag.find('filename').text
        width = type_tag.find('width').text
        height = type_tag.find('height').text

    for type_tag in root.findall('object'):
        class_name = type_tag.find('name').text
        xmin = type_tag.find('bndbox/xmin').text
        ymin = type_tag.find('bndbox/ymin').text
        xmax = type_tag.find('bndbox/xmax').text
        ymax = type_tag.find('bndbox/ymax').text
        all_list = [filename, width,height,class_name,xmin, ymin, xmax,ymax]

        seed_arr.append(all_list)
    
seed_arr.sort()
#print(str(len(seed_arr)))
#print(str(seed_arr))


for index, line in enumerate(seed_arr):
    filename = line[0]
    width = line[1]
    height = line[2]
    class_name = line[3]
    xmin = line[4]
    ymin = line[5]
    xmax = line[6]
    ymax = line[7]
    

#print(len(class_name))
    

    
    load_img_path = os.path.join(original_file, filename)
    #save img path

#save img path----------
    save_class_path = os.path.join(dst, class_name)
    check_folder_exists(save_class_path)
    save_img_path = os.path.join(save_class_path, str(index)+'_'+filename)
    
    img = Image.open(load_img_path)
    crop_img = img.crop((int(xmin) ,int(ymin) ,int(xmax) ,int(ymax)))
    newsize = (224, 224) 
    im1 = crop_img.resize(newsize) 
    im1.save(save_img_path, 'JPEG')
    print('save ' + save_img_path)

https://github.com/Laudarisd/Project_Root/blob/master/Data-preprocessing/img_manuplating/crop_from_xml/crop_from_xml.py

如果您尝试从 csv 中裁剪，请查看此链接：

https://github.com/Laudarisd/Project_Root/blob/master/Data-preprocessing/img_manuplating/crop_from_csv_bbox/crop_image_from_csv.py

通过边界框注释python

5 个答案: