我正在从事计算机视觉工作,我正在尝试将Microsoft Coco数据集转换为png图像,以便我可以直接在Caffe上使用它们。
我修改了它们的API的一些功能,以便我有正确的分割颜色(每个类id在rgb中有(id,id,id)颜色,而background是(0,0,0))。这是修改后的功能:
def showAnns(self, anns):
"""
Display the specified annotations.
:param anns (array of object): annotations to display
:return: None
"""
if len(anns) == 0:
return 0
if 'segmentation' in anns[0]:
datasetType = 'instances'
elif 'caption' in anns[0]:
datasetType = 'captions'
if datasetType == 'instances':
ax = plt.gca()
polygons = []
color = []
# sort annotations from biggest to smallest to avoid occlusions
anns.sort(key=lambda x: x['area'], reverse=True)
for ann in anns:
pixelvalue = ann['category_id']/255.0
c = [pixelvalue, pixelvalue, pixelvalue]
if type(ann['segmentation']) == list:
# polygon
for seg in ann['segmentation']:
poly = np.array(seg).reshape((len(seg)/2, 2))
polygons.append(Polygon(poly, True,alpha=1))
color.append(c)
else:
# mask
t = self.imgs[ann['image_id']]
if type(ann['segmentation']['counts']) == list:
rle = mask.frPyObjects([ann['segmentation']], t['height'], t['width'])
else:
rle = [ann['segmentation']]
m = mask.decode(rle)
img = np.ones( (m.shape[0], m.shape[1], 3) )
color_mask = c
for i in range(3):
img[:,:,i] = color_mask[i]
ax.imshow(np.dstack( (img, m*0.5) ))
p = PatchCollection(polygons, facecolors=color, edgecolors=(1,1,1,1), linewidths=1, alpha=1)
ax.add_collection(p)
elif datasetType == 'captions':
for ann in anns:
print ann['caption']
此代码在我的python脚本中调用:
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
dataDir='..'
dataType='train2014'
annFile='%s/annotations/instance`enter code here`s_%s.json'%(dataDir,dataType)
# initialize COCO api for instance annotations
coco=COCO(annFile)
#display COCO categories and supercategories
cats = coco.loadCats(coco.getCatIds())
# get all images
catIds = coco.getCatIds();
imgIds = coco.getImgIds()
imgs = coco.loadImgs(imgIds)
imgs.sort(key=lambda x: x['id'])
for img in imgs:
# load and display image
I = io.imread('%s/images/%s/%s'%(dataDir,dataType,img['file_name']))
# 0 all values
I[:] = 0
axes = plt.gca()
axes.set_frame_on(False)
axes.set_xticks([]); axes.set_yticks([])
plt.axis('off')
plt.imshow(I)
# load and display instance annotations
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco.loadAnns(annIds)
coco.showAnns(anns)
path='vocstyle_cocoimages/'+str(img['file_name'])[:-4]+'.png'
plt.savefig(path, bbox_inches='tight', pad_inches=0)
plt.clf()
所以这里是我的问题:当我这样做时,我获得了一个与原始版本不同的保存png。我希望具有完全相同的尺寸,因为它对于评估很重要:任何错位的像素都会降低深度学习阶段的准确性。
我还没有找到一个正确的方法来保存"图像"该图的一部分被剥去了图中的任何其他东西。我们说我有一个640 * 480的图像输入,我想要与输出相同的尺寸。
感谢您阅读
答案 0 :(得分:0)
使用计数器,并将其附加到路径字符串:
COUNT = 0
for img in imgs:
# load and display image
I = io.imread('%s/images/%s/%s'%(dataDir,dataType,img['file_name']))
# 0 all values for black background
I[:] = 0
plt.imshow(I)
# load and display instance annotations
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco.loadAnns(annIds)
coco.showAnns(anns)
print img['file_name']
#path='vocstyle_cocoimages/'+str(img['file_name'])[:-4]+'.png'
#your new line
path='vocstyle_cocoimages/'+str(img['file_name'])[:-4]+ '_' + str(COUNT) + '.png'
plt.savefig(path, bbox_inches='tight', pad_inches=0)
#increment counter
COUNT = COUNT + 1