要创建带注释图像的COCO数据集,您需要根据对象的类型将二进制蒙版转换为多边形或未压缩的运行长度编码表示。
pycocotools库具有对压缩 RLE进行编码和解码的功能,但对多边形和未压缩的RLE没有任何作用。
我可以使用 skimage 的 measure 模块来生成蒙版的多边形,但我不确定如何创建未压缩的RLE。
我可以使用这个 RLE 编码器从图像创建 RLE 表示,但我不确定 COCO 期望的格式。COCO 只是提到他们使用“自定义运行长度编码(RLE)方案”。
例如,
# Example 9x10 binary mask: a 4x3 block of ones (rows 2-5, columns 5-7) plus a
# single foreground pixel at row 6, column 0.
ground_truth_binary_mask = np.array([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[ 0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[ 0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[ 0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=np.uint8)
# Same values, stored in column-major (Fortran) memory order.
fortran_ground_truth_binary_mask = np.asfortranarray(ground_truth_binary_mask)
# `rle` is the external run-length encoder mentioned in the text above; it is
# not defined in this snippet.
rle(fortran_ground_truth_binary_mask)
输出:
(array([26, 36, 46, 56, 61]), array([3, 3, 3, 3, 1]))
这就是COCO RLE的样子:
{
"segmentation": {
"counts": [
272,
2,
4,
4,
4,
4,
2,
9,
1,
2,
16,
43,
143,
24,
5,
8,
16,
44,
141,
25,
8,
5,
17,
44,
140,
26,
10,
2,
17,
45,
129,
4,
5,
27,
24,
5,
1,
45,
127,
38,
23,
52,
125,
40,
22,
53,
123,
43,
20,
54,
122,
46,
18,
54,
121,
54,
12,
53,
119,
57,
11,
53,
117,
59,
13,
51,
117,
59,
13,
51,
117,
60,
11,
52,
117,
60,
10,
52,
118,
60,
9,
53,
118,
61,
8,
52,
119,
62,
7,
52,
119,
64,
1,
2,
2,
51,
120,
120,
120,
101,
139,
98,
142,
96,
144,
93,
147,
90,
150,
87,
153,
85,
155,
82,
158,
76,
164,
66,
174,
61,
179,
57,
183,
54,
186,
52,
188,
49,
191,
47,
193,
21,
8,
16,
195,
20,
13,
8,
199,
18,
222,
17,
223,
16,
224,
16,
224,
15,
225,
15,
225,
15,
225,
15,
225,
15,
225,
15,
225,
15,
225,
15,
225,
15,
225,
14,
226,
14,
226,
14,
39,
1,
186,
14,
39,
3,
184,
14,
39,
4,
183,
13,
40,
6,
181,
14,
39,
7,
180,
14,
39,
9,
178,
14,
39,
10,
177,
14,
39,
11,
176,
14,
38,
14,
174,
14,
36,
19,
171,
15,
33,
32,
160,
16,
30,
35,
159,
18,
26,
38,
158,
19,
23,
41,
157,
20,
19,
45,
156,
21,
15,
48,
156,
22,
10,
53,
155,
23,
9,
54,
154,
23,
8,
55,
154,
24,
7,
56,
153,
24,
6,
57,
153,
25,
5,
57,
153,
25,
5,
58,
152,
25,
4,
59,
152,
26,
3,
59,
152,
26,
3,
59,
152,
27,
1,
60,
152,
27,
1,
60,
152,
86,
154,
80,
160,
79,
161,
42,
8,
29,
161,
41,
11,
22,
2,
3,
161,
40,
13,
18,
5,
3,
161,
40,
15,
2,
5,
8,
7,
2,
161,
40,
24,
6,
170,
35,
30,
4,
171,
34,
206,
34,
41,
1,
164,
34,
39,
3,
164,
34,
37,
5,
164,
34,
35,
10,
161,
36,
1,
3,
28,
17,
155,
41,
27,
16,
156,
41,
26,
17,
156,
41,
26,
16,
157,
27,
4,
10,
25,
16,
158,
27,
6,
8,
11,
2,
12,
6,
2,
7,
159,
27,
7,
14,
3,
4,
19,
6,
160,
26,
8,
22,
18,
5,
161,
26,
8,
22,
18,
4,
162,
26,
8,
23,
15,
4,
164,
23,
11,
23,
11,
7,
165,
19,
17,
22,
9,
6,
167,
19,
22,
18,
8,
3,
170,
18,
25,
16,
7,
1,
173,
17,
28,
15,
180,
17,
30,
12,
181,
16,
34,
6,
184,
15,
225,
14,
226,
13,
227,
12,
228,
11,
229,
10,
230,
9,
231,
9,
231,
9,
231,
9,
231,
8,
232,
8,
232,
8,
232,
8,
232,
8,
232,
8,
232,
7,
233,
7,
233,
7,
233,
7,
233,
8,
232,
8,
232,
8,
232,
9,
231,
9,
231,
9,
231,
10,
230,
10,
230,
11,
229,
13,
227,
14,
226,
16,
224,
17,
223,
19,
221,
23,
217,
31,
3,
5,
201,
39,
201,
39,
201,
39,
201,
39,
201,
39,
201,
40,
200,
40,
200,
41,
199,
41,
199,
41,
199,
22,
8,
12,
198,
22,
12,
8,
198,
22,
14,
6,
198,
22,
15,
6,
197,
22,
16,
5,
197,
22,
17,
5,
196,
22,
18,
4,
196,
22,
19,
4,
195,
22,
19,
5,
194,
22,
20,
4,
194,
25,
21,
1,
193,
27,
213,
29,
211,
30,
210,
35,
6,
6,
193,
49,
191,
50,
190,
50,
190,
51,
189,
51,
189,
52,
188,
53,
187,
53,
187,
54,
186,
54,
186,
54,
186,
55,
185,
55,
185,
55,
185,
55,
185,
55,
185,
55,
185,
55,
185,
55,
185,
55,
185,
55,
185,
55,
185,
55,
185,
55,
185,
55,
185,
55,
185,
28,
1,
26,
185,
23,
11,
21,
185,
20,
17,
17,
186,
18,
21,
15,
186,
16,
23,
14,
187,
14,
25,
14,
187,
14,
26,
12,
188,
14,
28,
10,
188,
14,
226,
14,
226,
16,
224,
17,
223,
19,
221,
20,
220,
22,
218,
24,
18,
3,
12,
3,
180,
25,
10,
1,
4,
6,
10,
6,
178,
28,
7,
12,
8,
8,
177,
49,
3,
12,
176,
65,
175,
67,
173,
69,
171,
53,
3,
14,
170,
37,
20,
9,
4,
1,
169,
36,
21,
8,
175,
35,
22,
7,
176,
34,
23,
7,
176,
34,
23,
6,
177,
35,
22,
6,
177,
35,
22,
8,
175,
35,
23,
9,
173,
35,
205,
36,
204,
39,
201,
43,
197,
48,
36,
1,
155,
48,
35,
3,
154,
49,
33,
5,
154,
48,
32,
6,
155,
49,
27,
10,
155,
51,
24,
11,
154,
54,
21,
11,
155,
56,
19,
11,
155,
56,
18,
11,
156,
56,
17,
11,
157,
56,
16,
12,
157,
56,
14,
13,
159,
56,
12,
13,
160,
61,
5,
14,
162,
78,
165,
75,
167,
73,
168,
72,
170,
70,
171,
69,
173,
67,
176,
64,
179,
61,
182,
58,
183,
57,
185,
54,
187,
53,
188,
51,
191,
49,
192,
47,
195,
45,
196,
43,
198,
42,
199,
40,
201,
38,
203,
36,
205,
34,
207,
32,
210,
28,
213,
26,
216,
22,
221,
16,
228,
8,
10250
],
"size": [
240,
320
]
}
}
答案 0 :(得分:3)
作为@waspinator答案的改进。这快了35%。
def binary_mask_to_rle(binary_mask):
    """Encode a binary mask as an uncompressed COCO-style RLE dictionary.

    The mask is scanned in column-major (Fortran) order and the lengths of
    the alternating background/foreground runs are recorded. The first count
    always refers to background, so a mask that starts with a foreground
    pixel gets a leading ``0``.

    Args:
        binary_mask: NumPy array; every nonzero element is treated as
            foreground, so 0/1, 0/255 and boolean masks all work.

    Returns:
        dict with ``'counts'`` (list of Python ints) and ``'size'`` (the
        mask shape as a list).
    """
    rle = {'counts': [], 'size': list(binary_mask.shape)}
    counts = rle['counts']

    # Normalise to plain booleans so that differing nonzero values
    # (e.g. 1 next to 2) cannot split a single foreground run.
    pixels = (binary_mask.ravel(order='F') != 0).tolist()

    last_elem = False  # runs are defined to start with background
    running_length = 0

    for elem in pixels:
        if elem != last_elem:
            # Run boundary: emit the finished run (0 if the mask starts
            # with foreground) and begin counting the next one.
            counts.append(running_length)
            running_length = 0
            last_elem = elem
        running_length += 1

    # Flush the final run.
    counts.append(running_length)

    return rle
答案 1 :(得分:1)
有关格式的信息,请访问:https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/mask.py
RLE是一种用于存储二进制掩码的简单而高效的格式。RLE 首先将向量(或向量化后的图像)划分为一系列分段恒定的区域,然后对每一段只存储该段的长度。例如,给定 M = [0 0 1 1 1 0 1],RLE 计数为 [2 3 1 1];对于 M = [1 1 1 1 1 1 0],计数为 [0 6 1](请注意,位于奇数位置的计数始终表示 0 的个数)。
import numpy as np
from itertools import groupby
def binary_mask_to_rle(binary_mask):
    """Encode a binary mask as an uncompressed COCO-style RLE dictionary.

    The mask is flattened in column-major (Fortran) order and grouped into
    runs of equal value; the run lengths become the counts. Counts in odd
    positions (1st, 3rd, ...) describe background, so a leading ``0`` is
    emitted when the mask starts with a foreground pixel.

    Args:
        binary_mask: NumPy array; every nonzero element is treated as
            foreground, so 0/1, 0/255 and boolean masks all work.

    Returns:
        dict with ``'counts'`` (list of Python ints) and ``'size'`` (the
        mask shape as a list).
    """
    rle = {'counts': [], 'size': list(binary_mask.shape)}
    counts = rle['counts']

    # Compare against zero first so that any nonzero value counts as
    # foreground and neighbouring nonzero values stay in the same run.
    pixels = (binary_mask.ravel(order='F') != 0).tolist()

    for i, (value, elements) in enumerate(groupby(pixels)):
        if i == 0 and value:
            # Mask starts with foreground: the background run has length 0.
            counts.append(0)
        # Count the run without materialising it as a list.
        counts.append(sum(1 for _ in elements))
    return rle
# Run the encoder on the two 1-D examples from the format description:
# one starting with background, one starting with foreground.
test_list_1, test_list_2 = (
    np.array(bits)
    for bits in ([0, 0, 1, 1, 1, 0, 1], [1, 1, 1, 1, 1, 1, 0])
)
for sample in (test_list_1, test_list_2):
    print(binary_mask_to_rle(sample))
输出:
{'counts': [2, 3, 1, 1], 'size': [7]}
{'counts': [0, 6, 1], 'size': [7]}
您可以使用 mask.frPyObjects(rle, size_x, size_y) 将未压缩的 RLE 转换为压缩 RLE,然后执行所有常规的 mask 操作。
import json
import numpy as np
from pycocotools import mask
from skimage import measure
# Build the 9x10 example mask by painting onto an all-zero canvas:
# a 4x3 block of ones (rows 2-5, columns 5-7) and one pixel at row 6, column 0.
ground_truth_binary_mask = np.zeros((9, 10), dtype=np.uint8)
ground_truth_binary_mask[2:6, 5:8] = 1
ground_truth_binary_mask[6, 0] = 1
# Same values, stored in column-major (Fortran) memory order.
fortran_ground_truth_binary_mask = np.asfortranarray(ground_truth_binary_mask)
将掩码编码为RLE:
# Encode the Fortran-ordered example mask as an uncompressed RLE dict and
# print it for inspection.
rle = binary_mask_to_rle(fortran_ground_truth_binary_mask)
print(rle)
输出:
{'counts': [6, 1, 40, 4, 5, 4, 5, 4, 21], 'size': [9, 10]}
压缩RLE,然后解码:
# Convert the uncompressed RLE into pycocotools' compressed form.
# rle['size'] holds the mask shape, so its two entries (rows, columns) are
# passed as the two size arguments.
compressed_rle = mask.frPyObjects(rle, rle.get('size')[0], rle.get('size')[1])
# Decode the compressed RLE back into a dense array to check the round trip.
mask.decode(compressed_rle)
输出:
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=uint8)
答案 2 :(得分:0)
为了解码在COCO批注中编码的二进制掩码,您需要首先使用COCO's API获得RLE,然后使用opencv获得如下轮廓:
# Import libraries
import numpy as np
import cv2
import json
import mask
# Read the annotations
file_path = "coco/annotations/stuff_annotations_trainval2017/stuff_train2017.json"
with open(file_path, 'r') as f:
    data = json.load(f)

# One entry per annotation that yields at least one usable contour.
updated_data = []

# For each annotation
for annotation in data['annotations']:
    # Decode the RLE segmentation into a dense mask and make it a contiguous
    # uint8 array before handing it to OpenCV.
    # NOTE(review): assumes annotation['segmentation'] is an RLE dict (as in
    # the COCO stuff annotations); polygon lists are not handled here.
    mask_list = mask.decode(annotation['segmentation'])
    mask_list = np.ascontiguousarray(mask_list, dtype=np.uint8)

    # cv2.findContours returns (image, contours, hierarchy) in OpenCV 3.x but
    # (contours, hierarchy) in 4.x; index -2 is the contour list in both.
    contours = cv2.findContours(mask_list, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Flatten each contour to [x0, y0, x1, y1, ...] and keep it exactly once,
    # dropping degenerate contours with fewer than three points (the original
    # appended every contour and then appended the valid ones a second time).
    segmentation = []
    for contour in contours:
        contour = contour.flatten().tolist()
        if len(contour) > 4:
            segmentation.append(contour)
    if not segmentation:
        continue

    # Regroup each flat coordinate list into [x, y] pairs.
    segmentation_polygons = [
        [segment[k:k + 2] for k in range(0, len(segment) - 1, 2)]
        for segment in segmentation
    ]

    # Save the segmentation and polygons for the current annotation
    obj = {
        'segmentation': segmentation,
        'segmentation_polygons': segmentation_polygons,
    }
    updated_data.append(obj)
注意:只有COCO stuff 2017
批注使用二进制掩码,COCO person 2017
批注未使用二进制掩码,因此您无需解码后者并找到其轮廓。
受到这个解决方案的启发。