检测图像上的对象边界太慢

时间:2020-07-19 00:51:22

标签: python pandas image-processing image-segmentation scikit-image

我有一个label_image,像这样的数据框。请注意,某些对象(标记为13的对象已被其他对象完全吞没/包围,没有背景将这些对象彼此分开。 label_image在此处作为输入给出。我没有自己制作图像,因为我没有实际图像(如tif,jpg等)

dummy_img = pd.DataFrame(np.array([
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
    [0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
    [0,0,0,2,2,2,2,2,2,2,2,2,3,3,3,4,4,0,0,0],
    [0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
    [0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
    [0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
    [0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
    [0,0,0,2,2,2,2,2,2,2,2,2,3,3,3,4,4,0,0,0],
    [0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
    [0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
]))

我想得到对象的轮廓。

我最初写的是:

import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix
from scipy.ndimage import binary_erosion

def outline(label_image):
    res_list = []
    
    mask = binary_erosion(label_image.values)
    label_image[mask] = 0
    c = coo_matrix(label_image)
    if c.data.size > 0:
        df = pd.DataFrame({'coords': list(zip(c.col, c.row)), 'label': c.data})
        df = df.groupby('label')['coords'].apply(lambda group_series: group_series.tolist()).reset_index()
        df = df.astype({"label": int})
    else:
        df = pd.DataFrame()
    return df

作为输出:

label   coords
2       [(3, 3), (4, 3), (5, 3), (6, 3), (7, 3), (8, 3...
4       [(12, 3), (13, 3), (14, 3), (15, 3), (16, 3), ...

这是错误的。它不仅错过了内部对象,而且为检测到的对象导出的坐标也是错误的。例如,对于label 4,它返回labels 4 and 0之间的轮廓,而忽略labels 4 and 3之间的轮廓。

我做了此修复程序

def outline_fix(label_image):
    res_list = []
    coo = coo_matrix(label_image)
    labels = np.unique(coo.data)
    for label in sorted(set(labels)):
        #print('label: %d' % label)
        c = coo.copy()
        c.data[c.data != label] = 0
        c = c.toarray()
        mask = binary_erosion(c)
        c[mask] = 0
        c = coo_matrix(c)
        if c.data.size > 0:
            df = pd.DataFrame({'coords': list(zip(c.col, c.row)), 'label': c.data})
            df = df.groupby('label')['coords'].apply(lambda group_series: group_series.tolist()).reset_index()
            df = df.astype({"label": int})
        else:
            df = pd.DataFrame()
        res_list.append(df)
        
    if res_list:
        out = pd.concat(res_list).astype({"label": int})
    else:
        out = pd.DataFrame()
        
    return out

返回:

label   coords
1       [(5, 6), (6, 6), (7, 6), (8, 6), (9, 6), (5, 7...
2       [(3, 3), (4, 3), (5, 3), (6, 3), (7, 3), (8, 3...
3       [(12, 5), (13, 5), (14, 5), (10, 6), (11, 6), ...
4       [(12, 3), (13, 3), (14, 3), (15, 3), (16, 3), ...

效果很好。不是100%完美,因为例如label 4会丢失两对坐标。我得到的坐标列表的长度应为length = 32而不是30,但这是我可以忍受的,并不是很重要。

功能更正的问题是速度太慢。在实际情况下,我有一个2000-by-2000对象超过2800个的数组。并非所有的对象都是“嵌套”的,但可能很多,具体取决于输入的图像。

代码在大约4分钟内完成(对于一个真实的现实情况),这太长了。请问有没有其他选择,或者有什么想法可以加快速度?

任何帮助表示赞赏

1 个答案:

答案 0 :(得分:0)

根据我的经验,当您必须使用.apply函数时,熊猫的速度非常慢。因此,我更喜欢对.values进行操作,然后重新分配。话虽如此,熊猫可以做得很快,但这是art of its own。检查此代码,对于大图像,它肯定比熊猫实现要快:

import numpy as np
import pandas as pd
from collections import Counter

dummy_img = pd.DataFrame(np.array([
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
    [0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
    [0,0,0,2,2,2,2,2,2,2,2,2,3,3,3,4,4,0,0,0],
    [0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
    [0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
    [0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
    [0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
    [0,0,0,2,2,2,2,2,2,2,2,2,3,3,3,4,4,0,0,0],
    [0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
    [0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
    [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
]))

# Array of boundary points
# row - label, col - point coordinates

img_np = dummy_img.values
b_pts = [[] for i in range(4)] 

# idea is simple - run a 3x3 window and check whenever the mean value
# inside the window differs from element at the center. 

img_padded = np.pad(img_np, ((1, 1), (1, 1)))
    
for r, row in enumerate(img_np):
  for c, elem in enumerate(row):
    if Flase in (img_padded[r:r+3, c:c+3] == elem):
      b_pts[elem-1].append((r, c))

for elem in b_pts:     
  print(elem)
相关问题