初步

Question

我使用Open CV和skimage进行数据表的文档分析。我正在尝试将阴影区域分开。

我目前能够将零件和编号细分为不同的簇。

使用skimage中的felzenszwalb()细分零件：

import matplotlib.pyplot as plt
import numpy as np     
from skimage.segmentation import felzenszwalb
from skimage.io import imread

img = imread('test.jpg')

segments_fz = felzenszwalb(img, scale=100, sigma=0.2, min_size=50)

print("Felzenszwalb number of segments {}".format(len(np.unique(segments_fz))))

plt.imshow(segments_fz)
plt.tight_layout()
plt.show()

但无法连接它们。有条理地连接并用零件号和零件号标记相应段的任何想法都会很有帮助。在此先感谢您的宝贵时间。如果我错过了任何内容，请在注释中让我知道。

Answer 1

初步

一些初步代码：

%matplotlib inline
%load_ext Cython
import numpy as np
import cv2
from matplotlib import pyplot as plt
import skimage as sk
import skimage.morphology as skm
import itertools

def ShowImage(title,img,ctype):
  plt.figure(figsize=(20, 20))
  if ctype=='bgr':
    b,g,r = cv2.split(img)       # get b,g,r
    rgb_img = cv2.merge([r,g,b])     # switch it to rgb
    plt.imshow(rgb_img)
  elif ctype=='hsv':
    rgb = cv2.cvtColor(img,cv2.COLOR_HSV2RGB)
    plt.imshow(rgb)
  elif ctype=='gray':
    plt.imshow(img,cmap='gray')
  elif ctype=='rgb':
    plt.imshow(img)
  else:
    raise Exception("Unknown colour type")
  plt.axis('off')
  plt.title(title)
  plt.show()

作为参考，这是您的原始图片：

#Read in image
img         = cv2.imread('part.jpg')
ShowImage('Original',img,'bgr')

识别数字

为简化起见，我们将像素分类为开或关。我们可以通过阈值来做到这一点。由于我们的图像包含两类清晰的像素（黑白），因此我们可以使用Otsu's method。我们将反转配色方案，因为我们正在使用的库考虑到黑色像素无聊而白色像素有趣。

#Convert image to grayscale
gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

#Apply Otsu's method to eliminate pixels of intermediate colour
ret, thresh = cv2.threshold(gray,0,255,cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)

ShowImage('Applying Otsu',thresh,'gray')

#Verify that pixels are either black or white and nothing in between
np.unique(thresh)

我们的策略将是找到数字，然后沿着它们附近的线找到各个部分，然后标记这些部分。由于所有阿拉伯数字都方便地由连续像素组成，因此我们可以从找到连接的分量开始。

ret, components = cv2.connectedComponents(thresh)
#Each component is a different colour
ShowImage('Connected Components', components, 'rgb')

然后我们可以通过过滤维数来过滤连接的组件以找到编号。请注意，这不是一种超级健壮的方法。更好的选择是使用字符识别，但这是读者的练习：-）

class Box:
    def __init__(self,x0,x1,y0,y1):
        self.x0, self.x1, self.y0, self.y1 = x0,x1,y0,y1
    def overlaps(self,box2,tol):
        if self.x0 is None or box2.x0 is None:
            return False
        return not (self.x1+tol<=box2.x0 or self.x0-tol>=box2.x1 or self.y1+tol<=box2.y0 or self.y0-tol>=box2.y1)
    def merge(self,box2):
        self.x0 = min(self.x0,box2.x0)
        self.x1 = max(self.x1,box2.x1)
        self.y0 = min(self.y0,box2.y0)
        self.y1 = max(self.y1,box2.y1)
        box2.x0 = None #Used to mark `box2` as being no longer valid. It can be removed later
    def dist(self,x,y):
        #Get center point
        ax = (self.x0+self.x1)/2
        ay = (self.y0+self.y1)/2
        #Get distance to center point
        return np.sqrt((ax-x)**2+(ay-y)**2)
    def good(self):
        return not (self.x0 is None)

def ExtractComponent(original_image, component_matrix, component_number):
    """Extracts a component from a ConnectedComponents matrix"""
    #Create a true-false matrix indicating if a pixel is part of a particular component
    is_component = component_matrix==component_number
    #Find the coordinates of those pixels
    coords = np.argwhere(is_component)

    # Bounding box of non-black pixels.
    y0, x0 = coords.min(axis=0)
    y1, x1 = coords.max(axis=0) + 1   # slices are exclusive at the top

    # Get the contents of the bounding box.
    return x0,x1,y0,y1,original_image[y0:y1, x0:x1]

numbers_img = thresh.copy() #This is used purely to show that we can identify numbers
numbers = []
for component in range(components.max()):
    tx0,tx1,ty0,ty1,this_component = ExtractComponent(thresh, components, component)
    #ShowImage('Component #{0}'.format(component), this_component, 'gray')
    cheight, cwidth = this_component.shape
    #print(cwidth,cheight) #Enable this to see dimensions
    #Identify numbers based on aspect ratio
    if (abs(cwidth-14)<3 or abs(cwidth-7)<3) and abs(cheight-24)<3:
        numbers_img[ty0:ty1,tx0:tx1] = 128
        numbers.append(Box(tx0,tx1,ty0,ty1))
ShowImage('Numbers', numbers_img, 'gray')

我们现在通过稍微扩展其边界框并寻找重叠来将数字连接到连续的块中。

#This is kind of a silly way to do this, but it will work find for small quantities (hundreds)
merged=True                                       #If true, then a merge happened this round
while merged:                                     #Continue until there are no more mergers
    merged=False                                  #Reset merge indicator
    for a,b in itertools.combinations(numbers,2): #Consider all pairs of numbers
        if a.overlaps(b,10):                      #If this pair overlaps
            a.merge(b)                            #Merge it
            merged=True                           #Make a note that we've merged
numbers = [x for x in numbers if x.good()]        #Eliminate those boxes that were gobbled by the mergers

#This is used purely to show that we can identify numbers
numbers_img = thresh.copy() 
for n in numbers:
    numbers_img[n.y0:n.y1,n.x0:n.x1] = 128
    thresh[n.y0:n.y1,n.x0:n.x1] = 0 #Drop numbers from thresholded image
ShowImage('Numbers', numbers_img, 'gray')

好的，现在我们确定了数字！稍后我们将使用它们来识别零件。

识别箭头

接下来，我们将要弄清楚数字所指向的部分。为此，我们要检测线。霍夫变换对此很有用。为了减少误报的数量，我们将数据骨架化，然后将其转换为最多一个像素宽的表示形式。

skel = sk.img_as_ubyte(skm.skeletonize(thresh>0))
ShowImage('Skeleton', skel, 'gray')

现在，我们执行霍夫变换。我们正在寻找一种可以识别从数字到零件的所有线的线。正确设置此参数可能需要花些时间。

lines = cv2.HoughLinesP(
    skel,
    1,           #Resolution of r in pixels
    np.pi / 180, #Resolution of theta in radians
    30,          #Minimum number of intersections to detect a line
    None,
    80,          #Min line length
    10           #Max line gap
)
lines = [x[0] for x in lines]

line_img = thresh.copy()
line_img = cv2.cvtColor(line_img, cv2.COLOR_GRAY2BGR)
for l in lines:
    color = tuple(map(int, np.random.randint(low=0, high=255, size=3)))
    cv2.line(line_img, (l[0], l[1]), (l[2], l[3]), color, 3, cv2.LINE_AA)
ShowImage('Lines', line_img, 'bgr')

我们现在要查找最接近每个数字的一行并仅保留这些行。我们实质上是在过滤掉所有不是箭头的线。为此，我们将每条线的端点与每个数字框的中心点进行比较。

  comp_labels = np.zeros(img.shape[0:2], dtype=np.uint8)

for n_idx,n in enumerate(numbers):
    distvals = []
    for i,l in enumerate(lines):
        #Distances from each point of line to midpoint of rectangle
        dists    = [n.dist(l[0],l[1]),n.dist(l[2],l[3])] 
        #Minimum distance and the end point (0 or 1) of the line associated with that point
        #Tuples of (Line Number, Line Point, Dist to Line Point) are produced
        distvals.append( (i,np.argmin(dists),np.min(dists)) )
    #Sort by distance between the number box and the line
    distvals = sorted(distvals, key=lambda x: x[2])
    #Include nearby lines, not just the closest one. This accounts for forking.
    distvals = [x for x in distvals if x[2]<1.5*distvals[0][2]]

    #Draw a white rectangle where the number box was
    cv2.rectangle(comp_labels, (n.x0,n.y0), (n.x1,n.y1), 1, cv2.FILLED)

    #Draw white lines where the arrows are
    for dv in distvals:
        l = lines[dv[0]]
        lp = (l[0],l[1]) if dv[1]==0 else (l[2],l[3])
        cv2.line(comp_labels, (l[0], l[1]), (l[2], l[3]), 1, 3, cv2.LINE_AA)
        cv2.line(comp_labels, (lp[0], lp[1]), ((n.x0+n.x1)//2, (n.y0+n.y1)//2), 1, 3, cv2.LINE_AA)
ShowImage('Lines', comp_labels, 'gray')

查找零件

这部分很难！现在，我们要分割图像中的部分。如果有某种方法可以断开将子零件链接在一起的线，这将很容易。不幸的是，连接子零件的线与构成零件的许多线的宽度相同。

要解决此问题，我们可以使用很多逻辑。这将是痛苦且容易出错。

或者，我们可以假设您有一个专家。这位专家的唯一工作是切断连接子部件的线。对于他们来说，这应该既容易又快速。标记所有东西对于人类来说将是缓慢而悲伤的，但是对于计算机而言则是快速的。分离事物对人类来说很容易，但是对于计算机却很难。所以我们让他们都做自己最擅长的事情。

在这种情况下，您可能会训练某人在几分钟内完成这项工作，因此，真正的“专家”并不是必需的。只是一个有能力的人。

如果您要这样做，则需要在循环工具中编写专家。为此，请保存骨架图像，让专家进行修改，然后再读回骨架图像。

#Save the image, or display it on a GUI
#cv2.imwrite("/z/skel.png", skel);
#EXPERT DOES THEIR THING HERE
#Read the expert-mediated image back in
skelhuman = cv2.imread('/z/skel.png')
#Convert back to the form we need
skelhuman = cv2.cvtColor(skelhuman,cv2.COLOR_BGR2GRAY)
ret, skelhuman = cv2.threshold(skelhuman,0,255,cv2.THRESH_OTSU)
ShowImage('SkelHuman', skelhuman, 'gray')

现在，我们将各个部分分开了，我们将消除尽可能多的箭头。我们已经在上面提取了这些内容，因此以后可以根据需要将它们重新添加。

为消除箭头，我们将找到所有终止于除另一条线以外的位置的线。也就是说，我们将定位只有一个相邻像素的像素。然后，我们将消除像素并查看其邻居。反复进行此操作将消除箭头。由于我不知道它的另一个术语，因此我将其称为 Fuse Transform 。由于这将需要处理单个像素，因此在Python中 super 会比较慢，因此我们将在Cython中编写转换。

%%cython -a --cplus
import cython

from libcpp.queue cimport queue
import numpy as np
cimport numpy as np

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.nonecheck(False)
@cython.cdivision(True) 
cpdef void FuseTransform(unsigned char [:, :] image):
    # set the variable extension types
    cdef int c, x, y, nx, ny, width, height, neighbours
    cdef queue[int] q

    # grab the image dimensions
    height = image.shape[0]
    width  = image.shape[1]

    cdef int dx[8]
    cdef int dy[8]

    #Offsets to neighbouring cells
    dx[:] = [-1,-1,0,1,1,1,0,-1]
    dy[:] = [0,-1,-1,-1,0,1,1,1]

    #Find seed cells: those with only one neighbour
    for y in range(1, height-1):
        for x in range(1, width-1):
            if image[y,x]==0: #Seed cells cannot be blank cells
                continue
            neighbours = 0
            for n in range(0,8):   #Looks at all neighbours
                nx = x+dx[n]
                ny = y+dy[n]
                if image[ny,nx]>0: #This neighbour has a value
                    neighbours += 1
            if neighbours==1:      #Was there only one neighbour?
                q.push(y*width+x)  #If so, this is a seed cell

    #Starting with the seed cells, gobble up the lines
    while not q.empty():
        c = q.front()
        q.pop()
        y = c//width         #Convert flat index into 2D x-y index
        x = c%width
        image[y,x] = 0       #Gobble up this part of the fuse
        neighbour  = -1      #No neighbours yet
        for n in range(0,8): #Look at all neighbours
            nx = x+dx[n]     #Find coordinates of neighbour cells
            ny = y+dy[n]
            #If the neighbour would be off the side of the matrix, ignore it
            if nx<0 or ny<0 or nx==width or ny==height:
                continue
            if image[ny,nx]>0:      #Is the neighbouring cell active?
                if neighbour!=-1:   #If we've already found an active neighbour
                    neighbour=-1    #Then pretend we found no neighbours
                    break           #And stop looking. This is the end of the fuse.
                else:               #Otherwise, make a note of the neighbour's index.
                    neighbour = ny*width+nx
        if neighbour!=-1:           #If there was only one neighbour
            q.push(neighbour)       #Continue burning the fuse

返回标准Python：

#Apply the Fuse Transform
skh_dilated=skelhuman.copy()
FuseTransform(skh_dilated)
ShowImage('Fuse Transform', skh_dilated, 'gray')

现在，我们已经消除了连接零件的所有箭头和线条，我们将剩余的像素扩展了很多[em] 。

kernel = np.ones((3,3),np.uint8)
dilated  = cv2.dilate(skh_dilated, kernel, iterations=6)
ShowImage('Dilation', dilated, 'gray')

将它们放在一起

并覆盖我们之前分割出的标签和箭头...

comp_labels_dilated  = cv2.dilate(comp_labels, kernel, iterations=5)
labels_combined = np.uint8(np.logical_or(comp_labels_dilated,dilated))
ShowImage('Comp Labels', labels_combined, 'gray')

最后，我们将合并的数字框，组件箭头和零件都用Color Brewer中的漂亮颜色进行着色。然后，将其叠加在原始图像上以获得所需的突出显示。

ret, labels = cv2.connectedComponents(labels_combined)
colormask = np.zeros(img.shape, dtype=np.uint8)
#Colors from Color Brewer
colors = [(228,26,28),(55,126,184),(77,175,74),(152,78,163),(255,127,0),(255,255,51),(166,86,40),(247,129,191),(153,153,153)]
for l in range(labels.max()):
    if l==0: #Background component
        colormask[labels==0] = (255,255,255)
    else:
        colormask[labels==l] = colors[l]
ShowImage('Comp Labels', colormask, 'bgr')
blended = cv2.addWeighted(img,0.7,colormask,0.3,0)
ShowImage('Blended', blended, 'bgr')

最终图片

因此，回顾一下，我们确定了数字，箭头和零件。在某些情况下，我们能够自动将它们分开。在其他情况下，我们在循环中使用了专家。在必须单独操纵像素的地方，我们使用Cython来提高速度。

当然，这种事情的危险在于其他图像会破坏我在这里所做的（许多）假设。但这是您尝试使用单个图像提出问题时要承担的风险。

连接线段和标签线段中最近的点

1 个答案:

初步

识别数字

识别箭头

查找零件

将它们放在一起

最终图片