Question

我是 Python 和 OpenCV 的新手。我正在做一个项目，要构建一个识别本地语言手写文字的应用程序。其中的关键环节之一是确定单词和字符的顺序。到目前为止，我已经能够分割出单词和字母。问题是轮廓没有按顺序排列。我参考了这个问题 Python opencv sorting contours，得到了不错的结果，但仍然有一些水平方向上的轮廓顺序不对。我该如何解决这个问题？

def image_process(self):
    """Find word-level contours in ``self.img`` and annotate them in sorted order.

    The image at path ``self.img`` is converted to grayscale, binarised,
    opened and dilated with a wide kernel. The external contours of the
    dilated image are sorted with ``get_contour_precedence``; each one is
    then outlined with its bounding box and labelled ``#1``, ``#2``, ... on
    the loaded image.
    """
    image = cv2.imread(self.img)

    # grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Divide the grayscale image by its dilated copy, then apply Otsu's
    # threshold to the quotient.
    se = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
    bg = cv2.morphologyEx(gray, cv2.MORPH_DILATE, se)
    out_gray = cv2.divide(gray, bg, scale=255)
    out_binary = cv2.threshold(out_gray, 0, 255, cv2.THRESH_OTSU)[1]

    # binary (inverted)
    ret, thresh = cv2.threshold(out_binary, 127, 255, cv2.THRESH_BINARY_INV)

    # opening
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)

    # dilation 40 for segmenting words 15 for letters
    # The shape must be passed as a single tuple; the original line was a
    # syntax error.
    kernel = np.ones((5, 40), np.uint8)

    img_dilation = cv2.dilate(opening, kernel, iterations=1)

    # find contours
    contours, hierarchy = cv2.findContours(
        img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # sorted() is used instead of .sort() so this works whether findContours
    # hands back a list or a tuple. shape[1] is the column count, which is
    # what get_contour_precedence's ``cols`` parameter expects.
    contours = sorted(
        contours,
        key=lambda c: get_contour_precedence(c, img_dilation.shape[1]))

    for i, ctr in enumerate(contours):
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)

        M = cv2.moments(ctr)
        if M["m00"]:
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
        else:
            # Zero-area contour: fall back to the bounding-box center
            # instead of dividing by zero.
            cX = x + w // 2
            cY = y + h // 2

        cv2.putText(image, "#{}".format(i + 1), (cX - 20, cY), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2)

        cv2.rectangle(image, (x, y), (x + w, y + h), (90, 0, 255), 2)



def get_contour_precedence(contour, cols, tolerance_factor=61):
    """Return a sort key ordering contours by row band, then by left edge.

    The top edge of the contour's bounding box is rounded down to a multiple
    of ``tolerance_factor``; contours whose tops fall in the same band get
    the same row component and are therefore ordered by their x position.

    Args:
        contour: A contour accepted by ``cv2.boundingRect``.
        cols: Multiplier applied to the banded y value. Passing the image
            width guarantees that a lower band always sorts after a higher
            one regardless of x.
        tolerance_factor: Height of one band in pixels. Defaults to 61, the
            value that was previously hard-coded.

    Returns:
        An integer key suitable for ``sorted(..., key=...)``.
    """
    x, y, _, _ = cv2.boundingRect(contour)
    row_band = (y // tolerance_factor) * tolerance_factor
    return row_band * cols + x

Answer 1

我扩展了 Ann Zen 的出色回答（excellent answer）。

您必须调整两个变量：

threshold：面积低于此阈值的轮廓将被丢弃
row_amt：图像中的行数

Ann Zen 使用的概念是将图像沿 y 轴分成 n 段，形成 n 行。对于图像的每个片段，找到其中心在该片段中的每个形状。最后，按 x 坐标对每个线段中的形状进行排序。

导入必要的库。我有一个 DEBUG 标志，它将显示一些在调试过程中可以提供帮助的额外功能。

import cv2
import numpy as np
from collections import OrderedDict

# When True, the `if DEBUG:` branches draw extra overlays (row boundary
# lines, center dots and connecting polylines) on the image.
DEBUG = False

定义一个函数，接收输入图像并返回处理后的图像，以便之后可以用 Python 从中检索轮廓：

def process_img(image):
    """Turn a BGR image into a dilated binary mask for contour detection.

    Steps, in order: grayscale conversion, division by a dilated copy,
    Otsu threshold, inverted binary threshold, 3x3 opening and a final
    dilation with a 5x40 kernel. The dilated image is returned.
    """
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Divide the grayscale image by its dilated copy, then apply Otsu.
    rect_se = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
    dilated_grey = cv2.morphologyEx(grey, cv2.MORPH_DILATE, rect_se)
    quotient = cv2.divide(grey, dilated_grey, scale=255)
    _, otsu = cv2.threshold(quotient, 0, 255, cv2.THRESH_OTSU)

    # Inverted binary threshold.
    _, inverted = cv2.threshold(otsu, 127, 255, cv2.THRESH_BINARY_INV)

    # Opening with a 3x3 kernel.
    small_kernel = np.ones((3, 3), np.uint8)
    opened = cv2.morphologyEx(inverted, cv2.MORPH_OPEN, small_kernel)

    # dilation 40 for segmenting words 15 for letters
    wide_kernel = np.ones((5, 40), np.uint8)
    return cv2.dilate(opened, wide_kernel, iterations=1)

定义一个返回轮廓中心的函数：

def get_centeroid(cnt):
    """Return the truncated mean ``(x, y)`` of all points in a contour.

    ``cnt`` is indexed as ``cnt[..., 0]`` for x and ``cnt[..., 1]`` for y;
    the sums are divided by ``len(cnt)`` and truncated with ``int``.
    """
    n_points = len(cnt)
    mean_x = np.sum(cnt[..., 0]) / n_points
    mean_y = np.sum(cnt[..., 1]) / n_points
    return int(mean_x), int(mean_y)

定义一个函数，返回面积高于阈值的所有轮廓：

def get_contours(processed_img, threshold):
    """Return the external contours whose area is strictly above ``threshold``.

    Contours are found with ``cv2.RETR_EXTERNAL`` and
    ``cv2.CHAIN_APPROX_NONE``; the hierarchy output is discarded.
    """
    # Unpacking two values assumes findContours returns (contours, hierarchy).
    found, _ = cv2.findContours(
        processed_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    return [shape for shape in found if cv2.contourArea(shape) > threshold]

定义一个函数，该函数将接收轮廓列表并返回图像中找到的形状的中心点：

def get_centers(contours):
    """Return the ``get_centeroid`` result for each contour, in input order."""
    centers = []
    for contour in contours:
        centers.append(get_centeroid(contour))
    return centers

定义一个函数，沿 y 轴找到所有轮廓的上界和下界，并根据您提供的行数 row_amt，将这两个边界之间的区域划分为 row_amt 行，每行高度为 row_h。

def get_bounds(contours, img, row_amt):
    """Compute the vertical extent of the contours and the height of one row.

    Returns ``(min_y, max_y, row_h)`` where ``min_y``/``max_y`` are the
    smallest top edge and largest bottom edge over all bounding boxes and
    ``row_h`` is that span divided by ``row_amt``. When ``DEBUG`` is set,
    the ``row_amt + 1`` row boundaries are drawn on ``img`` as green lines.
    """
    boxes = [cv2.boundingRect(ctr) for ctr in contours]
    # The seed values (image height and 0) mirror the starting values of a
    # running min/max, so an empty contour list yields the same result.
    min_y = min([img.shape[0]] + [top for (_, top, _, _) in boxes])
    max_y = max([0] + [top + height for (_, top, _, height) in boxes])

    row_h = (max_y - min_y) / row_amt

    if DEBUG:
        right_edge = img.shape[1]
        for idx in range(row_amt + 1):
            y_line = int(min_y + row_h * idx)
            cv2.line(img, (0, y_line), (right_edge, y_line), (0, 255, 0),
                     thickness=2)
    return (min_y, max_y, row_h)

定义一个函数，接收图像数组 img、图像的分段（行）数 row_amt 以及阈值 threshold。面积低于此阈值的轮廓将被丢弃。它会依次生成 row_amt 个 OrderedDict。每个 OrderedDict 以对应行中各轮廓的中心为键（按 x 坐标排序），每个键的值是该中心对应的轮廓。

def get_rows(img, row_amt, threshold):
    """Yield the contours of ``img`` grouped into ``row_amt`` horizontal rows.

    Args:
        img: Image array passed to ``process_img`` and ``get_bounds``.
        row_amt: Number of equal-height rows to split the contour area into.
        threshold: Contours with an area not above this value are ignored.

    Yields:
        One ``OrderedDict`` per row, top to bottom. Each maps a contour's
        ``(x, y)`` center to the contour, ordered by the center's x value.
    """
    processed_img = process_img(img)
    contours = get_contours(processed_img, threshold)
    centers = get_centers(contours)
    # NOTE: contours that share the same center overwrite each other here.
    centers_to_contours = dict(zip(centers, contours))
    # reshape keeps the array two-dimensional when no contour passed the
    # area filter, so the column indexing below yields empty rows instead
    # of raising IndexError.
    centers = np.array(centers).reshape(-1, 2)
    min_y, max_y, row_h = get_bounds(contours, img, row_amt)

    for i in range(row_amt):
        # Vertical offset of every center from the top edge of row i.
        offsets = centers[:, 1] - min_y - row_h * i
        # Half-open band [0, row_h): a center lying exactly on a row
        # boundary is assigned to the row below instead of being dropped.
        in_row = centers[(offsets >= 0) & (offsets < row_h)]
        left_to_right = in_row[in_row[:, 0].argsort()]
        od = OrderedDict()
        for center in map(tuple, left_to_right):
            od[center] = centers_to_contours[center]
        yield od

读入图像，遍历行并在每行中遍历中心/轮廓并绘制矩形和数字。

img = cv2.imread('RrU0o.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be
    # read; fail here with a clear message.
    raise FileNotFoundError("could not read image 'RrU0o.jpg'")

# Running label number, continued across rows.
count = 0
for row in get_rows(img, row_amt=7, threshold=1330):
    if DEBUG and row:
        # Skipped for empty rows: np.array([]) would be a 1-D array, not a
        # list of (x, y) points.
        centerpoints = np.array(list(row.keys()))
        cv2.polylines(img, [centerpoints], False, (255, 0, 255), 2)

    for ((x, y), ctr) in row.items():
        count += 1
        if DEBUG:
            cv2.circle(img, (x, y), 10, (0, 0, 255), -1)
        cv2.putText(img, f'#{count}', (x - 10, y + 5), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2)

        # x and y are rebound from the center to the bounding-box corner.
        (x, y, w, h) = cv2.boundingRect(ctr)
        cv2.rectangle(img, (x, y), (x + w, y + h), (90, 0, 255), 2)

最后，显示图像：

# Display the annotated image in a window titled "Final" and wait for a
# key press before continuing.
cv2.imshow("Final", img)
cv2.waitKey(0)

结果：

DEBUG = True 时的结果：

绿色水平线显示线段
红点表示轮廓的中心
粉色线按顺序连接线段中的中心

完整代码：

#!/usr/bin/python

import cv2
import numpy as np
from google.colab.patches import cv2_imshow
from collections import OrderedDict

# When True, the `if DEBUG:` branches draw extra overlays (row boundary
# lines, center dots and connecting polylines) on the image.
DEBUG = False


def process_img(image):
    """Turn a BGR image into a dilated binary mask for contour detection.

    Steps, in order: grayscale conversion, division by a dilated copy,
    Otsu threshold, inverted binary threshold, 3x3 opening and a final
    dilation with a 5x40 kernel. The dilated image is returned.
    """
    grey = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Divide the grayscale image by its dilated copy, then apply Otsu.
    rect_se = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
    dilated_grey = cv2.morphologyEx(grey, cv2.MORPH_DILATE, rect_se)
    quotient = cv2.divide(grey, dilated_grey, scale=255)
    _, otsu = cv2.threshold(quotient, 0, 255, cv2.THRESH_OTSU)

    # Inverted binary threshold.
    _, inverted = cv2.threshold(otsu, 127, 255, cv2.THRESH_BINARY_INV)

    # Opening with a 3x3 kernel.
    small_kernel = np.ones((3, 3), np.uint8)
    opened = cv2.morphologyEx(inverted, cv2.MORPH_OPEN, small_kernel)

    # dilation 40 for segmenting words 15 for letters
    wide_kernel = np.ones((5, 40), np.uint8)
    return cv2.dilate(opened, wide_kernel, iterations=1)

def get_centeroid(cnt):
    """Return the truncated mean ``(x, y)`` of all points in a contour.

    ``cnt`` is indexed as ``cnt[..., 0]`` for x and ``cnt[..., 1]`` for y;
    the sums are divided by ``len(cnt)`` and truncated with ``int``.
    """
    n_points = len(cnt)
    mean_x = np.sum(cnt[..., 0]) / n_points
    mean_y = np.sum(cnt[..., 1]) / n_points
    return (int(mean_x), int(mean_y))


def get_contours(processed_img, threshold):
    """Return the external contours whose area is strictly above ``threshold``.

    Contours are found with ``cv2.RETR_EXTERNAL`` and
    ``cv2.CHAIN_APPROX_NONE``; the hierarchy output is discarded.
    """
    # Unpacking two values assumes findContours returns (contours, hierarchy).
    found, _ = cv2.findContours(
        processed_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    return [shape for shape in found if cv2.contourArea(shape) > threshold]


def get_centers(contours):
    """Return the ``get_centeroid`` result for each contour, in input order."""
    centers = []
    for contour in contours:
        centers.append(get_centeroid(contour))
    return centers


def get_bounds(contours, img, row_amt):
    """Compute the vertical extent of the contours and the height of one row.

    Returns ``(min_y, max_y, row_h)`` where ``min_y``/``max_y`` are the
    smallest top edge and largest bottom edge over all bounding boxes and
    ``row_h`` is that span divided by ``row_amt``. When ``DEBUG`` is set,
    the ``row_amt + 1`` row boundaries are drawn on ``img`` as green lines.
    """
    boxes = [cv2.boundingRect(ctr) for ctr in contours]
    # The seed values (image height and 0) mirror the starting values of a
    # running min/max, so an empty contour list yields the same result.
    min_y = min([img.shape[0]] + [top for (_, top, _, _) in boxes])
    max_y = max([0] + [top + height for (_, top, _, height) in boxes])

    row_h = (max_y - min_y) / row_amt

    if DEBUG:
        right_edge = img.shape[1]
        for idx in range(row_amt + 1):
            y_line = int(min_y + row_h * idx)
            cv2.line(img, (0, y_line), (right_edge, y_line), (0, 255, 0),
                     thickness=2)
    return (min_y, max_y, row_h)


def get_rows(img, row_amt, threshold):
    """Yield the contours of ``img`` grouped into ``row_amt`` horizontal rows.

    Args:
        img: Image array passed to ``process_img`` and ``get_bounds``.
        row_amt: Number of equal-height rows to split the contour area into.
        threshold: Contours with an area not above this value are ignored.

    Yields:
        One ``OrderedDict`` per row, top to bottom. Each maps a contour's
        ``(x, y)`` center to the contour, ordered by the center's x value.
    """
    processed_img = process_img(img)
    contours = get_contours(processed_img, threshold)
    centers = get_centers(contours)
    # NOTE: contours that share the same center overwrite each other here.
    centers_to_contours = dict(zip(centers, contours))
    # reshape keeps the array two-dimensional when no contour passed the
    # area filter, so the column indexing below yields empty rows instead
    # of raising IndexError.
    centers = np.array(centers).reshape(-1, 2)
    (min_y, max_y, row_h) = get_bounds(contours, img, row_amt)

    for i in range(row_amt):
        # Vertical offset of every center from the top edge of row i.
        offsets = centers[:, 1] - min_y - row_h * i
        # Half-open band [0, row_h): a center lying exactly on a row
        # boundary is assigned to the row below instead of being dropped.
        in_row = centers[(offsets >= 0) & (offsets < row_h)]
        left_to_right = in_row[in_row[:, 0].argsort()]
        od = OrderedDict()
        for center in map(tuple, left_to_right):
            od[center] = centers_to_contours[center]
        yield od


img = cv2.imread('RrU0o.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be
    # read; fail here with a clear message.
    raise FileNotFoundError("could not read image 'RrU0o.jpg'")

# Running label number, continued across rows.
count = 0
for row in get_rows(img, row_amt=7, threshold=1330):
    if DEBUG and row:
        # Skipped for empty rows: np.array([]) would be a 1-D array, not a
        # list of (x, y) points.
        centerpoints = np.array(list(row.keys()))
        cv2.polylines(img, [centerpoints], False, (255, 0, 255), 2)

    for ((x, y), ctr) in row.items():
        count += 1
        if DEBUG:
            cv2.circle(img, (x, y), 10, (0, 0, 255), -1)
        cv2.putText(img, f'#{count}', (x - 10, y + 5), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2)

        # x and y are rebound from the center to the bounding-box corner.
        (x, y, w, h) = cv2.boundingRect(ctr)
        cv2.rectangle(img, (x, y), (x + w, y + h), (90, 0, 255), 2)

# NOTE(review): `cv2_imshow` is imported at the top of this listing but never
# used; presumably it is meant to replace cv2.imshow when running in Colab —
# confirm.
cv2.imshow("Final", img)
cv2.waitKey(0)

Answer 2

我认为您使用 get_contour_precedence() 方法来获取对象距原点 (0,0) 的距离。

origin[1] = y axis
origin[0] = x axis

这里我建议让它也计算与 x 轴的距离

改变这一行

# NOTE(review): shape[0] is the row count and shape[1] the column count of a
# NumPy image array, yet they are passed to the parameters named `cols` and
# `rows` respectively — confirm the intended order.
contours.sort(key=lambda x:get_contour_precedence(x, img_dilation.shape[0], img_dilation.shape[1]))

def get_contour_precedence(contour, cols, rows):
    """Return a sort key that mixes the contour's y and x positions.

    The bounding box's top and left coordinates are each divided by the
    tolerance factor (61) and weighted by ``cols / 61`` and ``rows / 61``
    respectively; the two weighted terms are summed.
    """
    tolerance_factor = 61
    left, top = cv2.boundingRect(contour)[:2]
    y_term = (top / tolerance_factor) * (cols / tolerance_factor)
    x_term = (left / tolerance_factor) * (rows / tolerance_factor)
    return y_term + x_term

我稍微修改了你的 return 语句。

Answer 3

您需要的是根据阅读方向对检测到的单词进行排序。

问题由两部分组成：

将单词聚类（分组）成行并从上到下排序
然后，对于每一行，从左到右对单词进行排序

第(2)部分很简单：取一个单词的x-center (x+w/2)，然后根据x-center位置对一行中的单词进行排序。

第 (1) 部分有点棘手：我们必须首先将单词分组为行。为此，我们需要衡量两个单词的接近程度。例如，我们可以将两个单词的边界框投影到 y 轴上，然后看它们重叠了多少（Jaccard 度量）：重叠得越多，它们就越“接近”。然后我们可以创建一个包含单词之间成对距离的矩阵。这听起来可能很复杂，但它使我们能够使用一些标准的聚类算法（如 DBSCAN）来完成繁重的工作。每个簇对应一行文字。之后只需要从上到下对各行进行排序（即任务 (1)），再从左到右对每一行内的单词进行排序（即任务 (2)）。

有关代码，请参阅 word_detector 包中的函数 sort_multiline。以下是所描述方法的示例输出：

Opencv轮廓排序

3 个答案: