我是 Python 和 OpenCV 的新手。我正在开展一个项目,要构建一个应用程序来识别本地语言的笔迹。其中的关键环节之一是确定单词和字符的顺序。到目前为止,我已经设法分割出了单词和字母。问题是轮廓没有按顺序排列。我参考了“Python opencv sorting contours”这个问题,并得到了不错的结果,但仍然有一些水平方向上的轮廓顺序不对。我该如何解决这个问题?
def image_process(self):
    """Segment the image at ``self.img`` into word blobs and number them.

    The image is binarised, opened, and dilated with a wide kernel so that
    neighbouring letters merge into one blob per word. The external contours
    of those blobs are sorted with ``get_contour_precedence`` and each one is
    annotated on the loaded image with its 1-based rank and bounding box.

    The annotated image is a local variable; the visible code neither
    returns nor displays it.
    """
    image = cv2.imread(self.img)
    # grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Divide the grayscale image by a dilated copy of itself (``bg``), then
    # let Otsu pick the threshold — presumably to even out the background.
    se = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
    bg = cv2.morphologyEx(gray, cv2.MORPH_DILATE, se)
    out_gray = cv2.divide(gray, bg, scale=255)
    out_binary = cv2.threshold(out_gray, 0, 255, cv2.THRESH_OTSU)[1]
    # binary (inverted)
    ret, thresh = cv2.threshold(out_binary, 127, 255, cv2.THRESH_BINARY_INV)
    # opening
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    # dilation: 40 columns for segmenting words, 15 for letters
    kernel = np.ones((5, 40), np.uint8)
    img_dilation = cv2.dilate(opening, kernel, iterations=1)
    # find contours; [-2] selects the contour list whether findContours
    # returns two values (OpenCV 2.x/4.x) or three (OpenCV 3.x)
    contours = cv2.findContours(
        img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    # sorted() works even when findContours hands back a tuple, which has no
    # .sort(); shape[1] is the image width, i.e. the column count the key expects.
    contours = sorted(
        contours,
        key=lambda c: get_contour_precedence(c, img_dilation.shape[1]),
    )
    for i, ctr in enumerate(contours):
        # Get bounding box
        x, y, w, h = cv2.boundingRect(ctr)
        M = cv2.moments(ctr)
        if M["m00"] != 0:
            cX = int(M["m10"] / M["m00"])
            cY = int(M["m01"] / M["m00"])
        else:
            # Zero-area contour: fall back to the bounding-box centre
            # instead of dividing by zero.
            cX, cY = x + w // 2, y + h // 2
        cv2.putText(image, "#{}".format(i + 1), (cX - 20, cY),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2)
        cv2.rectangle(image, (x, y), (x + w, y + h), (90, 0, 255), 2)
def get_contour_precedence(contour, cols, tolerance_factor=61):
    """Return a sort key ordering contours top-to-bottom, then left-to-right.

    The top edge of the contour's bounding box is snapped down to a multiple
    of ``tolerance_factor`` so that boxes whose tops fall in the same band
    share a row; within a band the left edge decides the order.

    Args:
        contour: contour accepted by ``cv2.boundingRect``.
        cols: multiplier that separates successive bands; it must be larger
            than any x coordinate for rows not to interleave.
        tolerance_factor: band height in pixels. Defaults to 61, the value
            that used to be hard-coded.

    Returns:
        An integer key; smaller keys sort first.
    """
    x, y = cv2.boundingRect(contour)[:2]
    row_band = (y // tolerance_factor) * tolerance_factor
    return row_band * cols + x
答案 0 :(得分:2)
我在 Ann Zen 的出色回答(excellent answer)基础上做了扩展。
您必须调整两个变量:
- threshold:面积低于此阈值的轮廓将被丢弃
- row_amt:图像中的行数

Ann Zen 使用的概念是将图像沿 y 轴分成 n 段,形成 n 行。对于图像的每个片段,找到其中心位于该片段中的每个形状。最后,按 x 坐标对每个片段中的形状进行排序。
我还添加了一个 DEBUG 标志,启用后会显示一些有助于调试的额外信息。
import cv2
import numpy as np
from collections import OrderedDict
# When True, the code below draws extra overlays on the image: the row
# boundary lines, a polyline through each row's centers, and a dot per center.
DEBUG = False
def process_img(image, dilation_kernel=(5, 40)):
    """Binarise a BGR image and dilate it so that nearby strokes merge.

    Args:
        image: image accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
        dilation_kernel: ``(rows, cols)`` shape of the all-ones kernel used
            for the final dilation. Defaults to ``(5, 40)``, the value that
            used to be hard-coded; the original note suggests 40 columns for
            segmenting words and 15 for letters.

    Returns:
        The dilated, inverted binary image.
    """
    # grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Divide the grayscale image by a dilated copy of itself (``bg``), then
    # let Otsu pick the threshold — presumably to even out the background.
    se = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
    bg = cv2.morphologyEx(gray, cv2.MORPH_DILATE, se)
    out_gray = cv2.divide(gray, bg, scale=255)
    out_binary = cv2.threshold(out_gray, 0, 255, cv2.THRESH_OTSU)[1]
    # binary (inverted)
    (ret, thresh) = cv2.threshold(out_binary, 127, 255,
                                  cv2.THRESH_BINARY_INV)
    # opening
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    # dilation with the caller-selected kernel shape
    kernel = np.ones(tuple(dilation_kernel), np.uint8)
    img_dilation = cv2.dilate(opening, kernel, iterations=1)
    return img_dilation
def get_centeroid(cnt):
    """Return the truncated integer mean ``(x, y)`` of the points in *cnt*.

    Args:
        cnt: contour points with x and y in the last axis; a nested list
            is accepted as well as an array.

    Returns:
        A tuple ``(x, y)`` of Python ints.

    Raises:
        ValueError: if *cnt* contains no points.
    """
    cnt = np.asarray(cnt)
    length = len(cnt)
    if length == 0:
        # Fail with a clear message instead of a NaN-to-int conversion error.
        raise ValueError("cannot compute the centroid of an empty contour")
    sum_x = np.sum(cnt[..., 0])
    sum_y = np.sum(cnt[..., 1])
    return int(sum_x / length), int(sum_y / length)
def get_contours(processed_img, threshold):
    """Return the external contours whose area exceeds *threshold*.

    Args:
        processed_img: binary image passed to ``cv2.findContours``.
        threshold: contours with ``cv2.contourArea`` not greater than this
            value are discarded.

    Returns:
        A list of the surviving contours.
    """
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; the contour list is always the
    # second-to-last item.
    contours = cv2.findContours(processed_img, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]
    return [cnt for cnt in contours if cv2.contourArea(cnt) > threshold]
def get_centers(contours):
    """Return the centroid of every contour, in the same order as the input."""
    centers = []
    for contour in contours:
        centers.append(get_centeroid(contour))
    return centers
get_bounds 函数根据轮廓、图像和行数 row_amt,计算 min_y、max_y 以及行高度 row_h
def get_bounds(contours, img, row_amt):
    """Compute the vertical extent of the contours and the height of one row.

    Args:
        contours: contours accepted by ``cv2.boundingRect``.
        img: image; its height seeds the minimum and, when ``DEBUG`` is set,
            the row boundary lines are drawn onto it.
        row_amt: number of rows the vertical extent is divided into.

    Returns:
        ``(min_y, max_y, row_h)``: the smallest top edge, the largest bottom
        edge, and ``(max_y - min_y) / row_amt``.
    """
    rects = [cv2.boundingRect(ctr) for ctr in contours]
    # The seeds (image height and 0) mirror the starting values of a running
    # min/max, so an empty contour list yields them unchanged.
    min_y = min([img.shape[0]] + [top for (_, top, _, _) in rects])
    max_y = max([0] + [top + height for (_, top, _, height) in rects])
    row_h = (max_y - min_y) / row_amt
    if DEBUG:
        right_edge = img.shape[1]
        for i in range(row_amt + 1):
            line_y = int(min_y + row_h * i)
            cv2.line(img, (0, line_y), (right_edge, line_y), (0, 255, 0),
                     thickness=2)
    return (min_y, max_y, row_h)
最后,get_rows 函数接受图像 img、图像的段数 row_amt 和 threshold。面积低于此阈值的轮廓将被丢弃。它将返回 row_amt 个
# OrderedDicts. Each OrderedDict contains the centers of its row as keys,
# sorted by x coordinate; each key's value is the corresponding contour.
def get_rows(img, row_amt, threshold):
    """Yield one ``OrderedDict`` per row, mapping centers to contours.

    The contours' vertical extent is split into ``row_amt`` bands of equal
    height. A contour belongs to the band that contains the y coordinate of
    its center; inside a band the entries are ordered by x coordinate.

    Args:
        img: BGR image to analyse (debug lines may be drawn on it by
            ``get_bounds`` when ``DEBUG`` is set).
        row_amt: number of rows to split the image into.
        threshold: contours with an area not greater than this are dropped.

    Yields:
        ``row_amt`` OrderedDicts whose keys are ``(x, y)`` tuples of Python
        ints and whose values are the matching contours.
    """
    processed_img = process_img(img)
    contours = get_contours(processed_img, threshold)
    if not contours:
        # Nothing survived the area filter: every row is empty. Indexing the
        # empty centers array below would raise IndexError.
        for _ in range(row_amt):
            yield OrderedDict()
        return
    centers = get_centers(contours)
    # NOTE: contours that share the exact same center collapse to one entry.
    centers_to_contours = dict(zip(centers, contours))
    centers = np.array(centers)
    min_y, max_y, row_h = get_bounds(contours, img, row_amt)
    for i in range(row_amt):
        offset = centers[:, 1] - min_y - row_h * i
        # Half-open band [0, row_h): a center lying exactly on a boundary
        # belongs to the lower row instead of being dropped from both.
        in_row = centers[(offset >= 0) & (offset < row_h)]
        ordered = in_row[in_row[:, 0].argsort()]
        od = OrderedDict()
        # tolist() turns NumPy scalars into plain ints for the keys.
        for center in map(tuple, ordered.tolist()):
            od[center] = centers_to_contours[center]
        yield od
# Load the sample image, number every detected word in reading order, draw
# its bounding box, and show the annotated result.
img = cv2.imread('RrU0o.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("could not read image 'RrU0o.jpg'")
count = 0
for row in get_rows(img, row_amt=7, threshold=1330):
    if DEBUG and row:
        # cv2.polylines requires int32 points; a default array may be int64,
        # and an empty row would produce an unusable empty array.
        centerpoints = np.array(list(row.keys()), dtype=np.int32)
        cv2.polylines(img, [centerpoints], False, (255, 0, 255), 2)
    for (center, ctr) in row.items():
        # Plain ints keep the drawing calls independent of the key dtype.
        (x, y) = (int(center[0]), int(center[1]))
        count += 1
        if DEBUG:
            cv2.circle(img, (x, y), 10, (0, 0, 255), -1)
        cv2.putText(img, f'#{count}', (x - 10, y + 5), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2)
        (x, y, w, h) = cv2.boundingRect(ctr)
        cv2.rectangle(img, (x, y), (x + w, y + h), (90, 0, 255), 2)
cv2.imshow("Final", img)
cv2.waitKey(0)
结果:
DEBUG = True 时的结果:
完整代码:
#!/usr/bin/python
import cv2
import numpy as np
from google.colab.patches import cv2_imshow
from collections import OrderedDict
# When True, the code below draws extra overlays on the image: the row
# boundary lines, a polyline through each row's centers, and a dot per center.
DEBUG = False
def process_img(image, dilation_kernel=(5, 40)):
    """Binarise a BGR image and dilate it so that nearby strokes merge.

    Args:
        image: image accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
        dilation_kernel: ``(rows, cols)`` shape of the all-ones kernel used
            for the final dilation. Defaults to ``(5, 40)``, the value that
            used to be hard-coded; the original note suggests 40 columns for
            segmenting words and 15 for letters.

    Returns:
        The dilated, inverted binary image.
    """
    # grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Divide the grayscale image by a dilated copy of itself (``bg``), then
    # let Otsu pick the threshold — presumably to even out the background.
    se = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
    bg = cv2.morphologyEx(gray, cv2.MORPH_DILATE, se)
    out_gray = cv2.divide(gray, bg, scale=255)
    out_binary = cv2.threshold(out_gray, 0, 255, cv2.THRESH_OTSU)[1]
    # binary (inverted)
    (ret, thresh) = cv2.threshold(out_binary, 127, 255,
                                  cv2.THRESH_BINARY_INV)
    # opening
    kernel = np.ones((3, 3), np.uint8)
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
    # dilation with the caller-selected kernel shape
    kernel = np.ones(tuple(dilation_kernel), np.uint8)
    img_dilation = cv2.dilate(opening, kernel, iterations=1)
    return img_dilation
def get_centeroid(cnt):
    """Return the truncated integer mean ``(x, y)`` of the points in *cnt*.

    Args:
        cnt: contour points with x and y in the last axis; a nested list
            is accepted as well as an array.

    Returns:
        A tuple ``(x, y)`` of Python ints.

    Raises:
        ValueError: if *cnt* contains no points.
    """
    cnt = np.asarray(cnt)
    length = len(cnt)
    if length == 0:
        # Fail with a clear message instead of a NaN-to-int conversion error.
        raise ValueError("cannot compute the centroid of an empty contour")
    sum_x = np.sum(cnt[..., 0])
    sum_y = np.sum(cnt[..., 1])
    return (int(sum_x / length), int(sum_y / length))
def get_contours(processed_img, threshold):
    """Return the external contours whose area exceeds *threshold*.

    Args:
        processed_img: binary image passed to ``cv2.findContours``.
        threshold: contours with ``cv2.contourArea`` not greater than this
            value are discarded.

    Returns:
        A list of the surviving contours.
    """
    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; the contour list is always the
    # second-to-last item.
    contours = cv2.findContours(processed_img, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]
    return [cnt for cnt in contours if cv2.contourArea(cnt) > threshold]
def get_centers(contours):
    """Return the centroid of every contour, in the same order as the input."""
    centers = []
    for contour in contours:
        centers.append(get_centeroid(contour))
    return centers
def get_bounds(contours, img, row_amt):
    """Compute the vertical extent of the contours and the height of one row.

    Args:
        contours: contours accepted by ``cv2.boundingRect``.
        img: image; its height seeds the minimum and, when ``DEBUG`` is set,
            the row boundary lines are drawn onto it.
        row_amt: number of rows the vertical extent is divided into.

    Returns:
        ``(min_y, max_y, row_h)``: the smallest top edge, the largest bottom
        edge, and ``(max_y - min_y) / row_amt``.
    """
    rects = [cv2.boundingRect(ctr) for ctr in contours]
    # The seeds (image height and 0) mirror the starting values of a running
    # min/max, so an empty contour list yields them unchanged.
    min_y = min([img.shape[0]] + [top for (_, top, _, _) in rects])
    max_y = max([0] + [top + height for (_, top, _, height) in rects])
    row_h = (max_y - min_y) / row_amt
    if DEBUG:
        right_edge = img.shape[1]
        for i in range(row_amt + 1):
            line_y = int(min_y + row_h * i)
            cv2.line(img, (0, line_y), (right_edge, line_y), (0, 255, 0),
                     thickness=2)
    return (min_y, max_y, row_h)
def get_rows(img, row_amt, threshold):
    """Yield one ``OrderedDict`` per row, mapping centers to contours.

    The contours' vertical extent is split into ``row_amt`` bands of equal
    height. A contour belongs to the band that contains the y coordinate of
    its center; inside a band the entries are ordered by x coordinate.

    Args:
        img: BGR image to analyse (debug lines may be drawn on it by
            ``get_bounds`` when ``DEBUG`` is set).
        row_amt: number of rows to split the image into.
        threshold: contours with an area not greater than this are dropped.

    Yields:
        ``row_amt`` OrderedDicts whose keys are ``(x, y)`` tuples of Python
        ints and whose values are the matching contours.
    """
    processed_img = process_img(img)
    contours = get_contours(processed_img, threshold)
    if not contours:
        # Nothing survived the area filter: every row is empty. Indexing the
        # empty centers array below would raise IndexError.
        for _ in range(row_amt):
            yield OrderedDict()
        return
    centers = get_centers(contours)
    # NOTE: contours that share the exact same center collapse to one entry.
    centers_to_contours = dict(zip(centers, contours))
    centers = np.array(centers)
    (min_y, max_y, row_h) = get_bounds(contours, img, row_amt)
    for i in range(row_amt):
        offset = centers[:, 1] - min_y - row_h * i
        # Half-open band [0, row_h): a center lying exactly on a boundary
        # belongs to the lower row instead of being dropped from both.
        in_row = centers[(offset >= 0) & (offset < row_h)]
        ordered = in_row[in_row[:, 0].argsort()]
        od = OrderedDict()
        # tolist() turns NumPy scalars into plain ints for the keys.
        for center in map(tuple, ordered.tolist()):
            od[center] = centers_to_contours[center]
        yield od
# Load the sample image, number every detected word in reading order, draw
# its bounding box, and show the annotated result.
img = cv2.imread('RrU0o.jpg')
if img is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError("could not read image 'RrU0o.jpg'")
count = 0
for row in get_rows(img, row_amt=7, threshold=1330):
    if DEBUG and row:
        # cv2.polylines requires int32 points; a default array may be int64,
        # and an empty row would produce an unusable empty array.
        centerpoints = np.array(list(row.keys()), dtype=np.int32)
        cv2.polylines(img, [centerpoints], False, (255, 0, 255), 2)
    for (center, ctr) in row.items():
        # Plain ints keep the drawing calls independent of the key dtype.
        (x, y) = (int(center[0]), int(center[1]))
        count += 1
        if DEBUG:
            cv2.circle(img, (x, y), 10, (0, 0, 255), -1)
        cv2.putText(img, f'#{count}', (x - 10, y + 5), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2)
        (x, y, w, h) = cv2.boundingRect(ctr)
        cv2.rectangle(img, (x, y), (x + w, y + h), (90, 0, 255), 2)
# This listing imports cv2_imshow from google.colab.patches, the Colab
# replacement for cv2.imshow (which is disabled there); it renders inline,
# so no cv2.waitKey call is needed.
cv2_imshow(img)
答案 1 :(得分:0)
我认为您使用 get_contour_precedence() 方法来获取对象距原点 (0,0) 的距离。
origin[1] = y axis
origin[0] = x axis
这里我建议让它也计算与 x 轴的距离
改变这一行
# Sort the contours in place by the key get_contour_precedence computes.
# NOTE(review): NumPy arrays are shaped (rows, cols), so shape[0] (the row
# count) is bound to the parameter named `cols` and shape[1] to `rows` —
# confirm whether this swap is intended.
contours.sort(key=lambda x:get_contour_precedence(x, img_dilation.shape[0], img_dilation.shape[1]))
def get_contour_precedence(contour, cols, rows):
    """Return a float sort key built from the contour's bounding-box origin.

    The key is the sum of two terms: the box's top edge weighted by ``cols``
    and its left edge weighted by ``rows``, each operand first divided by a
    fixed tolerance of 61.
    """
    scale = 61
    left, top = cv2.boundingRect(contour)[:2]
    vertical_term = (top / scale) * (cols / scale)
    horizontal_term = (left / scale) * (rows / scale)
    return vertical_term + horizontal_term
我稍微修改了你的回报。
答案 2 :(得分:0)
您需要的是根据阅读方向对检测到的单词进行排序。
问题由两部分组成:(1) 将单词分组为行;(2) 将每一行中的单词从左到右排序。
第(2)部分很简单:取一个单词的x-center (x+w/2),然后根据x-center位置对一行中的单词进行排序。
第 (1) 部分有点棘手:我们必须首先将单词分组为行。为此,我们必须衡量两个单词的接近程度。例如,我们可以将两个单词的边界框投影到 y 轴上,然后看它们重叠了多少(Jaccard 度量)。重叠得越多,它们就越“接近”。然后我们可以创建一个包含单词之间成对距离的矩阵。这听起来可能很复杂,但它使我们能够使用一些标准的聚类算法(如 DBSCAN)来为我们完成艰苦的工作。每个簇对应一行文本。我们只需要从上到下对各行进行排序,然后从左到右对每一行内的单词进行排序,任务即告完成。
有关代码,请参阅 word_detector 包中的函数 sort_multiline。以下是所描述方法的示例输出: