Question

我目前正在尝试编写一个程序，从一本书里一些不常见的图表中提取数据。我扫描了这本书的页面，并希望通过 OpenCV 检测图表中的某些特征，以便将其转换为可用数据。在左图中，我要找的是“三角形”的高度；在右图中，是从中心到虚线与灰色区域相交点的距离。在这两种情况下，我都希望将这些值转换为数字数据以供进一步使用。

我首先想到的是检测图表中的线条，希望能以某种方式测量它们的长度或位置。为此，我使用了霍夫线变换。以下代码片段展示了我目前的进展。

import numpy as np
import cv2

# Reading the image.
# cv2.imread does not raise on a missing/unreadable file; it returns None,
# which would otherwise surface later as an obscure cvtColor error.
img = cv2.imread('test2.jpg')
if img is None:
    raise IOError("could not read image 'test2.jpg'")
# Convert the image to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Apply edge detection
edges = cv2.Canny(gray, 50, 150, apertureSize=3)

# Line detection (probabilistic Hough transform)
lines = cv2.HoughLinesP(edges, 1, np.pi/180, 100, minLineLength=50, maxLineGap=20)

# HoughLinesP returns None (not an empty array) when no segment is found,
# so iterating over the result unconditionally would raise a TypeError.
if lines is not None:
    # Flatten to one (x1, y1, x2, y2) row per segment so the loop does not
    # depend on whether the result is laid out as (N, 1, 4) or (1, N, 4).
    for x1, y1, x2, y2 in lines.reshape(-1, 4):
        cv2.line(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)

# Written even when nothing was detected, so the output file always exists.
cv2.imwrite('linesDetected.jpg', img)

唯一的问题是，这种检测算法一点也不准确，至少对我的图表来说是这样。要从图表中提取数据，线条检测必须足够准确。有什么办法可以做到这一点吗？还是说我检测线条的思路从一开始就错了？我是否应该改为从检测圆形、物体大小、轮廓或颜色等入手？

Answer 1

使用颜色分割是将图表转换为数据的一种简便方法。此方法确实需要一些手动标注。分割图表后，统计每种颜色的像素数。可以参考 OpenCV 库自带示例文件中的“分水岭”（watershed）演示：

import numpy as np
import cv2 as cv
from common import Sketcher

class App:
    """Interactive marker-based watershed segmentation of one image.

    The image is loaded, resized to 654x654 and shown in a window named
    'img'. Keys '1'..'9' select the current marker label; whenever the
    sketch is flagged dirty the watershed is recomputed, displayed, and the
    colour overlay is written to ``overlay_path``.

    Args:
        fn: Path of the image to segment.
        overlay_path: Where ``watershed`` saves the colour overlay. Defaults
            to the path the original script used.

    Raises:
        IOError: If the image cannot be read.
    """

    def __init__(self, fn, overlay_path='/home/stephen/Desktop/overlay.png'):
        self.img = cv.imread(fn)
        # cv.imread returns None instead of raising; fail here with a clear
        # message rather than inside cv.resize.
        if self.img is None:
            raise IOError('could not read image: %r' % (fn,))
        self.img = cv.resize(self.img, (654, 654))
        h, w = self.img.shape[:2]
        # One int32 label per pixel; 0 means "not marked yet".
        self.markers = np.zeros((h, w), np.int32)
        self.markers_vis = self.img.copy()
        self.cur_marker = 1
        # 12 colours whose channels are 0, 123 or 246; row i is the colour
        # used for marker label i.
        self.colors = np.int32(list(np.ndindex(2, 2, 3))) * 123
        self.auto_update = True
        self.overlay_path = overlay_path
        # Sketcher comes from the `common` helper module (not shown here);
        # presumably it draws mouse strokes into both arrays using the
        # values returned by get_colors -- confirm against that module.
        self.sketch = Sketcher('img', [self.markers_vis, self.markers], self.get_colors)

    def get_colors(self):
        """Return ``(colour, label)`` for the currently selected marker.

        ``colour`` is a list of three Python ints taken from ``self.colors``;
        ``label`` is ``self.cur_marker``.
        """
        return list(map(int, self.colors[self.cur_marker])), self.cur_marker

    def watershed(self):
        """Run the watershed on a copy of the markers and show/save the result."""
        # Work on a copy so the user's strokes in self.markers are untouched.
        m = self.markers.copy()
        cv.watershed(self.img, m)
        cv.imshow('img', self.img)
        # Negative labels are mapped to colour 0 so they can index self.colors.
        overlay = self.colors[np.maximum(m, 0)]
        vis = cv.addWeighted(self.img, 0.5, overlay, 0.5, 0.0, dtype=cv.CV_8UC3)
        overlay_u8 = np.array(overlay, np.uint8)
        cv.imshow('overlay', overlay_u8)
        cv.imwrite(self.overlay_path, overlay_u8)
        cv.imshow('watershed', vis)

    def run(self):
        """Process key presses until Esc is pressed or both windows are gone."""
        while cv.getWindowProperty('img', 0) != -1 or cv.getWindowProperty('watershed', 0) != -1:
            ch = cv.waitKey(50)
            # Digit keys 1-9 choose the marker label used for new strokes.
            if ord('1') <= ch <= ord('9'):
                self.cur_marker = ch - ord('0')
                print('marker: ', self.cur_marker)
            if self.sketch.dirty and self.auto_update:
                self.watershed()
                self.sketch.dirty = False
            # 27 is the Esc key.
            if ch == 27:
                break
        cv.destroyAllWindows()


# Image to annotate; cv.samples.findFile resolves the path before the
# interactive session is started.
fn = '/home/stephen/Desktop/test.png'
app = App(cv.samples.findFile(fn))
app.run()

输出将是这样的图像：

您可以使用以下代码对每种颜色的像素进行计数：

# Extract the values from the image
vals = []
overlay_file = '/home/stephen/Desktop/overlay.png'
img = cv.imread(overlay_file)
# cv.imread returns None instead of raising when the file cannot be read.
if img is None:
    raise IOError('could not read image: %r' % (overlay_file,))
# Get the colors in the image (one row per distinct pixel value)
flat = img.reshape(-1, img.shape[-1])
colors = np.unique(flat, axis=0)
# Iterate through the colors (ignore the first and last colors)
for color in colors[1:-1]:
    # Build the +/-1 tolerance in a signed type and clamp it to 0..255.
    # Doing `a - 1` / `a + 1` directly on uint8 values can wrap around
    # (0 - 1 -> 255, 255 + 1 -> 0) and produce an empty mask.
    center = color.astype(np.int32)
    lower = np.clip(center - 1, 0, 255).astype(np.uint8)
    upper = np.clip(center + 1, 0, 255).astype(np.uint8)
    mask = cv.inRange(img, lower, upper)
    # Count matching pixels. The mask is uint8 with 255 for a match, so
    # summing it with the builtin sum() overflows per column and does not
    # yield a pixel count.
    vals.append(cv.countNonZero(mask))
    cv.imshow('mask', mask)
    cv.waitKey(0)
cv.destroyAllWindows()

并使用以下代码打印输出数据：

# Labels for the measured regions, in the same order as the entries of vals.
names = ['alcohol', 'esters', 'biter', 'hoppy', 'acid', 'zoetheid', 'mout']
# Pair every label with its value; zip stops at the shorter sequence.
labelled = [(name, val) for name, val in zip(names, vals)]
print(labelled)

输出类似于（具体数值取决于手动标注）：

[('alcohol', 22118), ('esters', 26000), ('biter', 16245), ('hoppy', 21170), ('acid', 19156), ('zoetheid', 11090), ('mout', 7167)]

将图形从扫描的文档转换为数据

1 个答案: