Question

我正在尝试从图像中识别文本，主要目的是提取其中的关键信息，例如从产品标签中识别配料表和营养成分。问题在于图像的尺寸并不统一。我附上了两个示例——sample 1 和 sample 2。我尝试了多种方法，主要是 Canny 边缘检测、Roberts Cross 算子、Laplacian 算子和高斯模糊（Gaussian blur），但每张图像得到的结果都不一致。我可能遗漏了什么。任何帮助都将不胜感激。

def channel(image):
    """Build a combined Canny edge mask from every channel of *image*.

    Each channel returned by ``cv2.split`` is smoothed with a 7x7 Gaussian
    blur and run through Canny. The hysteresis thresholds are 0.66x and
    1.33x the channel's mean intensity, measured before blurring. The
    per-channel edge maps are OR-ed together, so an edge detected in any
    channel is kept in the result.

    Args:
        image: Image array; its first two dimensions set the mask size.

    Returns:
        A tuple ``(accumEdged, min_value, max_value)``: the uint8 edge mask
        and the Canny thresholds used for the last channel processed.
    """
    accumEdged = np.zeros(image.shape[:2], dtype="uint8")
    for chan in cv2.split(image):
        mean = np.mean(chan)
        min_value = 0.66 * mean
        max_value = 1.33 * mean
        blurred = cv2.GaussianBlur(chan, (7, 7), 0)
        edged = cv2.Canny(blurred, min_value, max_value)
        # OR, not XOR: XOR would erase any edge found in exactly two channels.
        accumEdged = cv2.bitwise_or(accumEdged, edged)

    return accumEdged, min_value, max_value

# [contour_area, bounding_box_area] for every contour that passed the
# contour-area filter, accumulated across all processed images.
temp = []

# Minimum area, in pixels, that a contour and its bounding box must both
# exceed before the region is exported.
threshold_area = 10000

# NOTE(review): file names are listed from image_dir but read from and written
# to this hard-coded directory; presumably they are the same place -- confirm.
base_dir = '/home/abc/Test images/'

# Tall, narrow kernel for the morphological opening of the edge mask.
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 35))

for item in os.listdir(image_dir):
    image = cv2.imread(base_dir + item)
    if image is None:
        # imread yields None for directories (including the output folders
        # created below) and for files it cannot decode.
        continue

    # Rectangles are drawn on base_image; crops are taken from the untouched
    # image so earlier boxes never bleed into later crops.
    base_image = image.copy()
    stem = item.split('.')[0]

    edge, _, _ = channel(image)

    # Indexing with [-2] selects the contours under both the 3-value and the
    # 2-value return conventions of findContours; list() makes them extendable.
    cnts = list(cv2.findContours(edge.copy(), cv2.RETR_EXTERNAL,
                                 cv2.CHAIN_APPROX_SIMPLE)[-2])
    threshed = cv2.morphologyEx(edge, cv2.MORPH_OPEN, rect_kernel)
    cnts_alt = cv2.findContours(threshed, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_NONE)[-2]
    cnts.extend(cnts_alt)

    out_dir = base_dir + stem
    idx = 0
    for c in cnts:
        area1 = cv2.contourArea(c)
        if area1 <= threshold_area:
            continue

        hull = cv2.convexHull(c)
        x, y, w, h = cv2.boundingRect(hull)
        area = w * h
        temp.append([area1, area])
        if area <= threshold_area:
            continue

        idx += 1
        # The crop folder has to exist before imwrite can write into it.
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        new_img_1 = image[y:y+h, x:x+w]
        out_path = out_dir + '/' + str(idx) + '.png'
        cv2.imwrite(out_path, new_img_1)
        cv2.rectangle(base_image, (x, y), (x+w, y+h), (0, 0, 255), 5)

    # NOTE(review): when the source file is itself "<stem>.png" this write
    # replaces it with the annotated copy -- confirm that is intended.
    cv2.imwrite(base_dir + stem + '.png', base_image)

检测轮廓和文本块

0 个答案: