My image contains some text. I want to send the image to OCR, but there is some white noise in it, so the OCR results are not very good. I tried eroding/dilating the image, but I could not find a threshold that works well. Since all the text in the image is perfectly horizontal, I tried the Hough transform.
This is what the image looks like when I run the sample Hough transform program bundled with OpenCV.
Question
How can I exclude everything except the horizontal lines? OR How can I crop out a separate image for each of the regions highlighted by the red lines?
I only want to focus on the horizontal lines and can discard the diagonal ones.
Either option would work for me when sending to OCR, but I'd like to try both and see which gives the better results.
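For illustration, this is roughly the kind of filtering/cropping I have in mind (only a sketch; the file name and the threshold/length/gap values are placeholders, not something I have tuned):

import cv2
import numpy as np

img = cv2.imread('page.png')   # placeholder file name
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 50, 200)

# probabilistic Hough transform; the parameter values here are guesses
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, 50, minLineLength=50, maxLineGap=10)

crops = []
if lines is not None:
    for x1, y1, x2, y2 in lines.reshape(-1, 4):
        if abs(y2 - y1) <= 2:  # keep only (near-)horizontal segments
            top = max(min(y1, y2) - 20, 0)
            bottom = max(y1, y2) + 20
            crops.append(img[top:bottom, min(x1, x2):max(x1, x2)])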
Answer 0 (score: 2)
Howto/s with output
dotess2()
['Footel text goes he: e\n', 'Some mole hele\n', 'Some Text Here\n']
dotess1()
['Foolel text goes he: e\n', 'Some mole hele\n', 'Some Text Here\n', 'Directions\n']
Code
# -*- coding: utf-8 -*-
import cv2
import numpy as np
import math
import subprocess
import os
import operator
#some clean up/init blah blah
junk='\/,-‘’“ ”?.\';!{§_~!@#$%^&*()_+-|:}»£[]¢€¥°><'
tmpdir='./tmp'
if not os.path.exists(tmpdir):
    os.makedirs(tmpdir)
for path, subdirs, files in os.walk(tmpdir):
    for name in files:
        os.remove(os.path.join(path, name))
#when the preprocessor is not perfect, there will be junk in the result. this is a crude means of filtering it out
def resfilter(res):
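    # score each unique line: junk characters count -1, digits +0.5,
    # everything else +1; return the lines sorted best-first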
    rd = dict()
    for l in set(res):
        rd[l]=0.
    for l in rd:
        for i in l:
            if i in junk:
                rd[l]-=1
            elif i.isdigit():
                rd[l]+=.5
            else:
                rd[l]+=1
    ret=[]
    for v in sorted(rd.iteritems(), key=operator.itemgetter(1), reverse=True):
        ret.append(v[0])
    return ret
def dotess1():
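    # line mode: run tesseract on every cropped row image saved under ./tmp
    # ('-psm 7' treats each image as a single line of text)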
    res =[]
    for path, subdirs, files in os.walk(tmpdir):
        for name in files:
            fpath = os.path.join(path, name)
            img = cv2.imread(fpath)
            gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
            '''
            #if the text is too small/contains noise etc, resize and maintain aspect ratio
            if gray.shape[1]<100:
                gray=cv2.resize(gray,(int(100/gray.shape[0]*gray.shape[1]),100))
            '''
            cv2.imwrite('tmp.jpg',gray)
            args = ['tesseract.exe','tmp.jpg','tessres','-psm','7', '-l','eng']
            subprocess.call(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            with open('tessres.txt') as f:
                for line in f:
                    if line.strip() != '':
                        res.append(line)
    print resfilter(res)
def dotess2():
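    # page mode: run tesseract once on the cleaned full image ('clean.jpg')
    # ('-psm 3' is fully automatic page segmentation, tesseract's default)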
    res =[]
    args = ['tesseract.exe','clean.jpg','tessres','-psm','3', '-l','eng']
    subprocess.call(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    with open('tessres.txt') as f:
        for line in f:
            if line.strip() != '':
                res.append(line)
    print resfilter(res)
'''
start of code
'''
img = cv2.imread('c:/data/ocr3.png')
gray=cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
canny=cv2.Canny(gray,50,200,3)
cv2.imshow('canny',canny)
#remove the actual horizontal lines so that hough wont detect them
linek = np.zeros((11,11),dtype=np.uint8)
linek[5,...]=1
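#only row 5 of the 11x11 kernel is set, so it acts like a 1x11 horizontal structuring
#element: the opening keeps only horizontal runs of edge pixels at least ~11px long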
x=cv2.morphologyEx(canny, cv2.MORPH_OPEN, linek ,iterations=1)
canny-=x
cv2.imshow('canny no horizontal lines',canny)
#draw a fat line so that you can box it up
lines = cv2.HoughLinesP(canny, 1, math.pi/2, 50,50, 50, 20)
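#a theta step of pi/2 quantizes the accumulator to 0/90 degrees, so the detected
#segments are axis-aligned; the line[1]==line[3] check below keeps only the horizontal ones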
linemask = np.zeros(gray.shape,gray.dtype)
for line in lines[0]:
    if line[1]==line[3]:#check horizontal
        pt1 = (line[0],line[1])
        pt2 = (line[2],line[3])
        cv2.line(linemask, pt1, pt2, (255), 30)
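#drawing each segment 30px thick merges segments that belong to the same text row
#into one solid blob, so findContours below yields one bounding box per row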
cv2.imshow('linemask',linemask)
'''
* two methods of doing ocr: line mode and page mode
* boxmask is used so that a clean image can be saved for page mode
* for every detected box, the roi is cropped and saved so that tesseract can be run in line mode
'''
boxmask = np.zeros(gray.shape,gray.dtype)
contours,hierarchy = cv2.findContours(linemask,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)
idx=0
for cnt in contours:
    idx+=1
    area = cv2.contourArea(cnt)
    x,y,w,h = cv2.boundingRect(cnt)
    roi=img[y:y+h,x:x+w].copy()
    cv2.imwrite('%s/%s.jpg'%(tmpdir,str(idx)),roi)
    cv2.rectangle(boxmask,(x,y),(x+w,y+h),(255),-1)
cv2.imshow('clean',img&cv2.cvtColor(boxmask,cv2.COLOR_GRAY2BGR))
cv2.imwrite('clean.jpg',img&cv2.cvtColor(boxmask,cv2.COLOR_GRAY2BGR))
cv2.imshow('img',img)
dotess1()
dotess2()
cv2.waitKey(0)
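Note that the code above targets Python 2 and OpenCV 2.4. On Python 3 with a newer OpenCV, print becomes a function, rd.iteritems() becomes rd.items(), recent tesseract builds expect '--psm' instead of '-psm', cv2.HoughLinesP returns an (N, 1, 4) array (iterate lines.reshape(-1, 4) rather than lines[0]), and cv2.findContours changed its return value between versions. A small version-agnostic helper for the last point (just a sketch; the helper name is my own, and it relies on the cv2 import above):

def find_contours(mask):
    # OpenCV 3.x returns (image, contours, hierarchy); 2.4 and 4.x return
    # (contours, hierarchy). Taking the last two items works in all cases.
    result = cv2.findContours(mask, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    return result[-2:]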