使用 OpenCV 轮廓去除验证码中的噪声线

时间:2021-07-19 04:22:31

标签: python ocr captcha python-tesseract opencv-python

下面是我的验证码识别代码。我需要帮助去除所附验证码图片中的噪声线。这些线条的像素与数字的像素相同,因此在把图像传给 pytesseract 之前需要先屏蔽这些线条。我已经尝试过 OpenCV 的膨胀、腐蚀、形态学变换、阈值化和轮廓检测等方法,但在从轮廓中筛选出线条(需要创建掩码)这一步卡住了。

import cv2
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
import re
from PIL import ImageFilter
import imutils


def is_contour_bad(c, max_width=6, max_height=7):
    """Return True when contour *c* is small enough to be treated as noise.

    A contour counts as "bad" when its upright bounding box is both narrower
    than ``max_width`` and shorter than ``max_height``.  The defaults are the
    thresholds this script originally hard-coded, so ``is_contour_bad(c)``
    behaves exactly as before; pass other values to tune the filter.

    Args:
        c: A single contour, as produced by ``cv2.findContours``.
        max_width: Exclusive upper bound on the bounding-box width.
        max_height: Exclusive upper bound on the bounding-box height.

    Returns:
        bool: True if the contour should be drawn onto the removal mask.
    """
    x, y, w, h = cv2.boundingRect(c)
    print(x, y, w, h)

    # The aspect ratio and the approximated polygon's vertex count do not
    # influence the verdict; they are printed only as tuning diagnostics.
    ar = w / float(h)
    print("Ar", ar)
    peri = cv2.arcLength(c, True)
    approx = cv2.approxPolyDP(c, 0.02 * peri, True)
    print("length", len(approx))

    return w < max_width and h < max_height

# --- Load, binarise and mask small contours of the captcha image -----------

IMAGE_PATH = '/home/geethaselvam/Desktop/CaptchaImage.jpeg'

# cv2.imread reports an unreadable/missing file by returning None rather than
# raising, so fail here with a clear message instead of inside cvtColor.
image = cv2.imread(IMAGE_PATH)
if image is None:
    raise FileNotFoundError("could not read captcha image: " + IMAGE_PATH)
cv2.imshow("img", image)

gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)  # convert to grey scale
cv2.imshow("gray", gray)

# Fixed binary threshold at 100 with a maximum value of 255.
ret, thres = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY)
cv2.imshow("thres", thres)

# Morphological closing with a small cross-shaped kernel.
kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (2, 3))
morph_img = cv2.morphologyEx(thres, cv2.MORPH_CLOSE, kernel)
cv2.imshow("morph_img", morph_img)

# NOTE(review): the closed image above is only displayed; it is replaced by
# the plain threshold here, so contour detection and masking below operate on
# `thres`. Remove this line to run them on the closed image instead.
morph_img = thres

# Find contours in the image and initialise the mask that will be used to
# remove the bad contours.
cnts = cv2.findContours(morph_img, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns a tuple whose layout depends on the OpenCV version;
# grab_contours extracts the actual contour list, so count only after it.
cnts = imutils.grab_contours(cnts)
print("cnts", len(cnts))

mask = np.ones(image.shape[:2], dtype="uint8") * 255

# Debug view: every detected contour outlined in red on a black canvas.
blank = np.zeros(image.shape, dtype='uint8')
cv2.drawContours(blank, cnts, -1, (0, 0, 255), 1)
cv2.imshow("con", blank)

# Loop over the contours; if a contour is bad, draw it filled (thickness -1)
# with 0 on the mask so the bitwise AND below clears that region.
for c in cnts:
    if is_contour_bad(c):
        print("in if loop")
        cv2.drawContours(mask, [c], -1, 0, -1)

# Remove the bad contours from the image and show the resulting images.
cv2.imshow("Mask", mask)
final = cv2.bitwise_and(morph_img, morph_img, mask=mask)

cv2.imshow("After", final)
cv2.waitKey(0)
cv2.destroyAllWindows()
# --- OCR the cleaned captcha with Tesseract ---------------------------------
from tesserocr import PyTessBaseAPI, PSM

# Use the masked result (`final`) rather than the unmasked threshold image;
# otherwise the contour filtering above has no effect on what Tesseract sees.
iplimage = Image.fromarray(final)
iplimage = iplimage.convert('RGB')
# 3x3 minimum filter applied before the image is saved and recognised.
iplimage = iplimage.filter(ImageFilter.MinFilter(3))
iplimage.save("/home/geethaselvam/Desktop/dilated_image_4.jpg")

# Strips newlines, carriage returns, tabs and spaces from the OCR output.
regex = re.compile(r'[\n\r\t ]')

# The context manager releases the Tesseract engine when recognition is done.
with PyTessBaseAPI(psm=PSM.SINGLE_WORD) as api:
    # Restrict recognition to digits.
    api.SetVariable("tessedit_char_whitelist", "1234567890")
    api.SetImage(iplimage)
    captcha = regex.sub("", api.GetUTF8Text())
    conf = api.MeanTextConf()

print("cap conf:", captcha, conf)

enter image description here

0 个答案:

没有答案
相关问题