从图像中读取嘈杂的文本

时间:2019-08-25 15:37:15

标签: python tesseract

我正在尝试从轮胎图像中识别并提取文本。到目前为止,使用 OpenCV 和 Tesseract 还没有得到任何结果。我尝试过使用全尺寸图像,也尝试过对图像进行反色处理。此外,我还尝试了 Sobel x 和 Sobel y,但它们的效果并不比 Laplacian 更好。

您可以在下面看到图像和代码。任何关于如何更好地清理图像,或者如何让 Tesseract 表现(perform)得更好的建议,我都将不胜感激!

preprocessed image original image

try:
    from PIL import Image
except ImportError:
    import Image
import pytesseract
import cv2
import numpy
import UtilityFunctions as uf
import numpy as np
import PreprocessingImage as PRI

import time


# Title of the HighGUI window. Trackbars are attached to this window and are
# later looked up by (trackbar name, window name).
windowName = "Dot Reader"
# Path of the photograph the script loads and runs OCR on.
imagePath = "./Images/DOT_Num_01.png"

# Create the preview window, then attach one slider per tunable parameter.
# Each entry is (trackbar name, initial position, maximum position);
# uf.nothing is passed as the on-change callback (presumably a no-op --
# verify in UtilityFunctions).
cv2.namedWindow(windowName)
for _name, _initial, _maximum in (
    ("threshold_l", 25, 255),
    ("threshold_h", 255, 255),
    ("kernel", 1, 30),
    ("iterations", 1, 10),
):
    cv2.createTrackbar(_name, windowName, _initial, _maximum, uf.nothing)



def ocr_core(image):
    """
    Run Tesseract OCR on an image and return the recognised text.

    Args:
        image: The image to recognise. It is passed straight through to
            ``pytesseract.image_to_string``.

    Returns:
        The value returned by ``pytesseract.image_to_string`` for ``image``
        (the recognised text).
    """
    # Use the argument rather than the module-level ``imageForOCR`` so the
    # function works for any caller and does not depend on a global being set.
    return pytesseract.image_to_string(image)


def preprocessImage4OCR(image):
    """
    Preprocess ``image`` with the current slider settings and show a preview.

    Reads the four trackbars attached to ``windowName``, passes their
    positions to ``PRI.FindObjectOfColor``, applies a Laplacian to the result
    and displays the Laplacian stacked above the processed image at half size.

    Args:
        image: Input image, forwarded unchanged to ``PRI.FindObjectOfColor``.

    Returns:
        The Laplacian of the processed image, computed with ``cv2.CV_64F``
        output depth. It is floating point, not 8-bit, so callers that need
        an 8-bit image must convert it themselves.
    """
    # Read the tuning parameters from the sliders. The names must match the
    # ones given to cv2.createTrackbar exactly: the iteration slider is
    # created as "iterations", so querying "iteration" could never return
    # that slider's position.
    threshhold_h = cv2.getTrackbarPos("threshold_h", windowName)
    threshhold_l = cv2.getTrackbarPos("threshold_l", windowName)
    kernel = cv2.getTrackbarPos("kernel", windowName)
    iteration = cv2.getTrackbarPos("iterations", windowName)

    processedImage = PRI.FindObjectOfColor(
        image, threshhold_h, threshhold_l, kernel, iteration
    )

    # Edge response handed back to the caller (float64 output depth).
    laplacianImage = cv2.Laplacian(processedImage, cv2.CV_64F)

    # Preview only: cv2.imshow treats floating-point images as being in the
    # 0..1 range, so show an 8-bit absolute-value copy of the Laplacian
    # instead. Assumes processedImage is 8-bit with the same width and
    # channel layout as the Laplacian -- TODO confirm against
    # PRI.FindObjectOfColor.
    previewLaplacian = cv2.convertScaleAbs(laplacianImage)
    mergedImage = np.concatenate((previewLaplacian, processedImage), axis=0)
    mergedImage = cv2.resize(
        mergedImage, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_CUBIC
    )
    cv2.imshow(windowName, mergedImage)

    return laplacianImage


# Load the image with PIL, force three RGB channels and convert it to a
# NumPy array so OpenCV can work on it.
pil_image = Image.open(imagePath).convert('RGB')
image = numpy.array(pil_image)

# Crop to a fixed region of interest: rows 1000-1799, columns 800-3199.
image = image[1000:1800, 800:3200]
if image.size == 0:
    # Slicing past the image bounds silently yields an empty array; fail here
    # with a clear message instead of deep inside an OpenCV call.
    raise ValueError(
        "Crop region [1000:1800, 800:3200] is empty for %s" % imagePath
    )

##****** Looping **********##
# Re-run preprocessing and OCR continuously so slider changes take effect
# immediately; the loop ends on the first key press.
showLive = True
while showLive:

    start = time.time()

    # preprocessImage4OCR returns a floating-point (CV_64F) Laplacian.
    # cv2.bitwise_not on floating-point data flips the raw bit pattern rather
    # than inverting intensities, so convert to 8-bit before inverting.
    imageForOCR = cv2.convertScaleAbs(preprocessImage4OCR(image))
    imageForOCR = cv2.bitwise_not(imageForOCR)

    print(ocr_core(imageForOCR))
    end = time.time()

    # Stop the process on any key press (waitKey returns -1 on timeout).
    if cv2.waitKey(30) >= 0:
        showLive = False

# Duration of the last loop iteration, in seconds.
print(end - start)

0 个答案:

没有答案