希望我的脚本处理原始图像而不是调整大小的图像

时间:2017-12-31 05:41:19

标签: python opencv image-processing

我需要解答的问题
我需要如何更改以下脚本,才能让它处理原始图像而不是调整大小后的图像?

有助于解决问题的信息
图像来自扫描仪,每张图像包含 2 或 3 张手写或打印的 3" x 5" 索引卡。

该脚本能正确识别每张食谱(recipe)卡片,并将其分别保存为单独的文件。不幸的是,新图像中的文字太小,我的 OCR 脚本无法准确读取手写的内容。

删除 img = cv2.resize() 这一行(原脚本第54行)会导致 transform() 函数在原脚本第33行抛出 "IndexError: list index out of range"。

# Original Script: https://www.quora.com/How-can-I-detect-an-object-from-static-image-and-crop-it-from-the-image-using-openCV

import cv2
import numpy as np
import os

def transform(pos):
    """Find the four corners and the pixel dimensions of a contour.

    The corners are chosen with the usual sum/difference heuristic:
    the point with the smallest ``x + y`` is treated as top-left, the
    largest ``x + y`` as bottom-right, the smallest ``y - x`` as
    top-right and the largest ``y - x`` as bottom-left.

    Args:
        pos: Sequence of points where each entry wraps one point as
            ``pos[i][0] == (x, y)`` (this is how the caller passes the
            result of ``cv2.approxPolyDP``).

    Returns:
        A tuple ``(w, h, rect)`` where ``w`` and ``h`` are the integer
        width and height of the object (the longer of each pair of
        opposite sides) and ``rect`` is
        ``[top_left, top_right, bottom_left, bottom_right]``.
        For an empty ``pos`` the result is ``(0, 0, [])``, which callers
        that test ``w > 0 and h > 0`` will skip.
    """
    pts = [list(p[0]) for p in pos]
    if not pts:
        return 0, 0, []

    # Points are keyed by x+y and y-x; when two points share a key the
    # later one wins, so the two tables can end up with different sizes.
    sums = {}
    diffs = {}
    for pt in pts:
        x, y = pt[0], pt[1]
        sums[x + y] = pt
        diffs[y - x] = pt

    sums = sorted(sums.items())
    diffs = sorted(diffs.items())

    # Index each table by its OWN last element. Using len(sums) to index
    # diffs raised "IndexError: list index out of range" whenever diffs was
    # shorter, and picked the wrong bottom-left corner when it was longer.
    rect = [sums[0][1], diffs[0][1], diffs[-1][1], sums[-1][1]]
    #       top-left    top-right    bottom-left   bottom-right

    def side(a, b):
        # Euclidean distance between two corner points.
        return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)

    h = max(side(rect[0], rect[2]),   # height of left side
            side(rect[1], rect[3]))   # height of right side
    w = max(side(rect[0], rect[1]),   # width of upper side
            side(rect[2], rect[3]))   # width of lower side

    return int(w), int(h), rect

img = cv2.imread('source_image.png')
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None
    # instead of raising, which would otherwise surface as an
    # AttributeError on img.shape below.
    raise IOError("Could not read 'source_image.png'")

#
# Resizing of image is done here (target width: 500 px, aspect ratio kept)
#
# NOTE: the author reports that removing these lines resulted in
# "IndexError: list index out of range" being raised from transform().
r = 500.0 / img.shape[1]
dim = (500, int(img.shape[0] * r))
img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

# Grayscale -> blur -> Canny edges -> contours
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (11, 11), 0)
edge = cv2.Canny(gray, 100, 200)

# findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x and 4.x; the contours are always the
# second-to-last element. RETR_LIST and CHAIN_APPROX_NONE are the named
# forms of the literal 1, 1 originally passed here.
contours = cv2.findContours(edge.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[-2]

# Give each output image a unique name
loopcnt = 0
for pos in contours:
    # Simplify the contour to a polygon (tolerance: 2% of its perimeter)
    peri = cv2.arcLength(pos, True)
    approx = cv2.approxPolyDP(pos, 0.02 * peri, True)

    w, h, arr = transform(approx)

    # Skip contours whose bounding quadrilateral has no area
    if w > 0 and h > 0:

        # Map the detected corners onto an upright w x h rectangle
        pts1 = np.float32(arr)
        pts2 = np.float32([[0, 0], [w, 0], [0, h], [w, h]])
        M = cv2.getPerspectiveTransform(pts1, pts2)
        dst = cv2.warpPerspective(img, M, (w, h))
        image = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)
        cv2.imwrite("output_image_" + str(loopcnt) + ".png", image)

        loopcnt += 1

1 个答案:

答案 0 :(得分:1)

这是我的解决方案。

该脚本一次只能处理一个图像。如果要处理多个图像,则需要为每个图像调用此脚本一次。

# Original Script: https://www.quora.com/How-can-I-detect-an-object-from-static-image-and-crop-it-from-the-image-using-openCV

##########
# Process a scanned image and place each recipe card into its own image file.
#
# WARNING: This script will overwrite existing files when saving output images.
#
# Created: 2017-12-29 1148
# Modified: 2017-12-31 1358
##########

import cv2
import numpy as np
import math
import argparse
import os

def transform(pos):
    """Find the corners and dimensions of the object described by ``pos``.

    Corner selection uses the sum/difference heuristic: smallest ``x + y``
    is top-left, largest ``x + y`` is bottom-right, smallest ``y - x`` is
    top-right and largest ``y - x`` is bottom-left.

    Args:
        pos: Sequence of points where each entry wraps one point as
            ``pos[i][0] == (x, y)`` (the caller passes the output of
            ``cv2.approxPolyDP``).

    Returns:
        ``(w, h, rect)``: integer width and height (the longer of each
        pair of opposite sides) and
        ``rect == [top_left, top_right, bottom_left, bottom_right]``.
        An empty ``pos`` yields ``(0, 0, [])`` so that callers checking
        ``w > 0 and h > 0`` simply skip it.
    """
    pts = [list(p[0]) for p in pos]
    if not pts:
        return 0, 0, []

    # Key every point by x+y and by y-x. Points sharing a key overwrite
    # each other (last one wins), so the tables may differ in length.
    sums = {}
    diffs = {}
    for pt in pts:
        x, y = pt[0], pt[1]
        sums[x + y] = pt
        diffs[y - x] = pt

    sums = sorted(sums.items())
    diffs = sorted(diffs.items())

    # Take the extremes of each table independently; indexing diffs with
    # len(sums) could raise IndexError or select the wrong corner.
    rect = [sums[0][1], diffs[0][1], diffs[-1][1], sums[-1][1]]
    #       top-left    top-right    bottom-left   bottom-right

    def side(a, b):
        # Euclidean distance between two corner points.
        return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)

    h = max(side(rect[0], rect[2]),   # height of left side
            side(rect[1], rect[3]))   # height of right side
    w = max(side(rect[0], rect[1]),   # width of upper side
            side(rect[2], rect[3]))   # width of lower side

    return int(w), int(h), rect

def file_choices(fname):
    """Validate that ``fname`` has a supported image file extension.

    Intended for use as an ``argparse`` ``type=`` callable.

    Args:
        fname: Path given on the command line.

    Returns:
        ``fname`` unchanged when its extension is accepted.

    Raises:
        argparse.ArgumentTypeError: If the extension is not in the list of
            valid extensions. argparse turns this into a usage error, so
            the function no longer needs the module-level parser object.
    """
    # List of valid file extensions
    choices = ('bmp', 'dib', 'jpeg', 'jpg', 'jpe', 'jp2', 'png', 'webp',
               'pbm', 'pgm', 'ppm', 'sr', 'ras', 'tiff', 'tif')

    # Extension without the leading dot; lower-cased so that names such as
    # "scan.PNG" are accepted as well.
    ext = os.path.splitext(fname)[1][1:].lower()

    if ext not in choices:
        raise argparse.ArgumentTypeError(
            "File doesn't end with one of {}".format(choices))

    return fname

##
## Main program starts here
##

ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to source image", type=file_choices)
ap.add_argument("-c", "--color", help="If set, output images will be in color if original image is in color. Default is to return grayscale (black & white) images.", action="store_true")
args = ap.parse_args()

srcimage = args.image

# Save output images in same directory as srcimage
destpath = os.path.dirname(os.path.abspath(srcimage))

# Should the output images be in color or grayscale?
# (store_true already yields a bool, so no if/else is needed.)
returncolor = args.color

# Load in the source image
imgorig = cv2.imread(srcimage)
if imgorig is None:
    # cv2.imread returns None instead of raising when the file is missing
    # or cannot be decoded.
    ap.error("Could not read image file: {}".format(srcimage))

# Detection runs on a copy scaled to 500 px wide; `ratio` is kept so the
# detected corners can be mapped back onto the full-resolution original.
ratio = 500.0 / imgorig.shape[1]
dim = (500, int(imgorig.shape[0] * ratio))
img = cv2.resize(imgorig, dim, interpolation=cv2.INTER_AREA)

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Add a blur to remove some of the noise
# Image noise is random variation of brightness or color.
# More info: https://en.wikipedia.org/wiki/Image_noise
gray = cv2.GaussianBlur(gray, (11, 11), 0)

# Find the contours of the recipe cards
edge = cv2.Canny(gray, 100, 200)

# findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x and 4.x; the contours are always the
# second-to-last element. RETR_LIST and CHAIN_APPROX_NONE are the named
# forms of the literal 1, 1 originally passed here.
contours = cv2.findContours(edge.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[-2]

# Give each output image a unique name.
# NOTE: the counter advances for every contour, including skipped ones,
# so the numbers in the output file names may have gaps.
loopcnt = 0

# Process all found contours
for pos in contours:

    # Get length of the contour in pixels (peri is a float)
    peri = cv2.arcLength(pos, True)

    # Approximates a polygonal curve(s) with the specified precision
    # More info: https://docs.opencv.org/2.4/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html#approxpolydp
    approx = cv2.approxPolyDP(pos, 0.02 * peri, True)

    # Find the corners and dimensions of the object
    w, h, arr = transform(approx)

    # Only process contours that have a valid dimension
    if w > 0 and h > 0:

        # Adjust width and height to match dimensions of
        # each recipe card on the original image
        wr = int(w / ratio)
        hr = int(h / ratio)

        # When the source is narrower than 500 px (ratio > 1) a tiny
        # contour can scale down to zero pixels; nothing to write then.
        if wr > 0 and hr > 0:

            # Adjust pixel coordinates to match original image
            # (new lists are built rather than mutating transform()'s result)
            arr = [[int(math.floor(a[0] / ratio)),
                    int(math.floor(a[1] / ratio))] for a in arr]

            # Convert all of the numbers to floats
            pts1 = np.float32(arr)
            pts2 = np.float32([[0, 0], [wr, 0], [0, hr], [wr, hr]])

            # Changes perspective to a top-down view (a.k.a.: birds eye view)
            M = cv2.getPerspectiveTransform(pts1, pts2)
            dst = cv2.warpPerspective(imgorig, M, (wr, hr))

            if returncolor:
                # Keep original image colors in output images
                image = dst
            else:
                # Convert output images to grayscale before saving
                image = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)

            # Save each recipe card to individual image files
            # WARNING: This will overwrite existing files.
            cv2.imwrite(os.path.join(destpath, "output_" + str(loopcnt) + ".png"), image)

    loopcnt += 1