我需要解答的问题
我需要如何更改以下脚本,才能让它处理原始图像而不是调整大小后的图像?
有助于解决问题的信息
图像来自扫描仪,包含2或3张手写或打字的3" x 5"索引卡。
该脚本能正确识别每张食谱卡(recipe)并将其保存为单独的文件。不幸的是,新图像中的文字太小,我的OCR脚本无法准确读取手写的食谱。
删除 img = cv2.resize() 这一行(第54行)会导致 def transform() 在第33行抛出 "IndexError: list index out of range"(列表索引超出范围)。
# Original Script: https://www.quora.com/How-can-I-detect-an-object-from-static-image-and-crop-it-from-the-image-using-openCV
import cv2
import numpy as np
import os
def transform(pos):
    """Find the corners and the pixel dimensions of a contour.

    Args:
        pos: Sequence of points, each wrapped one level deep as ``[[x, y]]``
            (the layout this script receives from ``cv2.approxPolyDP``).

    Returns:
        A tuple ``(w, h, rect)`` where ``w`` and ``h`` are the integer width
        and height (the longer of each pair of opposite sides) and ``rect``
        is ``[top_left, top_right, bottom_left, bottom_right]``, each a fresh
        ``[x, y]`` list. For empty input the result is ``(0, 0, [])``.
    """
    # Unwrap every [[x, y]] entry into a plain [x, y] list.
    pts = [list(p[0]) for p in pos]
    if not pts:
        # Nothing to measure; callers skip objects whose size is zero.
        return 0, 0, []

    # Key each point by x+y and by y-x. Points sharing a key overwrite each
    # other, so the two dicts may end up with different sizes.
    sums = {}
    diffs = {}
    for pt in pts:
        x = pt[0]
        y = pt[1]
        sums[x + y] = pt
        diffs[y - x] = pt
    sums = sorted(sums.items())
    diffs = sorted(diffs.items())

    # Smallest x+y is the top-left corner, largest x+y the bottom-right;
    # smallest y-x is the top-right corner, largest y-x the bottom-left.
    # Each list is indexed by its OWN last element: using len(sums) to index
    # diffs raised IndexError whenever diffs had fewer entries than sums.
    # The corners are copied so a caller that edits one corner in place
    # cannot accidentally edit another that came from the same point.
    rect = [
        list(sums[0][1]),    # top-left
        list(diffs[0][1]),   # top-right
        list(diffs[-1][1]),  # bottom-left
        list(sums[-1][1]),   # bottom-right
    ]

    def side(a, b):
        # Euclidean distance between two corners.
        return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)

    h = max(side(rect[0], rect[2]), side(rect[1], rect[3]))  # left / right side
    w = max(side(rect[0], rect[1]), side(rect[2], rect[3]))  # upper / lower side
    return int(w), int(h), rect
# Load the scanned page. cv2.imread returns None (it does not raise) when the
# file is missing or cannot be decoded, so check before touching img.shape.
img = cv2.imread('source_image.png')
if img is None:
    raise SystemExit("Unable to read image: source_image.png")
#
# Resizing of image is done here
#
# Removal of these lines results in "IndexError: list index out of range" being displayed
r = 500.0 / img.shape[1]
dim = (500, int(img.shape[0] * r))
img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

# Grayscale + blur + Canny edge map used for contour detection.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (11, 11), 0)
edge = cv2.Canny(gray, 100, 200)

# findContours returns 3 values on OpenCV 3.x but 2 values on 2.x and 4.x;
# the contour list is the second-to-last item in every version.
# RETR_LIST and CHAIN_APPROX_NONE are the named forms of the original 1, 1.
contours = cv2.findContours(edge.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[-2]

# Give each output image a unique name
loopcnt = 0
for pos in contours:
    # Simplify the contour to a polygon, tolerance = 2% of its perimeter.
    peri = cv2.arcLength(pos, True)
    approx = cv2.approxPolyDP(pos, 0.02 * peri, True)
    w, h, arr = transform(approx)
    if w > 0 and h > 0:
        # Map the detected corners onto an upright w x h rectangle.
        pts2 = np.float32([[0, 0], [w, 0], [0, h], [w, h]])
        pts1 = np.float32(arr)
        M = cv2.getPerspectiveTransform(pts1, pts2)
        dst = cv2.warpPerspective(img, M, (w, h))
        image = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)
        cv2.imwrite("output_image_" + str(loopcnt) + ".png", image)
        loopcnt += 1
答案 0 :(得分:1)
这是我的解决方案。
该脚本一次只能处理一个图像。如果要处理多个图像,则需要为每个图像调用此脚本一次。
# Original Script: https://www.quora.com/How-can-I-detect-an-object-from-static-image-and-crop-it-from-the-image-using-openCV
##########
# Process a scanned image and place each recipe card into its own image file.
#
# WARNING: This script will overwrite existing files when saving output images.
#
# Created: 2017-12-29 1148
# Modified: 2017-12-31 1358
##########
import cv2
import numpy as np
import math
import argparse
import os
def transform(pos):
    """Find the corners and the pixel dimensions of a contour.

    Args:
        pos: Sequence of points, each wrapped one level deep as ``[[x, y]]``
            (the layout this script receives from ``cv2.approxPolyDP``).

    Returns:
        A tuple ``(w, h, rect)`` where ``w`` and ``h`` are the integer width
        and height (the longer of each pair of opposite sides) and ``rect``
        is ``[top_left, top_right, bottom_left, bottom_right]``, each a fresh
        ``[x, y]`` list. For empty input the result is ``(0, 0, [])``.
    """
    # Unwrap every [[x, y]] entry into a plain [x, y] list.
    pts = [list(p[0]) for p in pos]
    if not pts:
        # Nothing to measure; callers skip objects whose size is zero.
        return 0, 0, []

    # Key each point by x+y and by y-x. Points sharing a key overwrite each
    # other, so the two dicts may end up with different sizes.
    sums = {}
    diffs = {}
    for pt in pts:
        x = pt[0]
        y = pt[1]
        sums[x + y] = pt
        diffs[y - x] = pt
    sums = sorted(sums.items())
    diffs = sorted(diffs.items())

    # Smallest x+y is the top-left corner, largest x+y the bottom-right;
    # smallest y-x is the top-right corner, largest y-x the bottom-left.
    # Each list is indexed by its OWN last element: using len(sums) to index
    # diffs raised IndexError whenever diffs had fewer entries than sums.
    # The corners are copied so a caller that edits one corner in place
    # cannot accidentally edit another that came from the same point.
    rect = [
        list(sums[0][1]),    # top-left
        list(diffs[0][1]),   # top-right
        list(diffs[-1][1]),  # bottom-left
        list(sums[-1][1]),   # bottom-right
    ]

    def side(a, b):
        # Euclidean distance between two corners.
        return np.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)

    h = max(side(rect[0], rect[2]), side(rect[1], rect[3]))  # left / right side
    w = max(side(rect[0], rect[1]), side(rect[2], rect[3]))  # upper / lower side
    return int(w), int(h), rect
def file_choices(fname):
    """Check that fname ends with one of the accepted image extensions.

    Written to be used as an argparse ``type=`` callable: argparse turns the
    ArgumentTypeError raised here into its normal usage error, so this
    function no longer needs the module-level parser object.

    Args:
        fname: Path given on the command line.

    Returns:
        fname, unchanged, when its extension is accepted.

    Raises:
        argparse.ArgumentTypeError: If the extension is not in the list.
    """
    # List of valid file extensions
    choices = ('bmp', 'dib', 'jpeg', 'jpg', 'jpe', 'jp2', 'png', 'webp', 'pbm', 'pgm', 'ppm', 'sr', 'ras', 'tiff', 'tif')
    # Get file extension without the leading dot; lower-cased so that
    # names such as "SCAN.PNG" are accepted too.
    ext = os.path.splitext(fname)[1][1:].lower()
    # Check if extension is valid
    if ext not in choices:
        raise argparse.ArgumentTypeError("File doesn't end with one of {}".format(choices))
    return fname
##
## Main program starts here
##
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True, help="Path to source image", type=file_choices)
ap.add_argument("-c", "--color", help="If set, output images will be in color if original image is in color. Default is to return grayscale (black & white) images.", action="store_true")
args = ap.parse_args()
srcimage = args.image

# Save output images in same directory as srcimage
destpath = os.path.dirname(os.path.abspath(srcimage))

# -Should the output images be in color or grayscale?
# -Note: If the source image is already in grayscale, this setting
#  will have no effect on the output images.
returncolor = args.color

# Load in the source image. cv2.imread returns None (it does not raise) when
# the file is missing or cannot be decoded, so report that as a usage error.
imgorig = cv2.imread(srcimage)
if imgorig is None:
    ap.error("Unable to read image: {}".format(srcimage))

# Contours are detected on a copy resized to 500 px wide to speed up
# processing; ratio converts between the resized and the original image.
ratio = 500.0 / imgorig.shape[1]
dim = (500, int(imgorig.shape[0] * ratio))
img = cv2.resize(imgorig, dim, interpolation=cv2.INTER_AREA)

# Convert to grayscale
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Add a blur to remove some of the noise
# Image noise is random variation of brightness or color.
# More info: https://en.wikipedia.org/wiki/Image_noise
gray = cv2.GaussianBlur(gray, (11, 11), 0)

# Find the contours of the recipe cards.
# findContours returns 3 values on OpenCV 3.x but 2 values on 2.x and 4.x;
# the contour list is the second-to-last item in every version.
# RETR_LIST and CHAIN_APPROX_NONE are the named forms of the original 1, 1.
edge = cv2.Canny(gray, 100, 200)
contours = cv2.findContours(edge.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)[-2]

# Give each output image a unique name
loopcnt = 0

# Process all found contours
for pos in contours:
    # Get length of the contour in pixels (peri is a float)
    peri = cv2.arcLength(pos, True)

    # Approximates a polygonal curve(s) with the specified precision
    # More info: https://docs.opencv.org/2.4/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html#approxpolydp
    approx = cv2.approxPolyDP(pos, 0.02 * peri, True)

    # Find the corners and dimensions of the object
    w, h, arr = transform(approx)

    # Adjust width and height to match dimensions of
    # each recipe card on the original image
    wr = int(w / ratio)
    hr = int(h / ratio)

    # Only process contours that have a valid dimension, both on the
    # resized image and after scaling back to the original image.
    if w <= 0 or h <= 0 or wr <= 0 or hr <= 0:
        continue

    # Adjust pixel coordinates to match the original image. A new array is
    # built instead of editing the corner lists in place, so a corner that
    # transform() returns more than once is never scaled twice.
    pts1 = np.float32([[math.floor(x / ratio), math.floor(y / ratio)] for x, y in arr])
    pts2 = np.float32([[0, 0], [wr, 0], [0, hr], [wr, hr]])

    # Changes perspective to a top-down view (a.k.a.: birds eye view),
    # sampling from the full-resolution original image.
    M = cv2.getPerspectiveTransform(pts1, pts2)
    dst = cv2.warpPerspective(imgorig, M, (wr, hr))

    if returncolor:
        # Keep original image colors in output images
        image = dst
    else:
        # Convert output images to grayscale before saving
        image = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)

    # Save each recipe card to individual image files
    # WARNING: This will overwrite existing files.
    cv2.imwrite(os.path.join(destpath, "output_" + str(loopcnt) + ".png"), image)
    loopcnt += 1