借助我之前在堆栈溢出时所做的其他问题,我已经实现了这段代码,该代码可以检测图像中的文档(在这种情况下为ID卡),然后根据需要执行其旋转和透视校正。 / p>
它做了所有事情,但是最终处理的图像质量下降/丧失了能量,并且字母看起来比原始字母更窄(我不知道),并且重要的是不要发生这种情况,因为我想使用图像来执行OCR。
我将提供一张图片用作测试,出于明显的原因,我画了一些信息,但检查了剩余的文字,看来很奇怪。
代码:
import cv2
import numpy as np
def order_corner_points(corners):
# Separate corners into individual points
# Index 0 - top-right
# 1 - top-left
# 2 - bottom-left
# 3 - bottom-right
corners = [(corner[0][0], corner[0][1]) for corner in corners]
top_r, top_l, bottom_l, bottom_r = corners[0], corners[1], corners[2], corners[3]
return (top_l, top_r, bottom_r, bottom_l)
def perspective_transform(image, corners):
# Order points in clockwise order
ordered_corners = order_corner_points(corners)
top_l, top_r, bottom_r, bottom_l = ordered_corners
# Determine width of new image which is the max distance between
# (bottom right and bottom left) or (top right and top left) x-coordinates
width_A = np.sqrt(((bottom_r[0] - bottom_l[0]) ** 2) + ((bottom_r[1] - bottom_l[1]) ** 2))
width_B = np.sqrt(((top_r[0] - top_l[0]) ** 2) + ((top_r[1] - top_l[1]) ** 2))
width = max(int(width_A), int(width_B))
# Determine height of new image which is the max distance between
# (top right and bottom right) or (top left and bottom left) y-coordinates
height_A = np.sqrt(((top_r[0] - bottom_r[0]) ** 2) + ((top_r[1] - bottom_r[1]) ** 2))
height_B = np.sqrt(((top_l[0] - bottom_l[0]) ** 2) + ((top_l[1] - bottom_l[1]) ** 2))
height = max(int(height_A), int(height_B))
# Construct new points to obtain top-down view of image in
# top_r, top_l, bottom_l, bottom_r order
dimensions = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1],
[0, height - 1]], dtype = "float32")
# Convert to Numpy format
ordered_corners = np.array(ordered_corners, dtype="float32")
# Find perspective transform matrix
matrix = cv2.getPerspectiveTransform(ordered_corners, dimensions)
# Return the transformed image
return cv2.warpPerspective(image, matrix, (width, height), cv2.INTER_MAX)
def get_image_width_height(image):
image_width = image.shape[1] # current image's width
image_height = image.shape[0] # current image's height
return image_width, image_height
def calculate_scaled_dimension(scale, image):
image_width, image_height = get_image_width_height(image)
ratio_of_new_with_to_old = scale / image_width
dimension = (scale, int(image_height * ratio_of_new_with_to_old))
return dimension
def scale_image(image, size):
image_resized_scaled = cv2.resize(
image,
calculate_scaled_dimension(
size,
image
),
interpolation=cv2.INTER_AREA
)
return image_resized_scaled
def rotate_image(image, angle):
# Grab the dimensions of the image and then determine the center
(h, w) = image.shape[:2]
(cX, cY) = (w / 2, h / 2)
# grab the rotation matrix (applying the negative of the
# angle to rotate clockwise), then grab the sine and cosine
# (i.e., the rotation components of the matrix)
M = cv2.getRotationMatrix2D((cX, cY), -angle, 1.0)
cos = np.abs(M[0, 0])
sin = np.abs(M[0, 1])
# Compute the new bounding dimensions of the image
nW = int((h * sin) + (w * cos))
nH = int((h * cos) + (w * sin))
# Adjust the rotation matrix to take into account translation
M[0, 2] += (nW / 2) - cX
M[1, 2] += (nH / 2) - cY
# Perform the actual rotation and return the image
return cv2.warpAffine(image, M, (nW, nH))
image = cv2.imread('images/cc_test_img2.jpg')
original_image = image.copy()
image = scale_image(image, 500)
# convert the image to grayscale, blur it, and find edges in the image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray = cv2.bilateralFilter(gray, 11, 17, 17)
edged = cv2.Canny(gray, 30, 200)
cv2.imshow('edged', edged)
cv2.waitKey()
cnts = cv2.findContours(edged.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
cnts = sorted(cnts, key = cv2.contourArea, reverse = True)[:10]
screen_cnt = None
# loop over our contours
for c in cnts:
# approximate the contour
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.015 * peri, True)
if len(approx) == 4:
screen_cnt = approx
transformed = perspective_transform(image, screen_cnt)
break
# Draw ROI
cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 1)
(h, w) = transformed.shape[:2]
if (h > w):
rotated = rotate_image(transformed, 90)
else:
rotated = transformed
cv2.imshow("image", original_image)
cv2.imshow("ROI", image)
cv2.imshow("transformed", transformed)
cv2.imshow("rotated", rotated)
cv2.waitKey(0)
要测试的图像