我正在尝试用手机相机配合 OpenCV 扫描护照页面。(图:带标记的护照样例)

在上图中,红色标记的轮廓是我的 ROI(需要得到它的俯视图)。通过图像分割,我可以检测出 MRZ 区域(绿色轮廓)。护照页面具有固定的宽高比。有没有办法利用这个宽高比把绿色轮廓缩放到接近红色轮廓?我尝试用 approxPolyDP 找到绿色矩形的四个角,然后缩放该矩形,最后进行透视变换以获得俯视图。问题是缩放矩形时没有考虑透视旋转,因此最终得到的矩形通常是错误的。

通常我会得到如下图所示的输出:(图片)



  • 我的目标:裁剪红色标记部分,然后获得顶视图
  • 我的方法:检测 MRZ(绿色矩形)→ 假设绿色矩形的底边与红色矩形的底边相同(足够接近)→ 由此得到矩形的宽度和两个角点 → 再利用高度/宽高比计算另外两个角点
  • 问题:上述计算得到的并不是红色矩形,而是第二张图中的绿色矩形(可能是因为这些四边形并不是真正的矩形,各边之间的夹角也不是 0 度或 90 度)

据我了解,您的主要目标是在从任意角度拍摄护照页面时得到它的俯视图。您的方法如下:

  1. 找到 MRZ 以及包围它的多边形
  2. 将 MRZ 多边形向上扩展,得到整个页面的多边形
  3. 对页面多边形进行透视变换,得到俯视图。




new_left_top_x = old_left_bottom_x + (old_left_top_x - old_left_bottom_x) * pass_height_to_MRZ_height_ratio
new_left_top_y = old_left_bottom_y + (old_left_top_y - old_left_bottom_y) * pass_height_to_MRZ_height_ratio




import os
import imutils
import numpy as np
import argparse
import cv2

# Tunable parameters for page detection and rectification.
# Page width / height ratio: a contour's bounding box must be wider than this
# ratio to be accepted as the page, and the rectified output image is made
# this many times wider than it is tall.
passport_page_aspect_ratio = 1.44
# A candidate's bounding-box width must exceed this fraction of the image width.
passport_page_coverage_ratio_threshold = 0.6
# Kernel size passed to cv2.getStructuringElement for the morphology step.
morph_size = (4, 4)

def pre_process_image(image):
    """Turn a BGR image into a cleaned-up binary mask.

    The image is converted to grayscale, binarized with an Otsu threshold and
    then morphologically opened with a rectangular kernel of ``morph_size``.

    Args:
        image: Colour image in BGR channel order.

    Returns:
        The binarized image after the opening step.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU the threshold level is picked automatically, so the
    # 0 passed here is only a placeholder.
    _, binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )

    # Opening = erosion followed by dilation with the same kernel: white
    # regions joined by thin bridges get disconnected, and the surviving
    # regions are grown back to their original extent.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, morph_size)
    return cv2.morphologyEx(
        binary, cv2.MORPH_OPEN, kernel, anchor=(-1, -1), iterations=1
    )

def find_passport_page_polygon(image):
    """Find the passport page in a binary image as a 4-vertex polygon.

    External contours are examined from the largest area to the smallest.
    A contour is a candidate when its bounding box is wider than
    ``passport_page_aspect_ratio`` times its height and spans more than
    ``passport_page_coverage_ratio_threshold`` of the image width. The first
    candidate whose polygon approximation has exactly four vertices wins.

    Args:
        image: Single-channel binary image to search for contours.

    Returns:
        The array returned by ``cv2.approxPolyDP`` for the selected contour
        (four vertices), or ``None`` when no contour qualifies.
    """
    contours = imutils.grab_contours(
        cv2.findContours(image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    )
    image_width = float(image.shape[1])

    for cnt in sorted(contours, key=cv2.contourArea, reverse=True):
        # Bounding-box aspect ratio and the fraction of the image width it covers.
        (_x, _y, w, h) = cv2.boundingRect(cnt)
        ar = w / float(h)
        cr_width = w / image_width

        if not (ar > passport_page_aspect_ratio
                and cr_width > passport_page_coverage_ratio_threshold):
            continue

        # Approximate the contour with a coarse polygon (2% of the perimeter).
        epsilon = 0.02 * cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, epsilon, True)

        # The perspective warp needs exactly four corners; an approximation
        # with any other vertex count cannot be rectified, so try the next
        # candidate instead of returning an unusable polygon.
        if len(approx) == 4:
            return approx

    return None

def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    The top-left corner is the point with the smallest ``x + y``. The other
    three follow in order of increasing angle around the centroid, which is
    clockwise on screen when the y axis points down (image coordinates).
    Unlike picking each corner independently by coordinate sum/difference,
    this always returns the four input points exactly once each, so rotated
    quadrilaterals cannot end up with a duplicated corner.

    Args:
        pts: Four 2-D points; any array-like that reshapes to ``(4, 2)``,
            e.g. the ``(4, 1, 2)`` output of ``cv2.approxPolyDP``.

    Returns:
        A ``(4, 2)`` float32 array in the order described above.

    Raises:
        ValueError: If ``pts`` does not contain exactly four 2-D points.
    """
    pts = np.asarray(pts, dtype="float64").reshape(-1, 2)
    if pts.shape[0] != 4:
        raise ValueError(
            "order_points expects exactly 4 points, got %d" % pts.shape[0]
        )

    # Sort the points by their angle around the centroid to get a consistent
    # walk around the quadrilateral.
    centre = pts.mean(axis=0)
    angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
    order = np.argsort(angles, kind="stable")

    # Start the walk at the top-left corner (smallest coordinate sum).
    tl_index = int(np.argmin(pts.sum(axis=1)))
    start = int(np.nonzero(order == tl_index)[0][0])

    return pts[np.roll(order, -start)].astype("float32")

def get_passport_top_vew(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` into a flat, top-down view.

    The output height is the longer of the quadrilateral's left and right
    edges (truncated to whole pixels); the output width is derived from that
    height using ``passport_page_aspect_ratio``.

    Args:
        image: Source image to warp.
        pts: Four corner points of the page, in any order.

    Returns:
        The perspective-corrected image of size ``(max_width, max_height)``.
    """
    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    def _edge_length(p, q):
        # Euclidean distance between two 2-D points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    right_edge = _edge_length(top_right, bottom_right)
    left_edge = _edge_length(top_left, bottom_left)
    max_height = max(int(right_edge), int(left_edge))

    # The width is not measured; it follows from the known page proportions.
    max_width = int(max_height * passport_page_aspect_ratio)

    # Destination corners, in the same TL, TR, BR, BL order as ``corners``.
    right = max_width - 1
    bottom = max_height - 1
    dst = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype="float32",
    )

    transform = cv2.getPerspectiveTransform(corners, dst)
    return cv2.warpPerspective(image, transform, (max_width, max_height))

if __name__ == "__main__":
    # Command-line entry point: read one image, locate the passport page and
    # write three PNG files next to the input:
    #   <base>.pre.png    - binarized, pre-processed image
    #   <base>.bounds.png - input with the detected page polygon drawn on it
    #   <base>.top.png    - perspective-corrected top view of the page
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", required=True, help="path to the input image")
    args = vars(ap.parse_args())

    in_file = args["image"]
    # splitext()[0] strips only the trailing extension; str.replace() would
    # also remove the same text anywhere earlier in the path.
    filename_base = os.path.splitext(in_file)[0]

    img = cv2.imread(in_file)
    if img is None:
        # cv2.imread signals an unreadable or missing file by returning None.
        ap.error("could not read image: " + in_file)

    pre_processed = pre_process_image(img)

    # Visualizing pre-processed image
    cv2.imwrite(filename_base + ".pre.png", pre_processed)

    page_polygon = find_passport_page_polygon(pre_processed)

    if page_polygon is None:
        print("No passport page polygon found in " + in_file)
    else:
        # Visualizing found page polygon
        vis = img.copy()
        cv2.polylines(vis, [page_polygon], True, (0, 255, 0), 2)
        cv2.imwrite(filename_base + ".bounds.png", vis)

        # Visualizing the warped top view of the passport page
        top_view_page = get_passport_top_vew(img, page_polygon)
        cv2.imwrite(filename_base + ".top.png", top_view_page)


(图:检测到的护照页面边界)(图片)
