Question

我使用以下方法进行图像对齐。当我将 warp_mode 设为 cv2.MOTION_TRANSLATION 时，以下代码可以正常工作，在只有平移偏移的图像上得到了相当不错的结果。随后我沿用这段代码，把 warp_mode 改为 cv2.MOTION_EUCLIDEAN，以处理带有旋转偏移的图像。但是在输出第一张照片之后，程序的执行就变得非常缓慢。

import cv2
import numpy as np

path = "R:\\Temp\\xx\\ProcessedPhoto_in_PNG\\"
path1 = "R:\\Temp\\xx\\AlignedPhoto_in_PNG_EUCLIDEAN\\"


def alignment():
    """Align IMG_1770 .. IMG_1868 to IMG_1770 with ECC and save the results.

    Every frame is read from ``path``, registered against the reference
    frame ``IMG_1770.png`` with ``cv2.findTransformECC`` (Euclidean motion
    model), warped into the reference frame and written to ``path1`` as
    ``AlignedEU_IMG_<n>.png``. The frame number is printed after each warp.

    Everything that does not depend on the current frame (reference image,
    its grayscale version, output size, motion model, termination criteria)
    is prepared once, before the loop, instead of once per frame.
    """
    # Define the motion model: TRANSLATION, EUCLIDEAN, AFFINE or HOMOGRAPHY
    warp_mode = cv2.MOTION_EUCLIDEAN
    is_homography = warp_mode == cv2.MOTION_HOMOGRAPHY

    # Maximum number of ECC iterations.
    number_of_iterations = 5000

    # Threshold of the increment in the correlation coefficient between
    # two iterations.
    # NOTE(review): a threshold this tight may never be reached, in which
    # case every frame runs the full iteration count; consider relaxing it
    # (e.g. 1e-6) if run time matters.
    termination_eps = 1e-10

    criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                number_of_iterations, termination_eps)

    # The reference frame is the same for every iteration: load it once.
    im1 = cv2.imread(path + 'IMG_1770.png')
    im1_gray = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)

    # Output size (width, height) taken from the reference image.
    sz = im1.shape
    out_size = (sz[1], sz[0])
    warp_flags = cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP

    for i in range(1770, 1869):
        # Read the image to be aligned and convert it to grayscale
        im2 = cv2.imread(path + 'IMG_%d.png' % (i))
        im2_gray = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)

        # Start every frame from a fresh identity warp
        # (3x3 for homography, 2x3 for the other motion models).
        warp_matrix = np.eye(3 if is_homography else 2, 3, dtype=np.float32)

        # Run the ECC algorithm. The results are stored in warp_matrix.
        (_, warp_matrix) = cv2.findTransformECC(
            im1_gray, im2_gray, warp_matrix, warp_mode, criteria)

        if is_homography:
            # Use warpPerspective for Homography
            im2_aligned = cv2.warpPerspective(
                im2, warp_matrix, out_size, flags=warp_flags)
        else:
            # Use warpAffine for Translation, Euclidean and Affine
            im2_aligned = cv2.warpAffine(
                im2, warp_matrix, out_size, flags=warp_flags)
        print(i)

        cv2.imwrite(path1 + "AlignedEU_IMG_%d.png" % i, im2_aligned)


alignment()

有什么办法可以加快这个过程吗？我怎样才能提高代码的运行速度？等待 30 分钟后，程序仍然停留在第二张输出照片上。我的每张图片大约 16 MB，而且亮度不均匀。我之所以使用 ECC 图像对齐而不是其他方法，是因为这种对齐方法对光度失真具有不变性。

 >>> 
 RESTART: C:\Users\310293649\AppData\Local\Programs\Python\Python36\ImageAnalysisCODING\Picture Alignment.py 
1770

编辑：我尝试按照 Alexander Reynolds 在回答中的建议重写了代码。

import cv2
import numpy as np


path = "R:\\ProcessedPhoto_in_PNG\\"
path1 = "R:\\AlignedPhoto_in_PNG_EUCLIDEAN\\"

# nol: number of pyramid levels (level 0 is the smallest image)
nol = 3

warp_mode = cv2.MOTION_EUCLIDEAN

# Identity initial guess: 3x3 for homography, 2x3 for the other models.
if warp_mode == cv2.MOTION_HOMOGRAPHY:
    warp = np.eye(3, 3, dtype=np.float32)
else:
    warp = np.eye(2, 3, dtype=np.float32)

# Element-wise factors that convert a warp to the next finer pyramid level:
# the translation column is doubled and, for a homography, the two
# perspective entries of the last row are halved.
tmp = np.array([[1, 1, 2], [1, 1, 2], [1/2, 1/2, 1]], dtype=np.float32)

# Bring the full-resolution initial guess down to the smallest level.
# This must be an element-wise product (not np.dot): a matrix product with
# the 2x3 identity would yield a singular warp. Only the rows matching the
# warp's shape are used.
warp = warp * tmp[:warp.shape[0]] ** (1 - nol)

# Specify the number of iterations.
number_of_iterations = 5000

# Specify the threshold of the increment
# in the correlation coefficient between two iterations
# NOTE(review): a threshold this tight may never be reached; consider
# relaxing it (e.g. 1e-6) if run time matters.
termination_eps = 1e-10

# Define termination criteria
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
            number_of_iterations, termination_eps)


def alignment(criteria, warp_mode, warp, nol):
    """Align IMG_1770 .. IMG_1868 to IMG_1770 with coarse-to-fine ECC.

    For every frame the warp is estimated on the smallest pyramid level
    first and then refined level by level up to full resolution. The frame
    is finally warped once with the full-resolution result and written to
    ``path1`` as ``AlignedEU_IMG_<n>.png``; the frame number is printed.

    Args:
        criteria: OpenCV termination criteria tuple for findTransformECC.
        warp_mode: one of the ``cv2.MOTION_*`` constants.
        warp: initial guess, already scaled to the smallest pyramid level
            (2x3, or 3x3 for ``cv2.MOTION_HOMOGRAPHY``). It is not modified.
        nol: number of pyramid levels (>= 1).
    """
    is_homography = warp_mode == cv2.MOTION_HOMOGRAPHY

    # Per-level element-wise up-scaling factors, cut to the warp's shape.
    level_up = np.array([[1, 1, 2], [1, 1, 2], [1/2, 1/2, 1]],
                        dtype=np.float32)[:3 if is_homography else 2]

    # Image scale of each level, smallest first; the last one is 1 (full size).
    scales = [1 / 2 ** (nol - 1 - level) for level in range(nol)]

    # The reference frame never changes: load it and build its pyramid once.
    im = cv2.imread(path + 'IMG_1770.png')
    im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    ref_pyramid = [
        cv2.resize(im_gray, None, fx=s, fy=s, interpolation=cv2.INTER_AREA)
        for s in scales
    ]

    # Aligned frames live in the reference frame's coordinates.
    sz = im.shape
    out_size = (sz[1], sz[0])
    warp_flags = cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP

    for i in range(1770, 1869):
        im2 = cv2.imread(path + 'IMG_%d.png' % (i))
        im2_gray = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)

        # Fresh float32 copy per frame, so one frame's result (or a failed
        # estimate) is not used as the initial guess for the next frame.
        frame_warp = np.array(warp, dtype=np.float32)

        for level, s in enumerate(scales):
            moving = cv2.resize(im2_gray, None, fx=s, fy=s,
                                interpolation=cv2.INTER_AREA)

            (_, frame_warp) = cv2.findTransformECC(
                ref_pyramid[level], moving, frame_warp, warp_mode, criteria)

            if level != nol - 1:
                # scale up for the next pyramid level (element-wise)
                frame_warp = frame_warp * level_up

        # Warp the full-size colour frame once, with the final estimate.
        if is_homography:
            # Use warpPerspective for Homography
            im2_aligned = cv2.warpPerspective(
                im2, frame_warp, out_size, flags=warp_flags)
        else:
            # Use warpAffine for Translation, Euclidean and Affine
            im2_aligned = cv2.warpAffine(
                im2, frame_warp, out_size, flags=warp_flags)
        print(i)

        cv2.imwrite(path1 + "AlignedEU_IMG_%d.png" % i, im2_aligned)


alignment(criteria, warp_mode, warp, nol)

我收到了此错误消息

>>> 
=============== RESTART: C:\Users\310293649\Desktop\resize.py ===============
Traceback (most recent call last):
  File "C:\Users\310293649\Desktop\resize.py", line 67, in <module>
    alignment(criteria, warp_mode, warp, nol)
  File "C:\Users\310293649\Desktop\resize.py", line 48, in alignment
    warp = cv2.findTransformECC(im_gray, im1_gray, warp, warp_mode, criteria)
cv2.error: D:\Build\OpenCV\opencv-3.3.0\modules\video\src\ecc.cpp:540: error: (-7) The algorithm stopped before its convergence. The correlation is going to be minimized. Images may be uncorrelated or non-overlapped in function cv::findTransformECC

>>>

Answer 1

即使图像很大，三十分钟也太离谱了。我敢打赌，这是因为你设的 1e-10 容差过于严格；算法很可能到了那个精度就只是在来回振荡，无法再得到更好的对齐结果。你应该把容差放宽一些，比如试试 1e-6。

加速当前代码的最佳方法（特别是对于完整的单应性匹配）是实现金字塔方法：先在图像的缩小版本上运行算法，然后把得到的单应性作为下一级尺寸的初始猜测，依此类推，直到达到完整尺寸。这通常要快得多。典型的做法是把每个维度反复缩小一半，直到图像足够小（大约 300x300 像素左右），在该尺寸上运行算法，然后逐级放大。请注意，每放大一级都必须相应地缩放单应性，不过这并不难。如果 warp 是最小尺度上的单应性，那么金字塔中下一级（每个维度的大小为两倍）的初始猜测应为（注意这里是逐元素相乘，而不是矩阵乘法）：

warp = warp * np.array([[1, 1, 2], [1, 1, 2], [1/2, 1/2, 1]])

当然，您不需要缩放底行以进行仿射变换。所以伪算法将是：

create a pyramid of image resolutions, halving the h, w each time
warp = np.eye(3)
for each image in the pyramid from smallest to second to largest
    warp = findTransformECC(..., warp, ...)
    warp = warp * np.array([[1, 1, 2], [1, 1, 2], [1/2, 1/2, 1]])
warp = findTransformECC(full resolution images, warp, ...)

ECC 是密集对齐方法（它会考察图像中每一个像素的变化），因此比较耗时，不过上述加速应该能让对齐在几秒钟而不是几小时内完成。此外，使用 Lucas-Kanade 或其他基于特征的稀疏方法可能效果更好（特征同样可以对光照条件保持不变）。OpenCV 的 Lucas-Kanade 函数已经内置了金字塔功能；你可以查看相关教程，或 OpenCV 的示例 lk_homography.py。

我以前写过一个自定义的密集 Lucas-Kanade 方案，并自己实现了金字塔；我无法完整地分享它，因为那份代码不归我所有，但我可以给出要点：

# Sketch of a coarse-to-fine (pyramid) registration loop. `initWarp`, `img`,
# `tmp` and `your_warping_algorithm` are placeholders that are not defined in
# this snippet, and the trailing `return` implies it is meant to live inside
# a function.
nol = 5 # nol: number of levels
# maybe do some calculation to decide the nol based on h, w

# initial guess may not be the identity warp, so scale to smallest level
# (assuming initWarp is a plain 3x3 ndarray, `*` and `**` act element-wise
# here; this is not a matrix product)
warp = initWarp
warp = warp * np.array([[1, 1, 2], [1, 1, 2], [1/2, 1/2, 1]])**(1-nol)

for level in range(nol):

    # level 0 uses the smallest images; at level nol-1 the scale is 1
    scale = 1/2**(nol-1-level)
    rszImg = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
    rszTmp = cv2.resize(tmp, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

    # refine the current guess on this level's images
    warp = your_warping_algorithm(rszImg, rszTmp, warp, ...)

    if level != nol-1:
        # might want some error catching here to reset initial guess
        # if your algorithm fails at some level of the pyramid

        # scale up for the next pyramid level
        warp = warp * np.array([[1, 1, 2], [1, 1, 2], [1/2, 1/2, 1]])

return warp

编辑：当图像不像你的示例那样已经接近对齐时，上面的方法非常有用；图像偏离得越远，它带来的加速越显著，得到的单应性也越好。对于你当前的代码，金字塔方法确实能提速，但幅度不大——大约快 2 倍。我现在明白你的代码为什么这么慢了：你是在大量图像上执行此操作，而不只是一对图像。用 ECC 做配准确实很耗时，因为它是一种密集算法，每次迭代都要考察每个像素的变换，而大图像中的像素非常多。一个简单有效的加速办法就是直接缩小图像。如果你需要的是全尺寸图像的单应性，仍然可以按照上文所述的缩放方式，把从较小图像得到的结果换算回去。

与完全比例法相比，我做了金字塔方法的一些时间安排。这是代码和结果：

import cv2
import numpy as np
import timeit


"""Inits"""


# Image pair used by all timing runs; h, w give the output size of the warps.
img1 = cv2.imread('IMG_1770_1.png')
img2 = cv2.imread('IMG_1868_1.png')
h, w = img1.shape[:2]

# ECC params
# init_warp is the shared initial guess (2x3 identity) for every run below,
# so it must never be handed to OpenCV directly (see the copy further down).
init_warp = np.array([[1, 0, 0], [0, 1, 0]], dtype=np.float32)
n_iters = 1000
e_thresh = 1e-6
warp_mode = cv2.MOTION_EUCLIDEAN
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, n_iters, e_thresh)


"""Full scale ECC algorithm"""


full_scale_start_time = timeit.default_timer()

gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
# Pass a copy: the warp matrix is an in/out argument, so passing init_warp
# itself would overwrite it with the converged result and give any later run
# that starts from init_warp an unfair head start.
cc, warp = cv2.findTransformECC(gray1, gray2, init_warp.copy(), warp_mode,
                                criteria)
print('Non-pyramid time:', timeit.default_timer() - full_scale_start_time)

# write blended warp and diff
img2_aligned = cv2.warpAffine(img2, warp, (w, h), flags=cv2.WARP_INVERSE_MAP)
blended = cv2.addWeighted(img1, 0.5, img2_aligned, 0.5, 0)
cv2.imwrite('full_scale_blended.png', blended)
warp_diff = cv2.absdiff(img2_aligned, img1)
cv2.imwrite('full_scale_diff.png', warp_diff)


"""Pyramid ECC algorithm"""


pyr_start_time = timeit.default_timer()

# The starting warp need not be the identity, so bring it down to the
# coarsest level first. `level_up` holds the element-wise factors for one
# level step: only the translation column is scaled for a 2x3 warp.
nol = 4
level_up = np.array([[1, 1, 2], [1, 1, 2]], dtype=np.float32)
warp = init_warp * level_up**(1-nol)

last_level = nol-1
for level in range(nol):
    lvl_start_time = timeit.default_timer()

    # shrink both colour images to this level's size, then go to grayscale
    shrink = 1/2**(last_level-level)
    small1, small2 = (
        cv2.resize(src, None, fx=shrink, fy=shrink,
                   interpolation=cv2.INTER_AREA)
        for src in (img1, img2)
    )
    small_gray1 = cv2.cvtColor(small1, cv2.COLOR_BGR2GRAY)
    small_gray2 = cv2.cvtColor(small2, cv2.COLOR_BGR2GRAY)

    cc, warp = cv2.findTransformECC(small_gray1, small_gray2, warp,
                                    warp_mode, criteria)

    # every level but the last feeds the next, twice-as-large level
    if level < last_level:
        warp = warp * level_up

    print('Level %i time: '%level, timeit.default_timer() - lvl_start_time)

print('Pyramid time:', timeit.default_timer() - pyr_start_time)

# save the 50/50 blend and the absolute difference of the aligned pair
img2_aligned = cv2.warpAffine(img2, warp, (w, h), flags=cv2.WARP_INVERSE_MAP)
blended = cv2.addWeighted(img1, 0.5, img2_aligned, 0.5, 0)
cv2.imwrite('pyr_blended.png', blended)
warp_diff = cv2.absdiff(img2_aligned, img1)
cv2.imwrite('pyr_diff.png', warp_diff)

非金字塔时间：6.001738801016472
  0级时间：0.13332156010437757
  1级时间：0.2627768460661173
  2级时间：0.7635528810787946
  3级时间：2.0936299220193177
  金字塔时间：3.253465031972155

金字塔方法背后的想法是先得到一个接近真实值的单应性初始猜测，从而让算法更快终止。金字塔的最后一级（第 3 级）只需约 2 秒，而非金字塔方法需要约 6 秒，尽管两者都是在全尺寸图像上运行——因为前者有更好的初始猜测。金字塔方法通常更快，还因为初始猜测是在较小的图像上求得的，而算法在小图像上运行得快得多。

请记住，精度阈值（termination_eps）并不是在 warp 达到某个精度时终止算法，而是在相邻两次迭代之间的变化量小于该阈值时终止。如果 epsilon 非常小，比如 1e-10，算法很可能会出现振荡，永远达不到该阈值，最终只能在用尽迭代次数后才停止。

对金字塔方法做一些预处理，还可以进一步提高速度。先把图像转换为灰度图，再用上一次缩小后的图像来构建下一级更小的图像——这样每次缩放处理的都是更小的图像。之后在 for 循环中就不必再做任何颜色转换或缩放，直接使用金字塔中的图像即可。此外，可以在前几级降低所要求的精度，因为只有最后一级才需要非常精确的 warp；较小图像上的 warp 只是为下一级提供粗略的初始猜测，不需要达到亚像素精度。下面我先预构建金字塔，再在算法中使用它，并对整个过程计时。看起来它带来了约 3 倍的加速：现在整个算法不到 1 秒，而全尺寸 ECC 算法需要 6 秒。所以这样要好得多。

"""Pre-built pyramid ECC algorithm"""


pyr_start_time = timeit.default_timer()

# Scale the full-resolution initial guess down to the smallest level
# (element-wise: only the translation column of the 2x3 warp changes).
nol = 4
warp = init_warp
warp = warp * np.array([[1, 1, 2], [1, 1, 2]], dtype=np.float32)**(1-nol)

# construct grayscale pyramid
gray1 = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)
gray2 = cv2.cvtColor(img2, cv2.COLOR_BGR2GRAY)
gray1_pyr = [gray1]
gray2_pyr = [gray2]

# The full-size image is already one level, so only nol-1 halvings are
# needed. Looping nol times would create nol+1 levels; the ECC loop below
# would then stop at the half-size image and never reach full resolution,
# and the initial scaling above would not match the smallest level.
# Each new level is resized from the previous (smaller) one and put in front,
# so index 0 is the smallest image and index nol-1 the full-size one.
for level in range(nol-1):
    gray1_pyr.insert(0, cv2.resize(gray1_pyr[0], None, fx=1/2, fy=1/2,
                                   interpolation=cv2.INTER_AREA))
    gray2_pyr.insert(0, cv2.resize(gray2_pyr[0], None, fx=1/2, fy=1/2,
                                   interpolation=cv2.INTER_AREA))

# run pyramid ECC
for level in range(nol):
    lvl_start_time = timeit.default_timer()

    cc, warp = cv2.findTransformECC(gray1_pyr[level], gray2_pyr[level],
                                    warp, warp_mode, criteria)

    if level != nol-1:  # scale up for the next pyramid level
        warp = warp * np.array([[1, 1, 2], [1, 1, 2]], dtype=np.float32)

    print('Level %i time: '%level, timeit.default_timer() - lvl_start_time)

print('Pyramid time:', timeit.default_timer() - pyr_start_time)

等级0时间：0.026944385026581585
  1级时间：0.06884818698745221
  2级时间：0.22921762999612838
  3级时间：0.5990059389732778
  金字塔时间：0.9410004370147362

关于warp矩阵的乘法：

如果某个单应性关联了 img1 和 img2，那么关联 half_size_img1 和 half_size_img2（即高度和宽度都减半）的单应性，除了平移量减半之外完全相同（全尺寸图像中 10 像素的平移，在半尺寸图像中是 5 像素）。因此，在金字塔循环之前，如果你有一个关联两幅全尺寸图像的初始 warp 猜测，并打算把它作为缩小后图像的初始猜测，就需要按金字塔的级数把它相应缩小。所以我在 for 循环之前先把它缩放到最小尺度。请注意，如果初始猜测始终只是单位矩阵，这一步完全没有必要，因为这个乘法不会改变任何东西；但保留它很重要，因为你可能会有一个非单位的初始猜测。

在 for 循环每次迭代的末尾，我用同样的方式进行缩放，只不过方向相反：从较小的图像过渡到尺寸加倍的图像，因此需要把平移量乘以 2。但最后一级不需要这样做，因为最后一级已经是全尺寸图像，所以用 if 语句排除了这种情况。

如果使用的是完整的单应性而不是仿射 warp，那么需要缩放的就不只是平移量。用于完整单应性的缩放矩阵见本回答开头。做法实际上是一样的，只是单应性最后一行的两个非线性（透视）项还要乘以 1/2。

cv2.MOTION_EUCLIDEAN用于ECC图像对齐方法中的warp_mode

1 个答案: