
时间:2012-11-22 06:12:27

标签: python image-processing numpy


大部分时间都花在 for 循环上:在我的机器上,处理一张 512x52 的图像大约需要 40 多秒。我希望能把速度提高一些。有没有更高效/更快的方式来遍历图像?有没有更好的、更符合 Python / numpy / scipy 习惯的方式来访问每个像素?还是说我应该去学 cython?



import matplotlib.pylab as plt
import numpy as np
from import imread
from skimage.transform import integral_image, integrate
import time

def intersect(a, b):
    """Return the overlap of two rectangles.

    Both ``a`` and ``b`` are indexable as ``(r0, c0, r1, c1)``: the first
    two entries are the lower row/column bounds and the last two the upper
    bounds.

    Returns the overlapping rectangle in the same ``(r0, c0, r1, c1)``
    layout. If the overlap is not strictly positive along both axes
    (the rectangles are disjoint or merely touch), ``(0, 0, 0, 0)`` is
    returned instead.
    """
    # The overlap starts at the larger lower bound and ends at the smaller
    # upper bound, independently for rows and columns.
    row_lo = max(a[0], b[0])
    col_lo = max(a[1], b[1])
    row_hi = min(a[2], b[2])
    col_hi = min(a[3], b[3])

    if row_hi > row_lo and col_hi > col_lo:
        return (row_lo, col_lo, row_hi, col_hi)

    # No valid intersection.
    return (0, 0, 0, 0)

# Setup data
depth_src = imread("test.jpg", as_grey=True)
depth_intg = integral_image(depth_src)   # integrate to find sum depth in region
depth_pts = integral_image(depth_src > 0)  # integrate to find num points which have depth
boundary = (0,0,depth_src.shape[0]-1,depth_src.shape[1]-1) # rectangle to intersect with

# Image to accumulate response
out_img = np.zeros(depth_src.shape)

# Average dimensions of bbox/detection window per unit length of depth
# NOTE(review): documented as (width, height), yet model[0] scales the row
# extent and model[1] the column extent below -- confirm the intended order.
model = (0.602,2.044)  # width, height

start_time = time.time()
for (r,c), junk in np.ndenumerate(depth_src):
    # Find points around current pixel (3x3 neighbourhood clipped to the image)
    r0, c0, r1, c1 = intersect((r-1, c-1, r+1, c+1), boundary)

    # Calculate average of depth of points around current pixel.
    # NOTE(review): the divisor stays 9 even when the neighbourhood was
    # clipped at the image border -- confirm this is intended.
    scale =  integrate(depth_intg, r0, c0, r1, c1) * 255 / 9.0

    # Based on average depth, create the detection window (float coordinates)
    half_rows = model[0] * scale/2
    half_cols = model[1] * scale/2

    # Clip the scale-optimised detection window to the image, then truncate
    # to integers: the coordinates are used as array indices and slice bounds
    # below, and current NumPy raises TypeError for float indices. The clipped
    # values are expected to be non-negative, so int() should match the
    # implicit truncation that older NumPy applied to float indices.
    r0, c0, r1, c1 = [int(v) for v in intersect(
        (r - half_rows, c - half_cols, r + half_rows, c + half_cols),
        boundary)]

    # Use the detection window to extract features
    depth_count = integrate(depth_pts,r0,c0,r1,c1)
    if depth_count:
         depth_sum = integrate(depth_intg,r0,c0,r1,c1)
         avg_change = depth_sum / depth_count
         # Accumulate response
         out_img[r0:r1,c0:c1] += avg_change
# Single-argument form is valid on both Python 2 and Python 3 and prints the
# same text as the former Python 2 print statement.
print("%s  seconds" % (time.time() - start_time))


1 个答案:

答案 0 :(得分:3)

迈克尔,这个问题很有意思。看起来主要的性能瓶颈在于:图像中的每个像素都要调用两次 integrate() 函数,一次的窗口大小是 3x3,另一次的窗口大小事先未知。无论使用哪种 numpy 函数,以这种方式逐个计算积分都非常低效;这是算法问题,而不是实现问题。考虑一幅大小为 $N \times N$ 的图像。对于任意窗口大小 $K \times K$,只需大约 $4 N^2$ 次操作就可以算出该图像中所有的窗口积分,而不是(像人们可能天真地以为的那样)$N^2 K^2$ 次。做法是:先对每一行计算窗口长度为 $K$ 的滑动和,得到一幅中间图像,然后再对该结果的每一列计算滑动和。把滑动和移动到下一个像素时,只需加上新进入窗口的像素并减去离开窗口的最旧像素,因此无论窗口多大,每个像素只需要两次操作。这个过程要做两遍(行和列各一次),所以每个像素共需 4 次操作。



# img is a 2D ndarray
# K is the size of sums to calculate using sliding window (K >= 1)
# NOTE: zeros_like keeps img's dtype, so for narrow integer images the window
# sums can overflow; cast img to a wider dtype beforehand in that case.

# Pass 1: slide a length-K window along axis 0.
# Afterwards row_sums[i, j] == img[max(0, i-K+1):i+1, j].sum()
row_sums = np.zeros_like(img)
for i in range( img.shape[0] ):
    if i >= K:
        # Window is full: add the entering row, drop the one that just left.
        row_sums[i,:] = row_sums[i-1,:] - img[i-K,:] + img[i,:]
    elif i > 0:
        # Window is still growing: nothing to drop yet.
        row_sums[i,:] = row_sums[i-1,:] + img[i,:]
    else: # i == 0
        row_sums[i,:] = img[i,:]

# Pass 2: slide a length-K window along axis 1 over the pass-1 result.
col_sums = np.zeros_like(img)
for j in range( img.shape[1] ):
    if j >= K:
        col_sums[:,j] = col_sums[:,j-1] - row_sums[:,j-K] + row_sums[:,j]
    elif j > 0:
        col_sums[:,j] = col_sums[:,j-1] + row_sums[:,j]
    else: # j == 0
        col_sums[:,j] = row_sums[:,j]

# here col_sums[i,j] should be equal to np.sum(img[i-K+1:i+1, j-K+1:j+1])
# if i >= K-1 and j >= K-1
# first K-1 rows and columns in col_sums contain partial sums (window clipped
# at the image edge) and can be ignored


