Question

我想加快一段 pygame 图像处理代码的运行速度，这段代码会遍历每个像素并对其进行修改。我正在研究 numpy 的 nditer 函数，但还没弄清楚该如何用它来实现。

/// Override of the storyboard segue preparation hook.
///
/// Only the segue whose identifier equals "SecondVC" is singled out; the
/// branch body is still a placeholder and performs no work.
///
/// - Parameters:
///   - segue: The segue being inspected; only its `identifier` is read here.
///   - sender: Not used by this implementation.
override func prepareForSegue(segue: UIStoryboardSegue!, sender: AnyObject!) {
    if (segue.identifier == "SecondVC") {
        // set properties
        // NOTE(review): presumably the destination view controller is meant to be
        // configured here — nothing is implemented yet; confirm the intent.
    }
}

我正在对一个图像数据数组进行迭代，数组的索引形式为 [x][y][r, g, b]，代码如下：

# Iterate through main image
for x, row in enumerate(main):
    for y, pix1 in enumerate(row):
        # Check the pixel isn't too dark to worry about
        if pix1[0] + pix1[1] + pix1[2] > 10:
            # Calculate distance to light source
            light_distance = np.hypot( x - light_source_pos[0], y - light_source_pos[1] )
            # Calculate light intensity
            light_intensity = (300 - light_distance) / 300
            # Apply light color and intensity to the specular map, apply specular gain then add to main
            main[x][y] += light_color * light_intensity * specular[x][y] * specular_gain
            # Apply light color and intensity to the diffuse map, apply diffuse gain then add to main
            main[x][y] += light_color * light_intensity * diffuse[x][y] * diffuse_gain

该数组不是副本，它是对实际内存内容的引用。

如何创建一个遍历x和y坐标的迭代器，并尽快应用更改？

据我了解，按内存顺序处理像素，并把所有操作都放在迭代器循环内部，会更快一些，是这样吗？

编辑：上面的代码段经过了简化，以便于阅读；完整脚本已放在 gist 上。要运行它，您需要准备一些源图像。

Answer 1

从代码来看，这个实现是可以并行化的，因此可以把它改写为矢量化实现。在去掉循环的过程中，需要在某些位置扩展输入的维度，以便让 NumPy 的广播（broadcasting）机制发挥作用。

为了便于代码查找和维护，我假设这些缩写 -

S = specular
D = diffuse
LSP = light_source_pos
LC = light_color
S_gain = specular_gain
D_gain = diffuse_gain

这是向量化问题的一种方法 -

# Grid size comes from the image itself, so the snippet does not depend on
# M and N having been defined beforehand.
M, N = main.shape[:2]

# Mask for "pix1[0] + pix1[1] + pix1[2] > 10", evaluated for every pixel at
# once and before main is modified.
lit = main.sum(2) > 10

# Vectorize light_distance calculations and thereafter for light_intensity.
# The (M, 1) column of x offsets broadcasts against the (N,) row of y offsets
# to give an (M, N) distance grid.
LD = np.hypot(np.arange(M)[:,None] - LSP[0], np.arange(N) - LSP[1])
LI = (300 - LD) / 300

# Vectorized "LC * light_intensity * S[x][y] * S_gain" and
# "LC * light_intensity * D[x][y] * D_gain" calculations.
# LI[...,None] appends a channel axis so the intensity broadcasts over r, g, b.
tinted = LC*LI[...,None]
add_part = (tinted*S*S_gain) + (tinted*D*D_gain)

# Add only where the mask is set. Boolean indexing leaves dark pixels
# completely untouched, whereas multiplying by the mask would turn a
# non-finite contribution into NaN (inf * 0).
main[lit] += add_part[lit]

运行时测试并验证输出

定义函数 -

def original_app(main,S,D,LSP,LC,S_gain,D_gain):
    """Loop-based reference lighting pass; updates ``main`` in place.

    Every pixel whose three channel values sum to more than 10 receives a
    specular and a diffuse contribution, each scaled by the light colour and
    by an intensity that falls off linearly with distance from ``LSP``.

    Args:
        main: Image array indexed as ``main[x][y][channel]``; modified in place.
        S: Specular map, indexed like ``main``.
        D: Diffuse map, indexed like ``main``.
        LSP: Light source position; ``LSP[0]`` is x and ``LSP[1]`` is y.
        LC: Light colour multiplied into each contribution.
        S_gain: Scalar gain applied to the specular contribution.
        D_gain: Scalar gain applied to the diffuse contribution.

    Returns:
        None.
    """
    for x in range(len(main)):
        for y in range(len(main[x])):
            pixel = main[x][y]
            brightness = pixel[0] + pixel[1] + pixel[2]
            # Pixels that are too dark are left exactly as they are.
            if not brightness > 10:
                continue

            # Intensity is 1 at the light source and decreases linearly,
            # reaching 0 at a distance of 300.
            dist = np.hypot(x - LSP[0], y - LSP[1])
            falloff = (300 - dist) / 300
            tinted = LC * falloff

            # Two separate in-place additions: specular first, then diffuse.
            main[x][y] += tinted * S[x][y] * S_gain
            main[x][y] += tinted * D[x][y] * D_gain


def vectorized_app(main,S,D,LSP,LC,S_gain,D_gain):
    """Vectorized lighting pass; updates ``main`` in place.

    Every pixel whose channel values sum to more than 10 receives a specular
    and a diffuse contribution, each scaled by the light colour and by an
    intensity that falls off linearly with distance from ``LSP``.

    Args:
        main: Array indexed as ``main[x, y, channel]``; modified in place.
        S: Specular map, broadcastable against ``main``.
        D: Diffuse map, broadcastable against ``main``.
        LSP: Light source position; ``LSP[0]`` is x and ``LSP[1]`` is y.
        LC: Light colour, broadcast along the channel axis.
        S_gain: Scalar gain applied to the specular contribution.
        D_gain: Scalar gain applied to the diffuse contribution.

    Returns:
        None.
    """
    # Take the grid size from the image itself rather than from module-level
    # M and N, which may be undefined or describe a different array.
    M, N = main.shape[:2]

    # Decide which pixels are lit before anything is written to main.
    lit = main.sum(2) > 10

    # The (M, 1) column of x offsets broadcasts against the (N,) row of
    # y offsets, giving the distance of every pixel from the light source.
    LD = np.hypot(np.arange(M)[:,None] - LSP[0], np.arange(N) - LSP[1])
    LI = (300 - LD) / 300

    # LI[...,None] appends a channel axis so the intensity applies to r, g, b.
    tinted = LC*LI[...,None]
    add_part = (tinted*S*S_gain) + (tinted*D*D_gain)

    # Boolean indexing leaves dark pixels untouched; multiplying by the mask
    # instead would turn a non-finite contribution into NaN (inf * 0).
    main[lit] += add_part[lit]

运行时 -

In [38]: # Inputs
    ...: M,N,R = 300,200,3 # Shape as stated in the comments
    ...: main = np.random.rand(M,N,R)*10
    ...: S = np.random.rand(M,N,R)
    ...: D = np.random.rand(M,N,R)
    ...: LSP = [3,10]
    ...: LC = np.array([2,6,3])
    ...: S_gain = 0.45
    ...: D_gain = 0.22
    ...: 
    ...: # Make copies as functions would change those
    ...: mainc1 = main.copy()
    ...: mainc2 = main.copy()
    ...: 

In [39]: original_app(mainc1,S,D,LSP,LC,S_gain,D_gain)

In [40]: vectorized_app(mainc2,S,D,LSP,LC,S_gain,D_gain)

In [41]: np.allclose(mainc1,mainc2) # Verify outputs
Out[41]: True

In [42]: # Make copies again for timing as functions would change those
    ...: mainc1 = main.copy()
    ...: mainc2 = main.copy()
    ...: 

In [43]: %timeit original_app(mainc1,S,D,LSP,LC,S_gain,D_gain)
1 loops, best of 3: 1.28 s per loop

In [44]: %timeit vectorized_app(mainc2,S,D,LSP,LC,S_gain,D_gain)
100 loops, best of 3: 15.4 ms per loop

In [45]: 1280/15.4 # Speedup
Out[45]: 83.11688311688312

将像素（x，y）转换为numpy.nditer Iterator

1 个答案: