Python:用Numba改进这种反应扩散算法?

时间:2017-11-30 17:10:30

标签: python performance python-imaging-library numba

基于Daniel Shiffman的Coding Challenge #13: Reaction Diffusion Algorithm in p5.js,我做了以下python脚本:

import numpy as np  
from PIL import Image
from numba import jit 

@jit
def draw(imgA, imgB, nextImgA, nextImgB):
    """Advance the reaction-diffusion simulation by one time step.

    Args:
        imgA: 2-D array holding the current values of the first quantity.
        imgB: 2-D array holding the current values of the second quantity;
            indexed with the same bounds as ``imgA``.
        nextImgA: 2-D array that receives the updated interior of ``imgA``.
        nextImgB: 2-D array that receives the updated interior of ``imgB``.

    Returns:
        A tuple ``(nextImgA, nextImgB, imgA, imgB)``: the new state clipped
        to [0, 1] followed by the previous state, so the caller can reuse
        the old arrays as scratch buffers on the next call.

    The coefficients ``dA``, ``dB``, ``feed`` and ``k`` are read from module
    globals.  NOTE(review): Numba captures globals as constants when the
    function is compiled, so changing them after the first call has no
    effect on the compiled version.
    """
    # 3x3 weighted Laplacian stencil; the weights sum to zero.
    laplace_kernel = np.array([[ .05,   .2, .05],
                               [  .2, -1.0,  .2],
                               [ .05,   .2, .05]])

    # Take the loop bounds from the data itself instead of the module-level
    # w/h, so a grid of any size is processed correctly.
    rows, cols = imgA.shape

    # Only interior cells are updated; the one-cell border keeps whatever
    # values nextImgA/nextImgB already contain.
    for x in range(1, rows - 1):
        for y in range(1, cols - 1):
            a = imgA[x, y]
            b = imgB[x, y]
            laplace_a = (imgA[x-1 : x+2, y-1 : y+2] * laplace_kernel).sum()
            laplace_b = (imgB[x-1 : x+2, y-1 : y+2] * laplace_kernel).sum()

            nextImgA[x, y] = a + (dA * laplace_a) - (a * b * b) + (feed * (1 - a))
            nextImgB[x, y] = b + (dB * laplace_b) + (a * b * b) - ((k + feed) * b)

    # np.clip returns new arrays; the names are rebound to the clipped copies.
    nextImgA = np.clip(nextImgA, 0.0, 1.0)
    nextImgB = np.clip(nextImgB, 0.0, 1.0)

    return nextImgA, nextImgB, imgA, imgB

# Simulation coefficients; draw() reads dA, dB, feed and k as module globals.
dA   = 1
dB   = 0.5
feed = 0.055
k    = 0.062
# Grid size used to allocate the arrays below.
w    = 200
h    = 200

# Current state (imgA, imgB) and the buffers that receive the next state.
imgA     = np.ones ( (w, h) )
imgB     = np.zeros( (w, h) )
nextImgA = np.ones ( (w, h) )
nextImgB = np.zeros( (w, h) )

# Seed a 10x10 square of the second quantity to start the reaction.
imgB[100:110, 100:110] = 1.0

for i in range(10000):
    # Call form of print: same output on Python 2, and valid on Python 3.
    print(i)
    # draw() returns (new A, new B, old A, old B); unpacking swaps the buffers.
    imgA, imgB, nextImgA, nextImgB = draw(imgA, imgB, nextImgA, nextImgB)


# Map the difference A - B to 8-bit grey levels and save it.
finalImg = np.clip((imgA - imgB) * 255, 0, 255)
img = Image.fromarray(np.uint8(finalImg))
img.save("test2.png")

有效。经过10000次迭代后,我得到的是:

enter image description here

但它很慢!我需要等上好几分钟才能得到结果。Numba 已经帮了很大的忙,但我仍然想知道:有没有办法进一步提升这个脚本的性能?或者有没有能更快收敛到同一图案的方法?

1 个答案:

答案 0 :(得分:0)

感谢 @chrisb 和 @Rutger Kassies 的评论。看起来开销最大的操作是 laplace_kernel 与滑动窗口之间的逐元素乘法(element-wise multiplication)。通过用一个预先分配的矩阵来存放乘法结果,我略微提高了性能:

#!/usr/bin/python2.7 -S
# -*- coding: utf-8 -*-
import numpy as np  
from PIL import Image
from numba import jit , njit
from time import time

@njit
def draw(imgA, imgB, nextImgA, nextImgB):
    """Advance the reaction-diffusion simulation by one time step.

    Args:
        imgA: 2-D array holding the current values of the first quantity.
        imgB: 2-D array holding the current values of the second quantity;
            indexed with the same bounds as ``imgA``.
        nextImgA: 2-D array that receives the updated interior of ``imgA``.
        nextImgB: 2-D array that receives the updated interior of ``imgB``.

    Returns:
        A tuple ``(nextImgA, nextImgB, imgA, imgB)``: the new (unclipped)
        state followed by the previous state, so the caller can reuse the
        old arrays as scratch buffers.  Clipping is left to the caller.
    """
    # 3x3 weighted Laplacian stencil; the weights sum to zero.
    laplace_kernel = np.array( (( .05,   .2, .05),
                                (  .2, -1.0,  .2),
                                ( .05,   .2, .05)) )
    # Diffusion coefficients of the two quantities, feed rate and kill rate.
    dA   = 1.0
    dB   = 0.5
    feed = 0.055
    k    = 0.062

    # Scratch buffer reused as the `out` argument of np.multiply so the
    # window * kernel product does not allocate a new array per cell.
    storeMatrix = np.zeros((3, 3))

    # Take the loop bounds from the data itself instead of the module-level
    # w/h, so a grid of any size is processed correctly.
    rows, cols = imgA.shape

    # Only interior cells are updated; the one-cell border keeps whatever
    # values nextImgA/nextImgB already contain.
    for x in range(1, rows - 1):
        for y in range(1, cols - 1):
            a = imgA[x, y]
            b = imgB[x, y]
            abb = a * b * b

            # Use the named coefficients instead of a hard-coded 0.5;
            # multiplying by dA == 1.0 is exact, so results are unchanged.
            laplace_a = np.multiply(imgA[x-1 : x+2, y-1 : y+2], laplace_kernel, storeMatrix).sum() * dA
            laplace_b = np.multiply(imgB[x-1 : x+2, y-1 : y+2], laplace_kernel, storeMatrix).sum() * dB

            nextImgA[x, y] = a + (laplace_a - abb + (feed * (1 - a)))
            nextImgB[x, y] = b + (laplace_b + abb - ((k + feed) * b))

    return nextImgA, nextImgB, imgA, imgB

# Grid size used to allocate the arrays below.
w = 200
h = 200

# Current state (imgA, imgB) and the buffers that receive the next state.
imgA     = np.ones ( (w, h) , dtype=np.float32)
imgB     = np.zeros( (w, h) , dtype=np.float32)
nextImgA = np.ones ( (w, h) , dtype=np.float32)
nextImgB = np.zeros( (w, h) , dtype=np.float32)
# Seed a 10x10 square of the second quantity to start the reaction.
imgB[100:110, 100:110] = 1.0

t = time()
for i in range(10000):
    # Call form of print: same output on Python 2, and valid on Python 3.
    print(i)
    # draw() returns (new A, new B, old A, old B); unpacking swaps the buffers.
    imgA, imgB, nextImgA, nextImgB = draw(imgA, imgB, nextImgA, nextImgB)
    # Keep the freshly computed state inside [0, 1] before the next step.
    imgA = np.clip(imgA, 0.0, 1.0)
    imgB = np.clip(imgB, 0.0, 1.0)
print("Time Elapsed : " + str(time() - t))

# Map the difference A - B to 8-bit grey levels, transpose, and save it.
finalImg = np.clip((imgA - imgB) * 255, 0, 255).T
img = Image.fromarray(np.uint8(finalImg))
img.save("test2.png")

更新

最后,我改用了 PyOpenGL 和 Pygame,通过片段着色器(Fragment Shader)把计算放到 GPU 上执行,速度快了很多!