Python 3D插值加速

时间:2016-12-19 10:33:25

标签: python performance numpy interpolation cython

我有以下用于插入3D体数据的代码。

Y, X, Z = np.shape(volume)
xs = np.arange(0, X)
ys = np.arange(0, Y)
zs = np.arange(0, Z)

points = list(zip(np.ravel(result[:, :, :, 1]), np.ravel(result[:, :, :, 0]), np.ravel(result[:, :, :, 2])))
interp = interpolate.RegularGridInterpolator((ys, xs, zs), volume,
                                             bounds_error=False, fill_value=0, method='linear')
new_volume = interp(points)
new_volume = np.reshape(new_volume, (Y, X, Z))

此代码在512x512x110卷(大约2900万点)上执行大约需要37秒,这导致每个体素超过1微秒(这对我来说是不可接受的时间 - 更多的是它使用4个核心)。呼叫new_volume=interp(points)占用了约80%的提前时间,并且创建了列表几乎整个剩余时间。

是否有任何简单(甚至更复杂)的方法可以更快地进行此计算?或者是否有任何优秀的Python库,它提供更快的插值?每次打电话给我们,我的音量和积分都会发生变化。

2 个答案:

答案 0 :(得分:2)

以下是cython解决方案的略微修改版本:

import numpy as np
cimport numpy as np
from libc.math cimport floor
from cython cimport boundscheck, wraparound, nonecheck, cdivision

DTYPE = np.float
ctypedef np.float_t DTYPE_t

@boundscheck(False)
@wraparound(False)
@nonecheck(False)
def interp3D(DTYPE_t[:,:,::1] v, DTYPE_t[:,:,::1] xs, DTYPE_t[:,:,::1] ys, DTYPE_t[:,:,::1] zs):

    cdef int X, Y, Z
    X,Y,Z = v.shape[0], v.shape[1], v.shape[2]
    cdef np.ndarray[DTYPE_t, ndim=3] interpolated = np.zeros((X, Y, Z), dtype=DTYPE)

    _interp3D(&v[0,0,0], &xs[0,0,0], &ys[0,0,0], &zs[0,0,0], &interpolated[0,0,0], X, Y, Z)
    return interpolated


@cdivision(True)
cdef inline void _interp3D(DTYPE_t *v, DTYPE_t *x_points, DTYPE_t *y_points, DTYPE_t *z_points, 
               DTYPE_t *result, int X, int Y, int Z):

    cdef:
        int i, x0, x1, y0, y1, z0, z1, dim
        DTYPE_t x, y, z, xd, yd, zd, c00, c01, c10, c11, c0, c1, c

    dim = X*Y*Z

    for i in range(dim):
        x = x_points[i]
        y = y_points[i]
        z = z_points[i]

        x0 = <int>floor(x)
        x1 = x0 + 1
        y0 = <int>floor(y)
        y1 = y0 + 1
        z0 = <int>floor(z)
        z1 = z0 + 1

        xd = (x-x0)/(x1-x0)
        yd = (y-y0)/(y1-y0)
        zd = (z-z0)/(z1-z0)

        if x0 >= 0 and y0 >= 0 and z0 >= 0:
            c00 = v[Y*Z*x0+Z*y0+z0]*(1-xd) + v[Y*Z*x1+Z*y0+z0]*xd
            c01 = v[Y*Z*x0+Z*y0+z1]*(1-xd) + v[Y*Z*x1+Z*y0+z1]*xd
            c10 = v[Y*Z*x0+Z*y1+z0]*(1-xd) + v[Y*Z*x1+Z*y1+z0]*xd
            c11 = v[Y*Z*x0+Z*y1+z1]*(1-xd) + v[Y*Z*x1+Z*y1+z1]*xd

            c0 = c00*(1-yd) + c10*yd
            c1 = c01*(1-yd) + c11*yd

            c = c0*(1-zd) + c1*zd

        else:
            c = 0

        result[i] = c 

结果仍与您的相同。使用60x60x60的随机网格数据,我获得以下时间:

SciPy's solution:                982ms
Your cython solution:            24.7ms
Above modified cython solution:  8.17ms

所以它比你的cython解决方案快近4倍。注意

  1. Cython默认在数组上执行边界检查,如果您需要该功能,请启用boundschecking删除@boundscheck(False)
  2. 在上述解决方案中,阵列需要是C-连续的
  3. 如果您想要上述代码的并行变体,请在range中替换prange而不是for loop
  4. 希望这会有所帮助。

答案 1 :(得分:0)

我使用Cython来加速这个并实现以下代码:

import numpy as np
cimport numpy as np
np.import_array()
from libc.math cimport ceil, floor

DTYPE = np.float
ctypedef np.float_t DTYPE_t

def interp3(np.ndarray[DTYPE_t, ndim=3] x_grid, np.ndarray[DTYPE_t, ndim=3] y_grid,
    np.ndarray[DTYPE_t, ndim=3] z_grid, np.ndarray[DTYPE_t, ndim=3] v,
    np.ndarray[DTYPE_t, ndim=3] xs, np.ndarray[DTYPE_t, ndim=3] ys, 
    np.ndarray[DTYPE_t, ndim=3] zs):

    cdef int i
    cdef float x
    cdef float y
    cdef float z
    cdef int x0
    cdef int x1
    cdef int y0
    cdef int y1
    cdef int z0
    cdef int z1
    cdef float xd
    cdef float yd
    cdef float zd
    cdef float c00
    cdef float c01
    cdef float c10
    cdef float c11
    cdef float c0
    cdef float c1
    cdef float c
    cdef int X
    cdef int Y
    cdef int Z

    X, Y, Z = np.shape(x_grid)

    cdef np.ndarray[DTYPE_t, ndim=1] x_points = np.ravel(xs)
    cdef np.ndarray[DTYPE_t, ndim=1] y_points = np.ravel(ys)
    cdef np.ndarray[DTYPE_t, ndim=1] z_points = np.ravel(zs)
    cdef np.ndarray[DTYPE_t, ndim=1] result = np.empty((len(x_points)), dtype=DTYPE)

    for i in range(len(x_points)):
        x = x_points[i]
        y = y_points[i]
        z = z_points[i]

        x0 = int(floor(x))
        x1 = x0 + 1
        y0 = int(floor(y))
        y1 = y0 + 1
        z0 = int(floor(z))
        z1 = z0 + 1

        xd = (x-x0)/(x1-x0)
        yd = (y-y0)/(y1-y0)
        zd = (z-z0)/(z1-z0)

        try:
            assert x0 >= 0 and y0 >= 0 and z0 >= 0
            c00 = v[x0, y0, z0]*(1-xd) + v[x1, y0, z0]*xd
            c01 = v[x0, y0, z1]*(1-xd) + v[x1, y0, z1]*xd
            c10 = v[x0, y1, z0]*(1-xd) + v[x1, y1, z0]*xd
            c11 = v[x0, y1, z1]*(1-xd) + v[x1, y1, z1]*xd

            c0 = c00*(1-yd) + c10*yd
            c1 = c01*(1-yd) + c11*yd

            c = c0*(1-zd) + c1*zd
        except:
            c = 0

        result[i] = c

    cdef np.ndarray[DTYPE_t, ndim=3] interpolated = np.zeros((X, Y, Z), dtype=DTYPE)
    interpolated = np.reshape(result, (X, Y, Z))
    return interpolated  

这是我第一次使用Cython,所以有以下问题:

  1. 如何进一步优化?

  2. 有没有简单的方法可以避免try和assert语句检查数组边界?尝试确保与最小/最大组合的边界比这个try / assert方法慢

  3. 目前,它比上面发布的原始代码快8倍左右。