在numpy中对此循环进行矢量化

时间:2017-10-06 11:06:08

标签: numpy

我正在尝试使用numpy在python中计算矩阵z(在下面定义)。

这是我当前的解决方案(使用1 for for循环)

z = np.zeros((n, k))
for i in range(n):
    v = pi * (1 / math.factorial(x[i])) * np.exp(-1 * lamb) * (lamb ** x[i])
    numerator = np.sum(v)
    c = v / numerator
    z[i, :] = c
return z

是否可以完全矢量化此计算?我需要对数千次迭代进行这种计算,而numpy中的矩阵运算要比循环中的矩阵运算快得多。

1 个答案:

答案 0 :(得分:1)

这是E的矢量化版本。它用NumPy broadcasting和基于数组的算法替换for-loop和标量算术:

def alt_E(x):
    x = x[:, None]
    z = pi * (np.exp(-lamb) * (lamb**x)) / special.factorial(x)
    denom = z.sum(axis=1)[:, None]
    z /= denom
    return z

我运行em.py以了解xlambpink的典型尺寸。关于这个大小的数据, alt_EE快约120倍:

In [32]: %timeit E(x)
100 loops, best of 3: 11.5 ms per loop

In [33]: %timeit alt_E(x)
10000 loops, best of 3: 94.7 µs per loop

In [34]: 11500/94.7
Out[34]: 121.43611404435057

这是我用于基准测试的设置:

import math
import numpy as np
import scipy.special as special

def alt_E(x):
    x = x[:, None]
    z = pi * (np.exp(-lamb) * (lamb**x)) / special.factorial(x)
    denom = z.sum(axis=1)[:, None]
    z /= denom
    return z


def E(x):
    z = np.zeros((n, k))
    for i in range(n):
        v = pi * (1 / math.factorial(x[i])) * \
            np.exp(-1 * lamb) * (lamb ** x[i])
        numerator = np.sum(v)
        c = v / numerator
        z[i, :] = c
    return z

n = 576
k = 2

x = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 5])

lamb = np.array([ 0.84835141, 1.04025989])
pi = np.array([ 0.5806958, 0.4193042])

assert np.allclose(alt_E(x), E(x))

顺便说一下,E也可以使用scipy.stats.poisson计算:

import scipy.stats as stats
pois = stats.poisson(mu=lamb)

def alt_E2(x):
    z = pi * pois.pmf(x[:,None])
    denom = z.sum(axis=1)[:, None]
    z /= denom
    return z

但这并不会更快,至少对于这个长度的数组来说:

In [33]: %timeit alt_E(x)
10000 loops, best of 3: 94.7 µs per loop

In [102]: %timeit alt_E2(x)
1000 loops, best of 3: 278 µs per loop

对于较大的xalt_E2更快:

In [104]: x = np.random.random(10000)

In [106]: %timeit alt_E(x)
100 loops, best of 3: 2.18 ms per loop

In [105]: %timeit alt_E2(x)
1000 loops, best of 3: 643 µs per loop