我有一些代码,它基本上将long numpy数组的某些元素设置为零。
import numpy as np
a = np.array([[[1, 2, 3, 4], [0, 1, 2, 3], [1, 2, 3, 4]],
[[0, 1, 2, 3], [0, 0, 1, 2], [0, 2, 3, 4]]])
aup = a + 2
b = np.array([[[0, 1, 2], [3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2]],
[[3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2], [3, 4, 5]],
[[0, 1, 2], [3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2]],
[[3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2], [3, 4, 5]],
[[0, 1, 2], [3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2]]])
for i in range(3):
for j in range(4):
for row_t in range(a[0, i, j], aup[0, i, j]):
for col_t in range(a[1, i, j], aup[1, i, j]):
row = row_t%5
col = col_t%5
b[row, col, i] = 0
主要问题是运行时间,因为我有4个for循环,所以可能很多。是否有任何笨拙的技巧来避免这些for循环并基本达到相同的目的?
更新:
我认为这一切归结为以下问题。首先在轴2上找到a
和aup
的最小值,即
min_a = np.min(a, axis=2)%5 \\ [[1 0 1], [0 0 0]]
max_aup = np.max(aup, axis=2)%5 \\ [[1 0 1], [0 4 1]]
Npw列出在min_a
和min_aup
之间形成的所有元素明智元组。与相应的索引元素:
tuples = [[1, 1, 0], [0, 0, 1], [0, 1, 1], [0, 2, 1], [0, 3, 1],
[0, 4, 1], [1, 0, 2], [1, 1, 2]]
并将这些元组的b
元素设置为0:
b[tuples] = 0
因此,整个问题基本上是有效地找到这些tuples
。
答案 0 :(得分:6)
好的,这是你的循环的矢量化版本。它明确地使用了aup
是a
加上标量偏移的事实:
>>> import numpy as np
>>> a = np.array([[[1, 2, 3, 4], [0, 1, 2, 3], [1, 2, 3, 4]],
... [[0, 1, 2, 3], [0, 0, 1, 2], [0, 2, 3, 4]]])
>>>
>>> aup = a + 2
>>>
>>> b = np.array([[[0, 1, 2], [3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2]],
... [[3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2], [3, 4, 5]],
... [[0, 1, 2], [3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2]],
... [[3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2], [3, 4, 5]],
... [[0, 1, 2], [3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2]]])
>>>
>>> b0 = b.copy()
>>>
>>>
>>> for i in range(3):
... for j in range(4):
... for row_t in range(a[0, i, j], aup[0, i, j]):
... for col_t in range(a[1, i, j], aup[1, i, j]):
... row = row_t % 5
... col = col_t % 5
... b[row, col, i] = 0
...
>>> b_loopy = b
>>> b = b0.copy()
>>>
>>> i, j, k, _ = np.ogrid[:2, :2, :3, :0]
>>>
>>> b[(a[0] + i) % 5, (a[1] + j) % 5, k] = 0
>>>
>>> b_vect = b
>>>
>>> np.all(b_vect == b_loopy)
True
对于任意aup > a
,它有点毛茸茸。对于小问题大小,下面的代码比循环稍慢,但扩展得更好,请参阅本文末尾的时间。
import numpy as np
a = np.array([[[1, 2, 3, 4], [0, 1, 2, 3], [1, 2, 3, 4]],
[[0, 1, 2, 3], [0, 0, 1, 2], [0, 2, 3, 4]]])
aup = a + np.random.randint(2, 5, a.shape)
b = np.array([[[0, 1, 2], [3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2]],
[[3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2], [3, 4, 5]],
[[0, 1, 2], [3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2]],
[[3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2], [3, 4, 5]],
[[0, 1, 2], [3, 4, 5], [0, 2, 3], [5, 6, 7], [0, 1, 2]]])
def setup(i, j, k, l):
b = np.random.randint(1, 10, (i, j, k))
a = np.array(np.unravel_index(np.random.randint(0, i*j, (k, l)), (i, j)))
aup = a + 1 + np.array(np.unravel_index(np.random.randint(
0, (i-2)*(j-2), (k, l)), (i-2, j-2)))
return b, a, aup
def f_loopy(b, a, aup):
b = b.copy()
for i in range(b.shape[-1]):
for j in range(a.shape[-1]):
for row_t in range(a[0, i, j], aup[0, i, j]):
for col_t in range(a[1, i, j], aup[1, i, j]):
row = row_t % b.shape[0]
col = col_t % b.shape[1]
b[row, col, i] = 0
return b
def unwrap_indices(a, aup, shp):
i, j, k = shp
_, k, m = a.shape
ij = np.array((i, j))[:, None]
m *= k
a = a.reshape(2, -1)
aup = aup.reshape(2, -1)
fst, snd = mask = aup > ij
fsti = np.flatnonzero(fst)
sndi = np.flatnonzero(snd)
bthi = fsti[snd[fsti]]
m2 = m + len(fsti)
m3 = m2 + len(sndi)
d = np.empty((2, m3 + len(bthi)), dtype=int)
d[0, m:m2] = aup[0, fsti] - i
d[1, m2:m3] = aup[1, sndi] - j
d[:, m3:] = aup[:, bthi] - ij
d[:, :m] = np.where(mask, ij, aup) - a
d[1, m:m2] = d[1, fsti]
d[0, m2:m3] = d[0, sndi]
aa = np.empty_like(d)
aa[:, :m] = a
aa[1, m:m2] = a[1, fsti]
aa[0, m2:m3] = a[0, sndi]
aa[0, m:m2] = 0
aa[1, m2:m3] = 0
aa[:, m3:] = 0
z = np.empty(aa.shape[1:], dtype=int)
z[:m].reshape(k, -1)[...] = np.arange(k)[:, None]
z[m:m2] = z[fsti]
z[m2:m3] = z[sndi]
z[m3:] = z[bthi]
return aa, d, z
def embed_indices_flat(aa, d, z, shp):
i, j, k = shp
_, m = aa.shape
A = np.ravel_multi_index((aa[0], aa[1], z), shp)
A[1:] -= A[:-1] + np.einsum('ij,i->j', d[:, :-1]-1, (j*k, k))
A1 = (((j+1)-d[1]) * k).repeat(d[0])
A1[0] = A[0]
A1[d[0, :-1].cumsum()] = A[1:]
idx = d[1].repeat(d[0]).cumsum()
A2 = np.full(idx[-1:], k)
A2[0] = A1[0]
A2[idx[:-1]] = A1[1:]
return A2.cumsum()
def f_vect(b, a, aup, switch_strat=20):
b = b.copy()
A, D, Z = unwrap_indices(a, aup, b.shape)
if D.sum() > switch_strat * D.size:
for ai, di, zi in zip(A.T, D.T, Z):
b[ai[0]:ai[0]+di[0], ai[1]:ai[1]+di[1], zi] = 0
else:
b.ravel()[embed_indices_flat(A, D, Z, b.shape)] = 0
return b
from timeit import timeit
print(np.all(f_vect(b, a, aup) == f_loopy(b, a, aup)))
print(timeit(lambda: f_vect(b, a, aup), number=1000))
print(timeit(lambda: f_loopy(b, a, aup), number=1000))
b, a, aup = setup(40, 30, 10, 8)
print(np.all(f_vect(b, a, aup) == f_loopy(b, a, aup)))
print(timeit(lambda: f_vect(b, a, aup), number=1000))
print(timeit(lambda: f_loopy(b, a, aup), number=1000))
示例输出:
True # <- results equal
0.08376670675352216 # <- vectorized
0.062134379986673594 # <- loopy
True # same for larger problem size
0.3771689278073609 #
8.375985411927104 #