我有一个像这样的 numpy 数组:
m = np.array([[0,0,0,0,0],
[0,0,0,1,1],
[0,1,0,1,0],
[0,1,0,1,0],
[0,0,1,1,1],])
(m == 1).argmax(0)
会得到 array([0, 2, 4, 1, 1])。
是否有类似的函数可以获得每列中 1 的最小索引和最大索引?即
array([[ nan, 2., 4., 1., 1.], [ nan, 3., 4., 4., 4.]])
答案 0 :(得分:3)
一种方法是 -
# Boolean mask of the cells that hold a 1.
mask = (m == 1)
# Running count of ones down each column: it first equals 1 at the first
# match and first reaches its maximum at the last match.
mask_cumsum = np.cumsum(mask, axis=0)
# Columns that contain at least one 1.
valid_mask = np.any(mask, axis=0)
min_idx = np.argmax(mask_cumsum == 1, axis=0)
max_idx = np.argmax(mask_cumsum, axis=0)
min_max_idx = np.vstack((min_idx, max_idx))
# Columns without any 1 have no meaningful index, so they become NaN.
out = np.where(valid_mask, min_max_idx, np.nan)
答案 1 :(得分:2)
这不是一个现成的单一函数,但你可以把它封装成一个函数,它会给出预期的返回值:
def argwhere_both_sides_with_nan(arr, val=1):
    """Return the first and last row index of ``val`` in each column of ``arr``.

    Args:
        arr: NumPy array searched along axis 0.
        val: Value to look for. Defaults to 1.

    Returns:
        A float array with two rows: row 0 holds the first matching row
        index per column, row 1 the last one. Columns without any match
        (including every column of an array with zero rows) hold NaN.
    """
    n_rows = arr.shape[0]
    if n_rows == 0:
        # argmax raises ValueError on a zero-length axis; with no rows there
        # cannot be a match, so every column is NaN.
        return np.full((2,) + arr.shape[1:], np.nan)

    hits = arr == val
    first_idx = hits.argmax(0)
    # Searching the row-reversed mask yields the distance of the last match
    # from the bottom; convert that back to an index into the original rows.
    last_idx = n_rows - 1 - hits[::-1].argmax(0)

    res = np.array([first_idx, last_idx], dtype=float)
    # argmax reports 0 for columns with no match, so overwrite those with NaN.
    res[:, ~hits.any(0)] = np.nan
    return res
>>> argwhere_both_sides_with_nan(m)
array([[ nan, 2., 4., 1., 1.],
[ nan, 3., 4., 4., 4.]])
如果你安装了 numba,也可以在这里使用它来加快速度:
import numba as nb
@nb.njit
def first_and_last_index(arr, val):
    """Find the first and last row index of ``val`` in each column of ``arr``.

    Returns a float array with two rows: row 0 is the first matching row
    index per column and row 1 the last; columns without a match stay NaN.
    """
    n_rows, n_cols = arr.shape
    out = np.full((2, n_cols), np.nan)
    for r in range(n_rows):
        for c in range(n_cols):
            if arr[r, c] != val:
                continue
            # A NaN in the first row means no match was seen in this column yet.
            if np.isnan(out[0, c]):
                out[0, c] = r
            # Rows are visited in increasing order, so the latest match is the last.
            out[1, c] = r
    return out
>>> first_and_last_index(m, 1)
array([[ nan, 2., 4., 1., 1.],
[ nan, 3., 4., 4., 4.]])
@nb.njit
def first_and_last_index(arr, val):
    """Find the first and last row index of ``val`` in each column of ``arr``.

    Returns a float array with two rows: row 0 is the first matching row
    index per column and row 1 the last; columns without a match stay NaN.
    """
    n_rows, n_cols = arr.shape
    out = np.full((2, n_cols), np.nan)
    for r in range(n_rows):
        for c in range(n_cols):
            if arr[r, c] != val:
                continue
            # A NaN in the first row means no match was seen in this column yet.
            if np.isnan(out[0, c]):
                out[0, c] = r
            # Rows are visited in increasing order, so the latest match is the last.
            out[1, c] = r
    return out
def argwhere_both_sides_with_nan(arr, val=1):
    """Return the first and last row index of ``val`` in each column of ``arr``.

    Args:
        arr: NumPy array searched along axis 0.
        val: Value to look for. Defaults to 1.

    Returns:
        A float array with two rows: row 0 holds the first matching row
        index per column, row 1 the last one. Columns without any match
        (including every column of an array with zero rows) hold NaN.
    """
    n_rows = arr.shape[0]
    if n_rows == 0:
        # argmax raises ValueError on a zero-length axis; with no rows there
        # cannot be a match, so every column is NaN.
        return np.full((2,) + arr.shape[1:], np.nan)

    hits = arr == val
    first_idx = hits.argmax(0)
    # Searching the row-reversed mask yields the distance of the last match
    # from the bottom; convert that back to an index into the original rows.
    last_idx = n_rows - 1 - hits[::-1].argmax(0)

    res = np.array([first_idx, last_idx], dtype=float)
    # argmax reports 0 for columns with no match, so overwrite those with NaN.
    res[:, ~hits.any(0)] = np.nan
    return res
def divakars(m, val=1):
    """Return the first and last row index of ``val`` in each column of ``m``.

    Uses a cumulative count of matches down each column: the count first
    equals 1 at the first match and first reaches its maximum at the last.

    Args:
        m: NumPy array searched along axis 0.
        val: Value to look for. Defaults to 1.

    Returns:
        A float array with two rows (first index, last index) per column.
        Columns without any match (including every column of an array with
        zero rows) hold NaN.
    """
    mask = m == val
    if mask.shape[0] == 0:
        # argmax raises ValueError on a zero-length axis; with no rows there
        # cannot be a match, so every column is NaN.
        return np.full((2,) + mask.shape[1:], np.nan)

    running_count = mask.cumsum(0)
    first_idx = (running_count == 1).argmax(0)
    # argmax returns the first occurrence of the maximum, i.e. the row at
    # which the final match was counted.
    last_idx = running_count.argmax(0)

    # Columns with no match would otherwise report index 0; mask them to NaN.
    return np.where(mask.any(0), np.vstack((first_idx, last_idx)), np.nan)
# Small fixture: column 0 contains no 1 at all, every other column has at least one.
m = np.array([[0, 0, 0, 0, 0],
[0, 0, 0, 1, 1],
[0, 1, 0, 1, 0],
[0, 1, 0, 1, 0],
[0, 0, 1, 1, 1],
[0, 0, 1, 1, 1]])
# Sanity check before timing: all three implementations must return the same result.
np.testing.assert_array_equal(first_and_last_index(m, 1), argwhere_both_sides_with_nan(m))
np.testing.assert_array_equal(first_and_last_index(m, 1), divakars(m))
%timeit first_and_last_index(m, 1)
# 6.77 µs ± 178 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
%timeit argwhere_both_sides_with_nan(m)
# 121 µs ± 3.57 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%timeit divakars(m)
# 138 µs ± 4.24 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
# Larger input: 1000x1000 int64 array of 0/1 obtained by thresholding np.random.random samples at 0.5.
m = (np.random.random((1000, 1000)) > 0.5).astype(np.int64)
# Re-check that the three implementations still agree on the random input.
np.testing.assert_array_equal(first_and_last_index(m, 1), argwhere_both_sides_with_nan(m))
np.testing.assert_array_equal(first_and_last_index(m, 1), divakars(m))
%timeit first_and_last_index(m, 1)
# 10 ms ± 248 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%timeit argwhere_both_sides_with_nan(m)
# 12.8 ms ± 393 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%timeit divakars(m)
# 67.2 ms ± 2.05 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
答案 2 :(得分:0)
另一种通过翻转数组来实现的解决方案:
import numpy as np
# a is the first output: the first row index of a 1 in each column.
a = list(np.argmax(m == 1, axis=0))
# Same search on the row-reversed array gives the distance of the last 1
# from the bottom of each column.
b = np.argmax(m[::-1] == 1, axis=0)
# c is the second output: distances converted back to row indices of m.
c = list(len(m) - 1 - b)
当前输出:
a = [0, 2, 4, 1, 1]
c = [4, 3, 4, 4, 4]
只需要再解决全为 0 的列的问题。