如何在 numpy 数组的每一列中找到某个特定数字的最小索引和最大索引

时间:2017-08-31 14:03:19

标签: python arrays numpy

我有一个如下所示的 numpy 数组:
# Example input: a 5x5 array of 0/1 values. Column 0 contains no 1 at all,
# which is the case that should map to nan in the desired output.
m = np.array([[0,0,0,0,0],
              [0,0,0,1,1],
              [0,1,0,1,0],
              [0,1,0,1,0],
              [0,0,1,1,1],])

`(m == 1).argmax(0)` 会返回 `array([0, 2, 4, 1, 1])`。是否有类似的函数,可以得到每一列中 1 的最小索引和最大索引?即期望得到如下输出:

array([[ nan, 2., 4., 1., 1.], [ nan, 3., 4., 4., 4.]])

3 个答案:

答案 0 :(得分:3)

一种方法是 -

# Boolean mask of the cells equal to 1, and its running count down each column.
mask = (m == 1)
mask_cumsum = np.cumsum(mask, axis=0)

# Columns that contain at least one 1; only these have meaningful indices.
valid_mask = mask.any(axis=0)
# The running count first equals 1 exactly at the first 1 of a column.
min_idx = np.argmax(mask_cumsum == 1, axis=0)
# The running count reaches its maximum at the last 1 of a column
# (argmax reports the first position of the maximum).
max_idx = np.argmax(mask_cumsum, axis=0)
min_max_idx = np.vstack([min_idx, max_idx])

# Columns without any 1 become nan in both rows.
out = np.where(valid_mask, min_max_idx, np.nan)

答案 1 :(得分:2)

这并不是一个现成的单一函数,但你可以把它封装成一个函数,它会给出预期的返回值:

def argwhere_both_sides_with_nan(arr, val=1):
    """Return the first and last row index of ``val`` in each column of ``arr``.

    Parameters
    ----------
    arr : numpy.ndarray
        Array that is searched along axis 0.
    val : scalar, optional
        Value to look for. Defaults to 1.

    Returns
    -------
    numpy.ndarray
        Float array whose row 0 holds the first matching row index per column
        and whose row 1 holds the last one. Columns without any match are
        ``nan`` in both rows.
    """
    hits = arr == val
    n_rows = arr.shape[0]
    if n_rows == 0:
        # argmax raises on an empty axis; with no rows nothing can match.
        return np.full((2,) + arr.shape[1:], np.nan)

    first = hits.argmax(0)
    # Scanning the rows bottom-up gives the distance of the last match from
    # the bottom; convert that back to a top-down row index.
    last = n_rows - 1 - hits[::-1].argmax(0)

    res = np.array([first, last], dtype=float)
    # argmax reports 0 for columns without a match, so blank those out.
    res[:, ~hits.any(0)] = np.nan
    return res

>>> argwhere_both_sides_with_nan(m)
array([[ nan,   2.,   4.,   1.,   1.],
       [ nan,   3.,   4.,   4.,   4.]])

如果你安装了 numba,也可以在这里使用它来加快计算速度:

import numba as nb

@nb.njit
def first_and_last_index(arr, val):
    """Return the first and last row index of ``val`` in each column of ``arr``.

    The result is a float array of shape ``(2, n_cols)``: row 0 holds the first
    matching row index per column, row 1 the last one. Columns without any
    match stay ``nan`` in both rows.
    """
    n_rows, n_cols = arr.shape
    bounds = np.full((2, n_cols), np.nan)
    for r in range(n_rows):
        for c in range(n_cols):
            if arr[r, c] != val:
                continue
            # Row 0 is still nan only until the first match in this column.
            if np.isnan(bounds[0, c]):
                bounds[0, c] = r
            # Every match overwrites row 1, so it ends up as the last match.
            bounds[1, c] = r
    return bounds

>>> first_and_last_index(m, 1)
array([[ nan,   2.,   4.,   1.,   1.],
       [ nan,   3.,   4.,   4.,   4.]])

耗时对比:

@nb.njit
def first_and_last_index(arr, val):
    """Return the first and last row index of ``val`` in each column of ``arr``.

    The result is a float array of shape ``(2, n_cols)``: row 0 holds the first
    matching row index per column, row 1 the last one. Columns without any
    match stay ``nan`` in both rows.
    """
    n_rows, n_cols = arr.shape
    bounds = np.full((2, n_cols), np.nan)
    for r in range(n_rows):
        for c in range(n_cols):
            if arr[r, c] != val:
                continue
            # Row 0 is still nan only until the first match in this column.
            if np.isnan(bounds[0, c]):
                bounds[0, c] = r
            # Every match overwrites row 1, so it ends up as the last match.
            bounds[1, c] = r
    return bounds

def argwhere_both_sides_with_nan(arr, val=1):
    """Return the first and last row index of ``val`` in each column of ``arr``.

    Parameters
    ----------
    arr : numpy.ndarray
        Array that is searched along axis 0.
    val : scalar, optional
        Value to look for. Defaults to 1.

    Returns
    -------
    numpy.ndarray
        Float array whose row 0 holds the first matching row index per column
        and whose row 1 holds the last one. Columns without any match are
        ``nan`` in both rows.
    """
    hits = arr == val
    n_rows = arr.shape[0]
    if n_rows == 0:
        # argmax raises on an empty axis; with no rows nothing can match.
        return np.full((2,) + arr.shape[1:], np.nan)

    first = hits.argmax(0)
    # Scanning the rows bottom-up gives the distance of the last match from
    # the bottom; convert that back to a top-down row index.
    last = n_rows - 1 - hits[::-1].argmax(0)

    res = np.array([first, last], dtype=float)
    # argmax reports 0 for columns without a match, so blank those out.
    res[:, ~hits.any(0)] = np.nan
    return res

def divakars(m):
    """Return the first and last row index of 1 in each column of ``m``.

    Row 0 of the result holds the first row index of a 1 per column, row 1 the
    last one. Columns that contain no 1 are ``nan`` in both rows.
    """
    is_one = (m == 1)
    # Running count of ones down each column.
    running = np.cumsum(is_one, axis=0)

    # The count first equals 1 at the first 1; it reaches its maximum at the
    # last 1, and argmax reports the first position of that maximum.
    first_idx = np.argmax(running == 1, axis=0)
    last_idx = np.argmax(running, axis=0)
    stacked = np.vstack([first_idx, last_idx])

    # Only columns containing at least one 1 keep their indices.
    has_one = is_one.any(axis=0)
    return np.where(has_one, stacked, np.nan)

# Benchmark of the three implementations. The %timeit lines are IPython
# magics, so this snippet is meant to be run in IPython/Jupyter; the comment
# under each %timeit line is the timing recorded by the author.

# Small case: a 6x5 array of 0/1 values whose first column contains no 1.
m = np.array([[0, 0, 0, 0, 0],
              [0, 0, 0, 1, 1],
              [0, 1, 0, 1, 0],
              [0, 1, 0, 1, 0],
              [0, 0, 1, 1, 1],
              [0, 0, 1, 1, 1]])

# Sanity check before timing: all three implementations must agree.
np.testing.assert_array_equal(first_and_last_index(m, 1), argwhere_both_sides_with_nan(m))
np.testing.assert_array_equal(first_and_last_index(m, 1), divakars(m))
%timeit first_and_last_index(m, 1)
# 6.77 µs ± 178 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
%timeit argwhere_both_sides_with_nan(m)
# 121 µs ± 3.57 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
%timeit divakars(m)
# 138 µs ± 4.24 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

# Large case: a 1000x1000 int64 array of random 0/1 values.
m = (np.random.random((1000, 1000)) > 0.5).astype(np.int64)

# Same agreement check on the large input.
np.testing.assert_array_equal(first_and_last_index(m, 1), argwhere_both_sides_with_nan(m))
np.testing.assert_array_equal(first_and_last_index(m, 1), divakars(m))
%timeit first_and_last_index(m, 1)
# 10 ms ± 248 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%timeit argwhere_both_sides_with_nan(m)
# 12.8 ms ± 393 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%timeit divakars(m)
# 67.2 ms ± 2.05 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)

答案 2 :(得分:0)

另一种通过翻转数组来实现的解决方案:

import numpy as np

# First row index of a 1 in each column, scanning from the top.
# NOTE: a column without any 1 yields 0 here, not nan.
a = list(np.argmax(m == 1, axis=0))
# Same scan on the vertically flipped array: offset of the last 1 from the bottom.
b = np.argmax(np.flipud(m) == 1, axis=0)

# a is your first output
# Convert the bottom-up offsets back to top-down row indices.
c = [len(m) - 1 - offset for offset in b]

# c is your second output

当前输出:

a = [0, 2, 4, 1, 1]
c = [4, 3, 4, 4, 4]

剩下只需要处理整列全为 0 的情况(此时 `a` 和 `c` 中对应的索引没有意义,应替换为 nan)。