import numpy as np
import pandas as pd


x = np.array(



预期输出:2D ndarray


我想为2d ndarray(INPUT1)中的每个True设置范围值(垂直向量)。为此有一些有用的API或解决方案吗?

  1. 在每个True值上进行蛮力循环并分配切片,或者
  2. 使用单个索引分配来替换必要的值。



import numpy as np

x = np.array(
y = np.array([1,2,3,4])
refout = np.array([[0,0,0,0,1],

# alternative input with arbitrary size:
# N = 100; x = np.random.rand(N,N) < 0.2; y = np.arange(1,N)

def looping_clip(x, y):
    """Write ``y`` downwards from every True cell of ``x``, clipping at the bottom.

    For each nonzero cell ``(row, col)`` of the 2d array ``x``, the leading
    part of ``y`` that still fits into the array is stored in column ``col``
    starting at ``row``. Cells are processed in the order produced by
    ``x.nonzero()``, so where two writes overlap the later one wins.

    Returns a new array with the shape of ``x`` and the dtype of ``y``.
    """
    n_rows = x.shape[0]
    span = y.size

    # start from an all-zero canvas
    result = np.zeros_like(x, dtype=y.dtype)

    for row, col in zip(*x.nonzero()):
        # the last row we may touch: either the end of y or the array edge
        stop = min(row + span, n_rows)
        result[row:stop, col] = y[:stop - row]

    return result

def looping_expand(x, y):
    """Loop over Trues, use an expanded buffer.

    For every nonzero cell ``(i, j)`` of the 2d array ``x`` the whole of
    ``y`` is written to rows ``i .. i + y.size - 1`` of column ``j`` in a
    work array that has enough extra rows for the write never to be
    truncated. The extra rows are dropped before returning. Cells are
    processed in the order produced by ``x.nonzero()``, so where two writes
    overlap the later one wins.

    Parameters
    ----------
    x : ndarray
        2d array; its nonzero entries mark where a copy of ``y`` starts.
    y : ndarray
        Values to write (expected to be 1d, since it is assigned to a
        column slice of length ``y.size``).

    Returns
    -------
    ndarray
        New array with the shape of ``x`` and the dtype of ``y``.
    """
    n = y.size
    nmax, mmax = x.shape
    ivals, jvals = x.nonzero()

    # Nothing to write: ivals.max() below would raise on an empty array.
    if ivals.size == 0 or n == 0:
        return np.zeros((nmax, mmax), dtype=y.dtype)

    # initialize buffed-up output: tall enough for the lowest True plus y
    out = np.zeros((max(nmax, ivals.max() + n), mmax), dtype=y.dtype)
    # loop over True values
    for i, j in zip(ivals, jvals):
        # slice will always be complete, i.e. of length y.size
        out[i:i + n, j] = y
    return out[:nmax, :].copy()  # rather not return a view to an auxiliary array

def index_2d(x, y):
    """Assign directly with 2d indices, use an expanded buffer.

    Builds, for every nonzero cell ``(i, j)`` of the 2d array ``x``, the row
    indices ``i .. i + y.size - 1`` and assigns ``y`` to all of them in one
    fancy-indexed assignment on a work array with enough extra rows. The
    extra rows are dropped before returning.

    NOTE: when the ranges of two True cells in the same column overlap, the
    same element is assigned more than once in a single indexed assignment.
    NumPy does not guarantee which of those values ends up stored, so the
    result for overlapping ranges is not reliable.

    Parameters
    ----------
    x : ndarray
        2d array; its nonzero entries mark where a copy of ``y`` starts.
    y : ndarray
        Values to write (expected to be 1d; it is broadcast against an
        index array whose last axis has length ``y.size``).

    Returns
    -------
    ndarray
        New array with the shape of ``x`` and the dtype of ``y``.
    """
    n = y.size
    nmax, mmax = x.shape
    ivals, jvals = x.nonzero()

    # Nothing to write: ivals.max() below would raise on an empty array.
    if ivals.size == 0 or n == 0:
        return np.zeros((nmax, mmax), dtype=y.dtype)

    # initialize buffed-up output: tall enough for the lowest True plus y
    out = np.zeros((max(nmax, ivals.max() + n), mmax), dtype=y.dtype)

    # now we can safely index for each "(ivals:ivals+n, jvals)" so to speak
    upped_ivals = ivals[:, None] + np.arange(n)  # shape (ntrues, n)
    upped_jvals = jvals[:, None]  # shape (ntrues, 1), broadcasts to (ntrues, n)

    out[upped_ivals, upped_jvals] = y  # right-hand side of shape (n,) broadcasts

    return out[:nmax, :].copy()  # rather not return a view to an auxiliary array

def index_1d(x,y):
    """Assign using linear indices, use an expanded buffer.

    Converts every nonzero cell of the 2d array ``x`` to a linear index into
    a work array with enough extra rows, steps that index down the column
    ``y.size`` times (one row is ``mmax`` elements further), and assigns
    ``y`` through the work array's flat iterator. The extra rows are dropped
    before returning.

    NOTE: when the ranges of two True cells in the same column overlap, the
    same element is assigned more than once in a single indexed assignment.
    NumPy does not guarantee which of those values ends up stored, so the
    result for overlapping ranges is not reliable.

    Parameters
    ----------
    x : ndarray
        2d array; its nonzero entries mark where a copy of ``y`` starts.
    y : ndarray
        Values to write (expected to be 1d).

    Returns
    -------
    ndarray
        New array with the shape of ``x`` and the dtype of ``y``.
    """
    n = y.size
    nmax, mmax = x.shape
    ivals, jvals = x.nonzero()

    # Nothing to write: ivals.max() below would raise on an empty array.
    if ivals.size == 0 or n == 0:
        return np.zeros((nmax, mmax), dtype=y.dtype)

    # initialize buffed-up output: tall enough for the lowest True plus y
    out = np.zeros((max(nmax, ivals.max() + n), mmax), dtype=y.dtype)

    # grab linear indices corresponding to Trues in a buffed-up array
    inds = np.ravel_multi_index((ivals, jvals), out.shape)

    # now all we need to do is start stepping along rows for each item and assign y
    upped_inds = inds[:, None] + mmax * np.arange(n)  # shape (ntrues, n)

    out.flat[upped_inds] = y  # y of shape (n,) is repeated for each of the ntrues rows

    return out[:nmax, :].copy()  # rather not return a view to an auxiliary array

# Sanity check: every implementation has to reproduce the reference output.
# A list (not a generator) is used so that all four functions are always run.
print(all([
    np.array_equal(refout, implementation(x, y))
    for implementation in (looping_clip, looping_expand, index_2d, index_1d)
]))


  1. looping_clip遍历输入中的每个True值,并分配给输出中的相应切片。我们会在右侧注意缩短分配的数组,以防止切片的一部分沿第一维超出数组的边缘。
  2. looping_expand遍历输入中的每个True值,并将完整的y赋给输出中相应的完整(full)切片;为了保证每个切片都完整,它会先分配一个在行方向上扩充过的输出数组。分配更大的输出数组时,我们会做更多的工作,但是不必缩短赋值的右边。我们可以在最后一步中省略.copy()调用,但是我不希望返回步幅非平凡的数组(即对辅助数组的视图,而不是正确的副本),因为这可能给用户带来难以理解的意外。
  3. index_2d计算要分配给每个值的2d索引,并假定重复索引将按顺序处理。这不能保证! (稍后会对此进行更多介绍。)
  4. index_1d使用线性索引并索引到输出的flatiter中。


timings: indexing versions are only faster for medium-sized inputs of N around 10-150


更糟糕的是,请注意,索引版本假定花式索引中的重复索引会按顺序处理,因此在处理数组中位置“较低”的True值时,先前写入的值会按您的要求被覆盖。只有一个问题:这一点并没有保证(this is not guaranteed)。




我们可以通过自己消除重复的索引来使分配更安全。为此,我们可以使用Divakar在相关问题下给出的这个答案(https://stackoverflow.com/a/44672126)。

def index_1d_safe(x,y):
    """Same as index_1d but use Divakar's safe solution for reducing duplicates.

    Computes the same stepped linear indices as ``index_1d``, but before
    assigning it keeps, for every target element that is addressed more than
    once, only the last (index, value) pair in row-major order of the
    ``(ntrues, n)`` index array. Each element is therefore written at most
    once and the outcome does not depend on how NumPy orders repeated
    indices: where ranges overlap, the True cell that comes later in
    ``x.nonzero()`` wins.

    Parameters
    ----------
    x : ndarray
        2d array; its nonzero entries mark where a copy of ``y`` starts.
    y : ndarray
        Values to write (expected to be 1d).

    Returns
    -------
    ndarray
        New array with the shape of ``x`` and the dtype of ``y``.
    """
    n = y.size
    nmax, mmax = x.shape
    ivals, jvals = x.nonzero()

    # Nothing to write: ivals.max() would raise on an empty array, and the
    # "last element" index below would be -1 into an empty array.
    if ivals.size == 0 or n == 0:
        return np.zeros((nmax, mmax), dtype=y.dtype)

    # initialize buffed-up output: tall enough for the lowest True plus y
    out = np.zeros((max(nmax, ivals.max() + n), mmax), dtype=y.dtype)

    # grab linear indices corresponding to Trues in a buffed-up array
    inds = np.ravel_multi_index((ivals, jvals), out.shape)

    # now all we need to do is start stepping along rows for each item and assign y
    upped_inds = inds[:, None] + mmax * np.arange(n)  # shape (ntrues, n)

    # now comes https://stackoverflow.com/a/44672126
    # need additional step: flatten upped_inds and corresponding y values for selection
    upped_flat_inds = upped_inds.ravel()  # shape (ntrues, n) -> (ntrues*n,)
    y_vals = np.broadcast_to(y, upped_inds.shape).ravel()  # shape (ntrues, n) -> (ntrues*n,)

    # stable sort keeps equal indices in their original order, so the last
    # entry of every run of equal indices is the most recent write
    sidx = upped_flat_inds.argsort(kind='mergesort')
    sindex = upped_flat_inds[sidx]
    # positions where a run of equal indices ends, plus the very last element
    run_ends = np.r_[np.flatnonzero(sindex[1:] != sindex[:-1]), upped_flat_inds.size - 1]
    idx = sidx[run_ends]
    out.flat[upped_flat_inds[idx]] = y_vals[idx]

    return out[:nmax, :].copy()  # rather not return a view to an auxiliary array


updated timing figure: safe 1d indexing case is always slower
