如何沿两个轴重复数组的元素?

时间:2011-10-05 04:17:03

标签: python numpy

我想分别沿轴0和轴1重复数组元素M和N次:

import numpy as np

# 3x4 sample array holding 0..11.
a = np.arange(3 * 4).reshape((3, 4))
# Repeat every element twice along axis 0, then twice along axis 1.
# The inner call produces an intermediate (6, 4) array.
b = np.repeat(np.repeat(a, 2, axis=0), 2, axis=1)
print(b)

[[ 0  0  1  1  2  2  3  3]
 [ 0  0  1  1  2  2  3  3]
 [ 4  4  5  5  6  6  7  7]
 [ 4  4  5  5  6  6  7  7]
 [ 8  8  9  9 10 10 11 11]
 [ 8  8  9  9 10 10 11 11]]

这有效,但我想知道有没有更好的方法而不创建临时数组。

4 个答案:

答案 0 :(得分:10)

您可以使用克罗内克积(Kronecker product),请参阅numpy.kron:

>>> a = np.arange(12).reshape(3,4)
>>> print np.kron(a, np.ones((2,2), dtype=a.dtype))
[[ 0  0  1  1  2  2  3  3]
 [ 0  0  1  1  2  2  3  3]
 [ 4  4  5  5  6  6  7  7]
 [ 4  4  5  5  6  6  7  7]
 [ 8  8  9  9 10 10 11 11]
 [ 8  8  9  9 10 10 11 11]]

你的原始方法也可以!

答案 1 :(得分:4)

您可以在此处使用 np.broadcast_to

def broadcast_tile(a, h, w):
    """Repeat each element of a 2-D array ``h`` times along axis 0 and ``w`` times along axis 1.

    Args:
        a: 2-D NumPy array of shape ``(x, y)``.
        h: Repeat count along axis 0.
        w: Repeat count along axis 1.

    Returns:
        Array of shape ``(x * h, y * w)``.

    Raises:
        ValueError: If ``a.shape`` does not unpack into exactly two dimensions.
    """
    rows, cols = a.shape
    # Insert a length-1 axis after each real axis so it can be stretched.
    expanded = a.reshape(rows, 1, cols, 1)
    stretched = np.broadcast_to(expanded, (rows, h, cols, w))
    # Merge (rows, h) into axis 0 and (cols, w) into axis 1.
    return stretched.reshape(rows * h, cols * w)

broadcast_tile(a, 2, 2)

array([[ 0,  0,  1,  1,  2,  2,  3,  3],
       [ 0,  0,  1,  1,  2,  2,  3,  3],
       [ 4,  4,  5,  5,  6,  6,  7,  7],
       [ 4,  4,  5,  5,  6,  6,  7,  7],
       [ 8,  8,  9,  9, 10, 10, 11, 11],
       [ 8,  8,  9,  9, 10, 10, 11, 11]])

性能


功能

def chris(a, h, w):
    """Benchmark candidate: repeat a 2-D array's elements using ``np.broadcast_to``.

    Each element of ``a`` is repeated ``h`` times along axis 0 and ``w``
    times along axis 1; the result has shape ``(x * h, y * w)`` for an
    input of shape ``(x, y)``.
    """
    n_rows, n_cols = a.shape
    # Length-1 axes after each real axis are the ones that get stretched.
    tiled = np.broadcast_to(
        a.reshape(n_rows, 1, n_cols, 1),
        (n_rows, h, n_cols, w),
    )
    return tiled.reshape(n_rows * h, n_cols * w)

def alex_riley(a, b0, b1):
    """Benchmark candidate: repeat a 2-D array's elements using ``as_strided``.

    Each element of ``a`` is repeated ``b0`` times along axis 0 and ``b1``
    times along axis 1; the result has shape ``(r * b0, c * b1)`` for an
    input of shape ``(r, c)``.
    """
    n_rows, n_cols = a.shape
    row_stride, col_stride = a.strides
    # A stride of 0 on the two inserted axes makes every step along them
    # land on the same source element.
    view = np.lib.stride_tricks.as_strided(
        a,
        shape=(n_rows, b0, n_cols, b1),
        strides=(row_stride, 0, col_stride, 0),
    )
    return view.reshape(n_rows * b0, n_cols * b1)

def paul_panzer(a, b0, b1):
    """Benchmark candidate: repeat a 2-D array's elements by broadcast assignment.

    Each element of ``a`` is repeated ``b0`` times along axis 0 and ``b1``
    times along axis 1; the result has shape ``(r * b0, c * b1)`` for an
    input of shape ``(r, c)`` and keeps ``a``'s dtype.
    """
    n_rows, n_cols = a.shape
    buf = np.empty((n_rows, b0, n_cols, b1), dtype=a.dtype)
    # Assigning the (r, 1, c, 1) view broadcasts it across the b0 and b1 axes.
    buf[...] = a[:, np.newaxis, :, np.newaxis]
    return buf.reshape(n_rows * b0, n_cols * b1)

def wim(a, h, w):
    """Benchmark candidate: repeat a 2-D array's elements using ``np.kron``.

    The Kronecker product of ``a`` with an ``(h, w)`` block of ones (in
    ``a``'s dtype) repeats each element ``h`` times along axis 0 and ``w``
    times along axis 1.
    """
    block = np.ones((h, w), dtype=a.dtype)
    return np.kron(a, block)

设置

import numpy as np
import pandas as pd
from timeit import timeit

# Timing table: one row per implementation, one column per repeat factor
# (the same factor is applied to both axes).
res = pd.DataFrame(
    dtype=float,
    columns=[5, 10, 20, 50, 100, 500, 1000],
    index=['chris', 'alex_riley', 'paul_panzer', 'wim'],
)

# 10x10 input shared by every timing run.
a = np.arange(10 * 10).reshape(10, 10)

for f in res.index:
    for c in res.columns:
        # h, w and a must be module-level names: the setup string below
        # imports them (and the function under test) from __main__.
        h = w = c
        res.at[f, c] = timeit(
            stmt='{}(a, h, w)'.format(f),
            setup='from __main__ import h, w, a, {}'.format(f),
            number=50,
        )

输出

enter image description here

答案 2 :(得分:3)

另一种解决方案是使用as_strided。kron比连续使用两次repeat慢得多。我发现as_strided在很多情况下比两次repeat快得多(但对于小数组[< 250x250]且每个维度只放大一倍的情况,as_strided反而更慢)。as_strided技巧如下:

import numpy as np
from numpy.lib.stride_tricks import as_strided

a = np.arange(1000000).reshape((1000, 1000))  # dummy data

N, M = 4, 3  # number of times to replicate each point along axis 0 and axis 1
H, W = a.shape
# View `a` as (H, N, W, M) with stride 0 on the two inserted axes, so moving
# along them re-reads the same element; the reshape then merges the axes
# into the final (H*N, W*M) array.
b = as_strided(a, (H, N, W, M), (a.strides[0], 0, a.strides[1], 0)).reshape((H*N, W*M))

这通过使用长度为0的步幅(stride)来工作,这会使numpy多次读取同一个值(直到进入下一个维度)。最终的reshape确实会复制数据,但只复制一次,而不像连续两次repeat那样复制两次数据。

答案 3 :(得分:0)

勘误表:我只考虑2倍的向上采样。

TL; DR事实证明,在OpenCV版本之后,

np.repeat(np.repeat(a, 2, axis=1), 2, axis=0)

是最快的。答案是-如今numpy中没有更快的方法, 但是您可以通过更改轴的顺序来获得一些改进。

如果您不介意OpenCV-

cv.resize(a, None, fx=2, fy=2, interpolation=cv.INTER_NEAREST)

这是测试。

import timeit
import numpy as np
import cv2 as cv
test = np.zeros((16, 16, 3), dtype=np.float32)

def measure(f):
    """Time 1000 calls of ``f(test)`` and print the total.

    ``test`` is the module-level sample array. Returns the pair
    ``(f, elapsed)`` where ``elapsed`` is the value reported by
    ``timeit.timeit``.
    """
    namespace = {"test": test, "f": f}
    elapsed = timeit.timeit("f(test)", globals=namespace, number=1000)
    print("%s - %f" % (f.__name__, elapsed))
    return f, elapsed

def fastest(c):
    """Class decorator: time every ``alg_*`` attribute of ``c`` and return the quickest.

    Prints the class name, lets ``measure`` time (and print) each candidate
    in ``dir(c)`` order, prints the winner, and returns the winning
    function. Used as a decorator, the decorated name is therefore bound to
    that function rather than to the class.
    """
    print(c.__name__)
    timings = [
        measure(getattr(c, attr))
        for attr in dir(c)
        if attr.startswith("alg_")
    ]
    # Each entry is (function, elapsed); pick the smallest elapsed time.
    winner, best = min(timings, key=lambda pair: pair[1])
    print("%s winner: %s - %f" % (c.__name__, winner.__name__, best))
    return winner

@fastest
class nn:
    """Candidate implementations of 2x nearest-neighbour upsampling along axes 0 and 1.

    Every ``alg_*`` function takes a single array ``a`` and returns it with
    each element repeated twice along axis 0 and twice along axis 1; any
    trailing axes (``a.shape[2:]``) keep their size. The functions take no
    ``self``: the class is only a namespace that ``fastest`` scans for
    ``alg_*`` names, and ``fastest`` returns the quickest function, so the
    name ``nn`` ends up bound to that function.
    """

    @staticmethod
    def alg_01(a):
        # Repeat along axis 0 first, then axis 1.
        return np.repeat(np.repeat(a, 2, axis=0), 2, axis=1)

    @staticmethod
    def alg_02(a):
        # Same as alg_01 with the axis order swapped.
        return np.repeat(np.repeat(a, 2, axis=1), 2, axis=0)

    @staticmethod
    def alg_03(a):
        # Insert length-1 axes at positions 1 and 3, double each by
        # concatenation (axis 1 first), then merge them into axes 0 and 1.
        b = a[:, None, :, None]
        b = np.concatenate((b, b), axis=1)
        b = np.concatenate((b, b), axis=3)
        return b.reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])

    @staticmethod
    def alg_04(a):
        # Same as alg_03 but doubling axis 3 before axis 1.
        b = a[:, None, :, None]
        b = np.concatenate((b, b), axis=3)
        b = np.concatenate((b, b), axis=1)
        return b.reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])

    @staticmethod
    def alg_05(a):
        # Multiply by a broadcastable block of float32 ones; the block has
        # size 2 on the inserted axes and size 1 everywhere else.
        return (a[:, None, :, None]*np.ones((1, 2, 1, 2)+((1,)*len(a.shape[2:])), dtype=np.float32)).reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])

    @staticmethod
    def alg_06(a):
        # OpenCV nearest-neighbour resize with a scale factor of 2 in x and y.
        return cv.resize(a, None, fx=2, fy=2, interpolation=cv.INTER_NEAREST)

    @staticmethod
    def alg_07(a):
        # Index the inserted length-1 axes with (0, 0) to duplicate them
        # (axis 1 first, then axis 3).
        return a[:, None, :, None][:, (0, 0)][:, :, :, (0, 0)].reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])

    @staticmethod
    def alg_08(a):
        # Same as alg_07 but duplicating axis 3 before axis 1.
        return a[:, None, :, None][:, :, :, (0, 0)][:, (0, 0)].reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])

    @staticmethod
    def alg_09(a):
        # np.kron left-pads the lower-rank operand with length-1 axes, so a
        # plain (2, 2) block would repeat the LAST two axes of an N-D input
        # (a (16, 16, 3) array would become (16, 32, 6)). Trailing 1s pin the
        # block to axes 0 and 1 and leave the remaining axes untouched.
        return np.kron(a, np.ones((2, 2) + (1,) * (a.ndim - 2), dtype=np.float32))

    @staticmethod
    def alg_10(a):
        # Stretch the inserted length-1 axes with broadcast_to, then merge.
        return np.broadcast_to(a[:, None, :, None], (a.shape[0], 2, a.shape[1], 2)+a.shape[2:]).reshape(a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:])

    @staticmethod
    def alg_11(a):
        # Fill a preallocated float32 buffer by broadcast assignment, then
        # change its shape in place with ndarray.resize.
        ret = np.empty((a.shape[0], 2, a.shape[1], 2, *a.shape[2:]), dtype=np.float32)
        ret[...] = a[:, None, :, None]
        ret.resize((a.shape[0]<<1, a.shape[1]<<1, *a.shape[2:]), refcheck=False)
        return ret

结果是:

nn
alg_01 - 0.040967
alg_02 - 0.033744
alg_03 - 0.057969
alg_04 - 0.048739
alg_05 - 0.076595
alg_06 - 0.078638
alg_07 - 0.084692
alg_08 - 0.084539
alg_09 - 0.344339
alg_10 - 0.078707
alg_11 - 0.049424
nn winner: alg_02 - 0.033744