我想将一个索引数组划分为大小随机的块(块的大小取自一个有限的取值范围),并且这些块之间的顺序也要被打乱。我尝试了下面这段在此处(here)找到的代码,但它只针对大小相同的块。
a = np.arange(1, 100)


def chunk(xs, n):
    """Shuffle ``xs`` and yield it as ``n`` lists of near-equal length.

    The elements are copied into a list and shuffled with ``random.shuffle``,
    so the input itself is left untouched.  Every chunk holds
    ``len(xs) // n`` elements, and the first ``len(xs) % n`` chunks each
    receive one additional element taken from the tail of the shuffled list.

    Args:
        xs: Iterable of items to distribute.
        n: Number of chunks to produce.

    Yields:
        ``n`` lists that together contain every element of ``xs`` exactly
        once.

    Raises:
        ZeroDivisionError: If ``n`` is 0 (raised when the generator is first
            advanced).
    """
    ys = list(xs)
    random.shuffle(ys)
    size, extra = divmod(len(ys), n)
    # Elements that do not fit into n equal chunks; handed out one per chunk.
    leftovers = ys[size * n:]
    # A single loop over all n chunks replaces the original pair of loops:
    # the second loop reused the first loop's index, which is unbound when
    # there are no leftovers, and it relied on the Python 2-only xrange.
    for c in range(n):
        part = ys[c * size:(c + 1) * size]
        if c < extra:
            part.append(leftovers[c])
        yield part
换句话说,我怎样才能修改上述函数,使其生成一定数量的块(块的数量随机,并且块之间相互混洗),同时每个块的大小都是从某个范围(例如 [5-10])中随机选取的?
答案 0 :(得分:1)
这样做是可行的:
from itertools import chain
import numpy as np
a = np.arange(1, 100)


def chunk(xs, nlow, nhigh, shuffle=True):
    """Yield consecutive slices of ``xs`` whose lengths are drawn at random.

    Chunk lengths are sampled uniformly from the integers ``nlow..nhigh``
    (both inclusive).  Every chunk except the last has a length in that
    range; the last chunk takes whatever remains, so it is never longer than
    ``nhigh`` but may be shorter than ``nlow``.

    Args:
        xs: Array-like input; converted with ``np.asarray``.
        nlow: Smallest chunk length to draw.
        nhigh: Largest chunk length to draw.
        shuffle: When true, a shuffled copy of the input is chunked instead
            of the input itself.

    Yields:
        NumPy arrays that, concatenated in order, reproduce the (optionally
        shuffled) input.
    """
    data = np.asarray(xs)
    if shuffle:
        # Copy first so an ndarray passed by the caller is not reordered.
        data = data.copy()
        np.random.shuffle(data)
    total = data.size
    # total // nlow draws are always enough: even if every draw is the
    # minimum nlow, the drawn sizes leave fewer than nlow elements uncovered.
    sizes = np.random.randint(nlow, nhigh + 1, size=total // nlow)
    # Running totals of the sizes are the positions at which to cut.
    cuts = np.add.accumulate(sizes)
    # Keep only the cut positions that fall strictly inside the array.
    n_valid = np.searchsorted(cuts, total)
    begin = None
    for stop in cuts[:n_valid]:
        yield data[begin:stop]
        begin = stop
    # Whatever lies after the last kept cut forms the final chunk.
    yield data[begin:]


list(chunk(a, 5, 10))
输出:
[array([67, 79, 17, 62, 12, 37, 70, 24]),
array([98, 48, 88, 59, 47]),
array([52, 60, 89, 23, 43, 44]),
array([ 7, 27, 33, 74, 49, 2]),
array([ 6, 51, 40, 13, 56, 45]),
array([31, 3, 55, 10, 11, 46, 9, 42, 34]),
array([53, 22, 95, 41, 19, 32, 4, 69, 86]),
array([93, 68, 57, 65, 92, 76, 28, 63, 64, 58]),
array([91, 66, 18, 99, 21]),
array([36, 83, 15, 78, 1, 81, 97, 84]),
array([61, 71, 25, 94, 87, 20, 85, 38]),
array([ 8, 96, 75, 30, 77, 14, 72, 29]),
array([35, 90, 82, 73, 39, 5, 26, 50, 16]),
array([80, 54])]
我的原始答案没有对最后一个块的大小设置下限,因此它有时会小于指定的下限(不过绝不会大于上限)。据我所知,没有直接的办法可以解决这个问题。不过,通常只需拒绝落在不需要区域内的样本,就可以把该区域从随机分布中剔除。换句话说,只要丢弃最后一个块不够大的划分结果并重新抽样,就能确保最后一个块足够大:
def getIxs(xsize, nlow, nhigh):
    """Return random cut positions for an array of ``xsize`` elements.

    Chunk lengths are drawn uniformly from the integers ``nlow..nhigh``
    (both inclusive) and accumulated into cut positions.  Only positions
    strictly smaller than ``xsize`` are returned, so the result can be empty.

    Args:
        xsize: Number of elements in the array that will be cut.
        nlow: Smallest chunk length to draw.
        nhigh: Largest chunk length to draw.

    Returns:
        A 1-D NumPy integer array of increasing cut positions, each one
        below ``xsize``.
    """
    # xsize // nlow draws are always enough: even if every draw is the
    # minimum nlow, the drawn sizes leave fewer than nlow elements uncovered.
    sizes = np.random.randint(nlow, nhigh + 1, size=xsize // nlow)
    # Running totals of the sizes are the positions at which to cut.
    cuts = np.add.accumulate(sizes)
    # Drop every position that is not a valid interior index.
    return cuts[:np.searchsorted(cuts, xsize)]
def chunk(xs, nlow, nhigh):
    """Yield consecutive slices of ``xs``, every one ``nlow..nhigh`` long.

    Cut positions are drawn with ``getIxs`` and redrawn until the final
    chunk is at least ``nlow`` elements long (rejection sampling), so every
    yielded chunk, including the last, has a length in ``nlow..nhigh``.
    The input is not shuffled.

    Args:
        xs: Array-like input; converted with ``np.asarray``.
        nlow: Smallest allowed chunk length (at least 1).
        nhigh: Largest allowed chunk length (at least ``nlow``).

    Yields:
        NumPy arrays that, concatenated in order, reproduce the input.
        Nothing is yielded for an empty input.

    Raises:
        ValueError: If the bounds are invalid, or if the input length cannot
            be split into chunks whose lengths all lie in ``nlow..nhigh``.
    """
    xs = np.asarray(xs)
    total = xs.size
    if nlow < 1 or nhigh < nlow:
        raise ValueError("chunk sizes must satisfy 1 <= nlow <= nhigh")
    if total == 0:
        # Nothing to split; an empty chunk would violate the lower bound.
        return
    # k chunks can cover the input only if k*nlow <= total <= k*nhigh.  The
    # smallest usable k is ceil(total / nhigh); if even that k needs more
    # than `total` elements, no partition exists and redrawing would loop
    # forever.
    fewest = -(-total // nhigh)
    if fewest * nlow > total:
        raise ValueError(
            "cannot split %d elements into chunks of size %d..%d"
            % (total, nlow, nhigh)
        )
    # Redraw until the final chunk is large enough.
    while True:
        ixs = getIxs(total, nlow, nhigh)
        # No cut positions means the whole input forms a single chunk.
        last_cut = ixs[-1] if len(ixs) else 0
        if total - last_cut >= nlow:
            break
    begin = None
    for stop in ixs:
        yield xs[begin:stop]
        begin = stop
    # Whatever lies after the last cut forms the final chunk.
    yield xs[begin:]
这种方法应该保留每个块大小的整体随机性。
答案 1 :(得分:0)
您可以使用np.split(array,indices)
import random
# Build the integers 0..99 and shuffle them in place.
a = np.arange(100)
np.random.shuffle(a)
# Draw between 1 and 9 distinct cut positions (randint's upper bound is
# exclusive) and sort them, because np.split expects increasing indices.
# NOTE(review): position 0 can be drawn, which makes the first chunk empty,
# and the chunk sizes are not constrained to any range.
ind = sorted(random.sample(range(len(a)),k=np.random.randint(low=1,high=10)))
# Cut the shuffled array at those positions, giving len(ind) + 1 sub-arrays.
np.split(a,ind)
[array([41, 19, 85, 51, 34]),
array([71, 27]),
array([36, 16, 18, 74, 43, 96, 45, 97, 54, 75, 89, 48, 33, 32, 63, 98, 5,
80, 30, 17, 86, 14, 67]),
array([ 9, 70, 84, 99, 39]),
array([59, 20, 78, 61, 49, 37, 93]),
array([ 1, 79, 81, 69, 40, 42, 29, 8, 3, 68, 87, 66, 4, 21, 91, 92, 31]),
array([83, 15, 56, 2, 64, 95, 12, 0, 90, 77, 57, 60, 38, 76, 94, 22, 24,
6, 46, 65, 50, 62, 28, 44, 73, 13, 26, 72, 7, 53, 82, 47, 58, 35,
52, 25, 88, 11, 10, 55, 23])]