如何通过给定大小和期望的总和获得随机数列表,完全支持
我有一个代码sum-int.ts sum-float.ts internal/sum-num.ts
我想做的事
但现在出现了问题
感谢 @SeverinPappadeux 提供整数(int)版本的实现,以及浮点数(float)版本的思路。
{ size: 2, sum: 5, min: 0, max: 5, n: 5, maxv: 5 }
true 0 5 [ 2, 3 ] 0 [ 2, 3 ]
{ bool: true,
ret_a: [ 2, 3 ],
a_sum: 5,
ret_b: [ 2, 3 ],
b_sum: 5 }
[ 2, 3 ] 5
----------
{ size: 6, sum: 13, min: -8, max: 15, n: 61, maxv: 23 }
false 0 61 [ 9, 8, 7, 3, 6, 28 ] -8 [ 9, 8, 7, 3, 6, 28 ]
false 1 61 [ 11, 9, 7, 4, 5, 25 ] -8 [ 11, 9, 7, 4, 5, 25 ]
true 2 13 [ 1, -1, 0, -2, 2, 13 ] -8 [ 9, 7, 8, 6, 10, 21 ]
{ bool: true,
ret_a: [ 9, 7, 8, 6, 10, 21 ],
a_sum: 61,
ret_b: [ 1, -1, 0, -2, 2, 13 ],
b_sum: 13 }
[ 1, -1, 0, -2, 2, 13 ] 13
----------
{ size: 6, sum: -13, min: -8, max: 15, n: 35, maxv: 23 }
true 0 -13 [ 0, -6, -1, -4, -7, 5 ] -8 [ 8, 2, 7, 4, 1, 13 ]
{ bool: true,
ret_a: [ 8, 2, 7, 4, 1, 13 ],
a_sum: 35,
ret_b: [ 0, -6, -1, -4, -7, 5 ],
b_sum: -13 }
[ 0, -6, -1, -4, -7, 5 ] -13
{ size: 6, sum: 0, min: -8, max: 15, n: 48, maxv: 23 }
true 0 0 [ -1, 0, -3, -2, -4, 10 ] -8 [ 7, 8, 5, 6, 4, 18 ]
{ bool: true,
ret_a: [ 7, 8, 5, 6, 4, 18 ],
a_sum: 48,
ret_b: [ -1, 0, -3, -2, -4, 10 ],
b_sum: 0 }
[ -1, 0, -3, -2, -4, 10 ] 0
/**
 * Build a generator of integer lists that have `size` elements, add up to
 * `sum`, and keep every element inside `[min, max]`.
 *
 * Uniqueness of the elements is NOT guaranteed; candidates with more distinct
 * values are merely preferred when several valid candidates are available.
 * thx @SeverinPappadeux for int version
 *
 * Method: multinomial samples with total `|sum - size * min|` are drawn,
 * every count is shifted by `min`, and a sample is kept only when all shifted
 * values are inside `[min, max]` and their total equals `sum`.
 *
 * Defaults applied when a value is unset (per `isUnset`):
 * - `sum` -> `sum_1_to_n(size)`
 * - `min` -> `0` when `sum > 0`, otherwise `sum`
 * - `max` -> `Math.abs(sum)`
 *
 * @see https://stackoverflow.com/questions/53279807/how-to-get-random-number-list-with-fixed-sum-and-size
 *
 * @param argv - options; `random`, `size`, `sum`, `min`, `max` and the
 *   optional `limit` (number of samples drawn per generator call) are read.
 * @returns a function that returns one list per call; the returned array is
 *   always a fresh copy when it comes from the fall-back cache.
 * @throws Error (from the returned function) when a call produces no valid
 *   sample and the fall-back cache is empty.
 */
export function coreFnRandSumInt(argv: ISumNumParameterWuthCache)
{
	let {
		random,
		size,
		sum,
		min,
		max,
	} = argv;

	let sum_1_to_size = sum_1_to_n(size);

	sum = isUnset(sum) ? sum_1_to_size : sum;
	expect(sum).integer();

	min = isUnset(min) ? (sum > 0 ? 0 : sum) : min;
	max = isUnset(max) ? Math.abs(sum) : max;
	expect(min).integer();
	expect(max).integer();

	// total and upper bound after shifting the interval so that it starts at 0
	let n_sum = Math.abs(sum - size * min);
	let maxv = max - min;

	if (sum > 0)
	{
		expect(sum).gt(min)
	}

	/**
	 * pre-check (intentionally disabled in the original source)
	 */
	//expect(maxv, `(max - min) should > sum_1_to_size`).gte(sum_1_to_size);

	/**
	 * probabilities
	 * NOTE(review): presumably one probability per slot - confirm against get_prob
	 */
	let prob = get_prob(size, maxv);
	expect(prob).is.array.lengthOf(size);

	/**
	 * make rmultinom use with random.next
	 */
	let rmultinomFn = libRmath.Multinomial(fakeLibRmathRng(random.next)).rmultinom;

	/**
	 * low value for speed up, but more chance fail
	 *
	 * The original expression was `argv.limit || 5 || n_sum`; the last operand
	 * could never be selected because `5` is always truthy, so it is dropped.
	 */
	let n_len = argv.limit || 5;

	/**
	 * rebase number: added to every sampled count to move it back into [min, max]
	 */
	let n_diff: number = min;

	/**
	 * Draw `n_len` candidate samples and return the valid ones, ordered so
	 * that the candidate with the most distinct values comes first.
	 */
	const rmultinomCreateFn = (n_len: number) =>
	{
		return (rmultinomFn(n_len, n_sum, prob) as number[][])
			.reduce((a, value) =>
			{
				let i = value.length;
				let b_sum = 0;
				let bool = false;
				let unique_len = 0;

				while (i--)
				{
					let v = value[i];
					let n = v + n_diff;

					// first occurrence of this raw count => one more distinct value;
					// entries before index i are still un-shifted, so the check is safe
					if (value.indexOf(v) === i)
					{
						unique_len++;
					}

					if (n >= min && n <= max)
					{
						bool = true;
						value[i] = n;
						b_sum += n
					}
					else
					{
						// out of range: reject the whole sample
						bool = false;
						break;
					}
				}

				if (bool && b_sum === sum)
				{
					a.push({
						value,
						unique_len,
						b_sum,
						bool,
					})
				}

				return a
			}, [] as {
				value: number[],
				unique_len: number,
				b_sum: number,
				bool: boolean,
			}[])
			.sort((a, b) => b.unique_len - a.unique_len)
			;
	};

	/**
	 * pre-make fail-back value
	 */
	const cache_max = 10;
	let cache: number[][] = [];

	{
		let arr = array_unique(rmultinomCreateFn(200));

		if (arr.length)
		{
			let i = Math.min(cache_max, arr.length);

			while (i--)
			{
				cache.push(arr[i].value)
			}

			// Canonicalise before de-duplicating. A numeric comparator is required:
			// the default Array#sort compares elements as strings, which misorders
			// negative and multi-digit integers.
			cache = array_unique(cache.map(v => v.slice().sort((x, y) => x - y)))
		}
	}

	/**
	 * try reset memory
	 */
	argv = undefined;

	return () =>
	{
		let arr = rmultinomCreateFn(n_len);

		let ret_b: number[] | undefined;
		let bool_toplevel = false;

		let c_len = cache.length;

		if (arr.length)
		{
			ret_b = arr[0].value;
			bool_toplevel = arr[0].bool;

			if (bool_toplevel && c_len < cache_max)
			{
				// store a copy so that a caller mutating the result cannot corrupt the cache
				cache.push(ret_b.slice());
			}
		}
		else if (c_len)
		{
			// nothing valid this round: fall back to a random cached list (copied for the same reason)
			let i = UtilDistributions.randIndex(random, c_len);

			ret_b = cache[i].slice();
			bool_toplevel = true;
		}

		if (!bool_toplevel || !ret_b)
		{
			throw new Error(`can't generator value by current input argv, or try set limit for high number`)
		}

		return ret_b;
	}
}
答案 0 :(得分:2)
好的,我们开始吧。先从整数问题入手。最简单的方法是使用一种统计分布,它天然产生一组总和为固定值的数字。这样的分布确实存在——多项分布(Multinomial distribution)。它的总和固定为 `n`,每个采样值都落在 0 到 `n` 之间。由于要求的采样区间是任意的,我们先把区间平移,使最小值为 0,采样之后再平移回去。请注意,采样值可能高于所需的最大值,因此必须使用拒绝采样(rejection)技术:只要样本中有任何值大于 max,就丢弃该样本并重新采样。
我们使用来自Python / Numpy的多项采样。除了拒绝,您还可以添加唯一性测试。代码,python 3.7
import numpy as np
def sample_sum_interval(n: int, p, maxv: int):
    """
    Draw one multinomial sample whose counts add up to ``n`` and none of
    which exceeds ``maxv`` (rejection sampling).

    :param n: total of the returned counts (number of multinomial trials)
    :param p: sequence of category probabilities, one per returned value
    :param maxv: largest value allowed for any single count
    :return: NumPy integer array of ``len(p)`` counts summing to ``n``
    :raises ValueError: if ``n`` cannot be split into ``len(p)`` counts that
        are all ``<= maxv``; without this check the loop would never end
    """
    if n > len(p) * maxv:
        raise ValueError(
            "cannot split n=%d into %d values that are all <= %d" % (n, len(p), maxv)
        )

    while True:
        q = np.random.multinomial(n, p, size=1)[0]
        # reject the sample when any count is outside [0...maxv]
        if not np.any(q > maxv):
            # a uniqueness test could be added here: len(np.unique(q)) == len(q)
            return q
# Example: 6 integers in [-8...13] that add up to 13.
k = 6
min = -8
max = 13
sum = 13

# Shift the problem so that the interval starts at 0.
n = sum - k * min   # redefined sum
maxv = max - min    # redefined max, min would be 0
p = np.full(k, 1.0 / k, dtype=np.float64)  # equal probabilities

# Draw three independent samples and print each with its sum and mean.
for _ in range(3):
    q = sample_sum_interval(n, p, maxv) + min  # back to original interval
    print(q)
    print(np.sum(q))
    print(np.mean(q))
输出
[ 5 0 -2 3 3 4]
13
2.1666666666666665
[ 3 3 -1 2 1 5]
13
2.1666666666666665
[-4 0 0 3 10 4]
13
2.1666666666666665
要将其转换为Javascript,您将需要多项式采样或二项式采样,从二项式很容易就可以得到多项式。
更新
好的,当我不把 `min` 加回结果时,输出的总和为 61(13 + 6 * 8),取值范围为 [0 ... 21]:
[11 7 6 8 9 20]
61
10.166666666666666
[ 5 10 14 13 14 5]
61
10.166666666666666
[ 9 12 7 15 7 11]
61
10.166666666666666
显然,有一个Javascript library with multinomial sampling,它是根据 R 建模的, 谢谢@bluelovers
应该在循环中这样调用:
v = rmultinom(1, n, p);
然后检查 `v` 中的每个值是否都在 [0 ... maxv] 范围内:全部在范围内则接受,否则拒绝并重新采样。
UPDATE II
让我快速(抱歉,实在没有时间,明天再讨论)描述一下我对浮点数(float)版本的思路。对于在 [0 ... 1] 范围内生成、且总和始终固定为 1 的一组数,也有一个类似的分布,称为狄利克雷分布(Dirichlet distribution)。在 Python / NumPy 中对应的函数是 numpy.random.dirichlet,文档见 https://docs.scipy.org/doc/numpy-1.15.1/reference/generated/numpy.random.dirichlet.html
假设我从 Dirichlet 分布采样了 $n$ 个数 $d_i$,然后将它们映射到 $[\min, \max]$ 区间:
$$x_i = \min + d_i\,(\max - \min)$$
然后利用所有 $d_i$ 之和为 1 的性质,对它们全部求和:
$$\mathrm{Sum} = n \cdot \min + (\max - \min) = (n-1) \cdot \min + \max$$
如果 Sum 是固定的,就意味着我们必须重新定义最大值——称之为采样最大值 $\max_s$,即 $\max_s = \mathrm{Sum} - (n-1) \cdot \min$。
所以采样过程如下:从 Dirichlet 分布采样 $n$ 个 [0 ... 1] 范围内的数,将它们映射到 $[\min, \max_s]$ 区间,然后检查这些数是否全部不超过期望的 `max`(原始的、未重新定义的最大值)。如果是,则接受;否则拒绝并重新采样,做法与整数情况相同。
下面的代码
import numpy as np
def my_dirichlet(n: int):
    """
    Return ``n`` non-negative floats that add up to 1.

    The values are unit-scale exponential draws rescaled by the reciprocal
    of their total, which is equivalent to numpy.random.dirichlet when all
    alphas are equal to 1.
    """
    draws = np.random.exponential(scale=np.float64(1.0), size=n)
    inv_total = 1.0 / draws.sum()
    return inv_total * draws
def sample_sum_interval(n: int, summa: np.float64, minv: np.float64, maxv: np.float64):
    """
    Draw ``n`` floats in ``[minv...maxv]`` that add up to ``summa``.

    Dirichlet weights (which sum to 1) are mapped onto ``[minv...maxs]``,
    where ``maxs`` is chosen so that the mapped values sum to ``summa``;
    a sample is rejected and redrawn when any value exceeds ``maxv``.

    :param n: number of values to return
    :param summa: required total of the returned values
    :param minv: lower bound of every value
    :param maxv: upper bound of every value
    :return: NumPy float array of length ``n``
    :raises ValueError: if ``summa`` is outside ``[n*minv, n*maxv]``; such a
        request has no solution (previously it either looped forever or
        returned values below ``minv``)
    """
    if summa < n * minv or summa > n * maxv:
        raise ValueError(
            "summa=%r is not reachable with %d values in [%r, %r]" % (summa, n, minv, maxv)
        )

    # redefined maximum of the interval, since the sum is fixed
    maxs = summa - np.float64(n - 1) * minv

    while True:
        # equivalent to np.random.dirichlet(np.full(n, 1.0))
        q = my_dirichlet(n)
        q = minv + q * (maxs - minv)  # map to [minv...maxs]
        # we need values in [minv...maxv]: accept only if nothing is above maxv
        if not np.any(q > maxv):
            return q
# Example: 5 floats in [-2...3] that add up to 1.
n = 5
min = np.float64(-2.0)
max = np.float64(3.0)
sum = np.float64(1.0)

# Draw three independent samples and print each one with its total.
for _ in range(3):
    q = sample_sum_interval(n, sum, min, max)
    print(q)
    print(np.sum(q))
我在代码中同时给出了标准的 NumPy Dirichlet 采样(见注释)和自定义的 Dirichlet 采样。libRmath.js 显然提供了指数分布采样,但没有 Dirichlet 采样;不过可以像上面那样,用指数分布采样加上自定义代码来代替。请记住,NumPy 用单个运算符对整个向量进行运算,循环是隐式的。
输出:
[-0.57390094 -1.80924001 0.47630282 0.80008638 2.10675174]
1.0000000000000013
[-1.12192892 1.18503129 0.97525135 0.69175429 -0.73010801]
0.9999999999999987
[-0.34803521 0.36499743 -1.165332 0.9433809 1.20498888]
0.9999999999999991