基本上,我在 JavaScript 中有一个二维数组,每一项由一个对象及其概率组成:
A,10 B,50 C,20 D,20
其中的数字表示该项目被选中的概率(以 100 为总数)。
按照指定的概率随机选择项目,最有效的算法是什么?
(目前我从 1–100 中取一个随机数,并根据各项概率的累计和来选择一项,所以 5 会得到 A,40 会得到 B,65 会得到 C,依此类推。)
谢谢!
M
答案 0 :(得分:1)
好吧,如果效率是一个问题,你的概率是整数,你可以创建一个这样的查找表:
// Builds a lookup table with one slot per unit of weight.
// Expects `probs` to be an object mapping each item to an integer weight,
// e.g. { "A": 10, "B": 90 }; the table is pre-sized to 100 slots.
var lut = new Array(100);
var lutIndex = 0;
var key, i;
for (key in probs) {
  // Write `key` into the next probs[key] consecutive slots.
  i = 0;
  while (i < probs[key]) {
    lut[lutIndex++] = key;
    i += 1;
  }
}
// The random item (represented by its key) is read straight from the table.
// NOTE(review): `randomNumber` is defined elsewhere; presumably it must be an
// integer index into lut (0..99 when the weights sum to 100) -- confirm.
var randomObject = lut[randomNumber];
答案 1 :(得分:0)
我们假设您的输入格式为
// Each entry is an [item, weight] pair.
var probs = [
  ['A', 10],
  ['B', 50],
  ['C', 20],
  ['D', 20]
];
计算累积概率:
// Running total of the weights; once the map finishes it holds their sum.
var cur = 0;
// cumulative[k] is the sum of the weights (element 1 of each pair) of
// entries 0..k of probs.
var cumulative = probs.map(function (entry) {
  cur = cur + entry[1];
  return cur;
});
这将创建一个数组
[10, 60, 80, 100]
编写一个函数来查找哪个元素对应于特定的概率:
/**
 * Locates the first slot of `cumulative` whose running total is greater than
 * or equal to `prob`.
 *
 * @param {number} prob - Value to look up against the running totals.
 * @returns {number|undefined} Index of the matching slot, or undefined when
 *   `prob` is larger than every running total.
 */
function get_element(prob) {
  var position = cumulative.findIndex(function (upperBound) {
    return prob <= upperBound;
  });
  // findIndex signals "not found" with -1; the contract here is undefined.
  return position === -1 ? undefined : position;
}
现在可以这样写:
// Scale Math.random() to the 0-100 range, map that value to an entry index
// with get_element, then take element 0 (the item) of the chosen pair.
probs[get_element(Math.random() * 100)] [0]
答案 2 :(得分:0)
如果您不想创建查找表,可以快速确定如下对象:
// I assume the probabilities are in an object probs
// in the form { "A": 10, "B": 90 }
/**
 * Maps a random integer onto a key of `probs` according to the weights.
 *
 * Each key owns a contiguous range of integers as wide as its weight: with
 * { "A": 10, "B": 90 }, the values 1..10 select "A" and 11..100 select "B".
 *
 * @param {number} i - Random integer between 1 and the sum of all weights.
 * @returns {string|undefined} The selected key, or undefined when `i` is
 *   greater than the sum of all weights.
 */
function getObjectForRandomInt(i) {
  let sum = 0;
  for (const key in probs) {
    // Add this key's weight BEFORE comparing, so that `sum` is the inclusive
    // upper bound of the key's own range rather than of the previous key's.
    sum += probs[key];
    if (i <= sum) return key;
  }
}
请注意,遍历 probs 的键的顺序并不重要(JavaScript 中对象字段是无序的)!即使每次遍历的顺序都不同,它仍然可以正常工作。如果你不相信,可以设想一下:你能否通过强制某种遍历顺序来扭曲结果?如果事先不知道下一个随机整数 i,这是不可能做到的。
答案 3 :(得分:0)
几周前,我发现了一个非常棒的解决方案。有一种称为别名方法(alias method)的算法,它的预处理时间为 O(n),每次采样的时间为 O(1):它会构建一对数组,把任意的离散概率选择转化为一组有偏硬币的抛掷。我的 JavaScript 实现如下:
/**
 * Generates a random selector function using the Alias Method for discrete
 * probability distributions (see https://en.wikipedia.org/wiki/Alias_method
 * for an explanation).
 *
 * The weights do not have to sum to 1 or 100; they are rescaled internally.
 *
 * @param {number[]} input_probabilities - One numeric weight per outcome.
 * @returns {function(): number} A function that, on each call, returns the
 *   index of a randomly chosen weight; each call draws two random numbers.
 * @throws {Error} If a weight is not a usable number after scaling (for
 *   example NaN, or when the weights sum to zero).
 */
function generate_random_selector(input_probabilities) {
  const probabilities = [];
  const aliases = [];
  let probSum = 0;

  // Fill the probabilities array.
  // Slightly modify this call for your own input structure.
  // If you need to return names instead of numbers, create
  // an indexed array of names during this step.
  input_probabilities.forEach(function (p) {
    probabilities.push(p);
    probSum += p;
  });

  // Normalize all probabilities to average to 1 and categorize each one
  // as to where it fits on that scale.
  const probMultiplier = probabilities.length / probSum;
  const overFull = [];
  const underFull = [];
  for (let i = 0; i < probabilities.length; i++) {
    const scaled = probabilities[i] * probMultiplier;
    if (scaled > 1) {
      overFull.push(i);
    } else if (scaled < 1) {
      underFull.push(i);
    } else if (scaled !== 1) {
      // Only NaN / non-numeric values fail all three comparisons.
      throw new Error("Non-numerical value got into probabilities");
    }
    probabilities[i] = scaled;
  }

  // Pair each under-full entry with an over-full one: the over-full entry
  // becomes the alias that occupies the rest of the under-full entry's slot,
  // and is then re-categorized by what it has left. pop() keeps every step
  // O(1); the pairing order does not affect the resulting distribution.
  while (overFull.length > 0 && underFull.length > 0) {
    const under = underFull.pop();
    const over = overFull.pop();
    aliases[under] = over;
    probabilities[over] += probabilities[under] - 1;
    if (probabilities[over] > 1) {
      overFull.push(over);
    } else if (probabilities[over] < 1) {
      underFull.push(over);
    }
    // Exactly 1: the entry fills its own slot and needs no alias.
  }

  // Entries left in either list are only there due to rounding errors.
  // Just assign all the remaining probabilities to 1.
  for (const index of overFull) probabilities[index] = 1;
  for (const index of underFull) probabilities[index] = 1;

  return function () {
    // First draw picks a slot uniformly; second draw is the biased coin that
    // decides between the slot's own index and its alias.
    const index = Math.floor(Math.random() * probabilities.length);
    // If you need to return a name and created an LUT during
    // the preparation code, set the index attained below
    // to a variable and return lut[index];
    return Math.random() < probabilities[index] ? index : aliases[index];
  };
}
// Example usage: build a selector for the weights 10/50/20/20, then draw one
// sample. Each call to selector() returns an index into the weights array.
var selector = generate_random_selector([10, 50, 20, 20]);
var sample = selector();
以下是它的工作原理:
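要构建这两个表(概率表和别名表),我们执行以下操作:先对所有概率进行缩放,使其平均值为 1;把大于 1 的项归为"过满",小于 1 的项归为"不足";然后反复取出一个不足项,用某个过满项填满它剩余的空间(并把该过满项记为它的别名),再根据过满项的剩余量将其重新归类,直到两类都被清空。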
此算法以 O(n) 时间运行;在使用任意精度小数的情况下,它保证能够正确终止——每次迭代至少会使一个项目变为"恰好填满",而最后一次迭代会同时使两个项目填满。如果由于微小的舍入误差,其中一个类别先于另一个类别清空,那么把剩余项目的概率设为 1 仍然可以得到可靠的结果。
最棒的一点可能是它会替你完成缩放(归一化)。元素的数量可以任意多,精度几乎不受限制(包括实数精度),而且这些权重的总和甚至不必等于 1、100 或其他任何特定的值。