我正在用 HTML5/JS 编写一个测验/教学游戏,每一局会从一个更大的题库(需要掌握的题目集合)中给玩家抽取一组 10 道题。游戏会持续跟踪玩家的得分,并且更倾向于从题目列表中抽到玩家不擅长的题目。
为了构建概率分布列表,我使用下面的alias method,它在O(1)时间内选择一个项目,同时完全遵守分布:
/**
 * Builds a random selector over question indices using the Alias Method
 * for discrete probability distributions (see
 * https://en.wikipedia.org/wiki/Alias_method for an explanation).
 *
 * The per-question weights are filled in by the elided business logic
 * below; everything after it turns those weights into an alias table.
 *
 * @returns {function(): number} A function that picks one index per call
 *     using one table lookup and two Math.random() calls.
 * @throws {Error} If a normalized weight is not a number (e.g. NaN).
 */
function generate_random_question_selector() {
    var i = 0;
    var probabilities = [], aliases = [];
    var probSum = 0;
    /* ... Business logic to fill probabilities array ... */
    // Normalize all probabilities to average to 1
    // and categorize each probability as to where it fits
    // in that scale
    var probMultiplier = probabilities.length / probSum;
    var overFull = [], underFull = [];
    probabilities = probabilities.map(function(p, i) {
        var newP = p * probMultiplier;
        if (newP > 1) overFull.push(i);
        else if (newP < 1) underFull.push(i);
        else if (newP !== 1) {
            // Only NaN fails all three comparisons above.
            throw new Error("Non-numerical value got into scores");
        }
        return newP;
    });
    // Keep both queues in ascending index order. A comparator is required:
    // the default sort compares elements as strings, which would place
    // index 10 before index 2.
    var byIndex = function(a, b) { return a - b; };
    overFull.sort(byIndex);
    underFull.sort(byIndex);
    // Process both queues by having the first under-full entry
    // have the rest of its space occupied by the first
    // over-full entry, re-categorizing the over-full entry
    // as needed
    while (overFull.length > 0 || underFull.length > 0) {
        if (!(overFull.length > 0 && underFull.length > 0)) {
            // only reached due to rounding errors.
            // Just assign all the remaining probabilities to 1
            var notEmptyArray = overFull.length > 0 ? overFull : underFull;
            notEmptyArray.forEach(function(index) {
                probabilities[index] = 1;
            });
            break; // get out of the while loop
        }
        aliases[underFull[0]] = overFull[0];
        // The over-full entry donates exactly the space the under-full
        // entry was missing (1 - its probability).
        probabilities[overFull[0]] += probabilities[underFull[0]] - 1;
        underFull.shift();
        if (probabilities[overFull[0]] > 1) overFull.push(overFull.shift());
        else if (probabilities[overFull[0]] < 1) underFull.push(overFull.shift());
        else overFull.shift();
    }
    return function() {
        // Pick a column uniformly, then choose between the column's own
        // index and its alias according to the column's probability.
        var index = Math.floor(Math.random() * probabilities.length);
        return Math.random() < probabilities[index] ? index : aliases[index];
    };
}
这种方法效果很好,但我的业务需求之一是题目不能重复。我目前用朴素的“抽到重复就重新抽取”的办法来实现这一点,但很明显,如果只有不到 10 个题目的概率远高于其他题目,这种做法就会出问题:
// Build a set of num_questions distinct question numbers.
var selectQuestion = generate_random_question_selector();
var questionSet = [];
var i, question_num;
for (i = 0; i < num_questions; i += 1) {
    // Naive re-roll: draw again for as long as the drawn number is
    // already part of the set.
    question_num = selectQuestion();
    while (questionSet.indexOf(question_num) !== -1) {
        question_num = selectQuestion();
    }
    questionSet.push(question_num);
}
能否改进这种方法,或者有没有别的办法可以让我高效地对题目进行无放回抽样?
答案 0 :(得分:1)
别名方法不适合无放回抽样,因为每次抽取都要使用不同的概率分布,而计算(或更新)别名表需要 O(n) 时间。
您需要一种可以更高效地更新的数据结构。例如,您可以为所有值构建一棵搜索树(其中每个节点存储其子树的总权重),这样采样和更新概率分布都可以在 O(log n) 时间内完成。
如果我们通过把条目的概率设为 0 来删除它,那么这棵树的结构永远不会改变,因此可以直接用数组来编码。
以下是一些代码:
/**
 * Converts the shared `weights` array in place from per-node weights into
 * subtree totals.
 *
 * Layout: index n is the parent of indices 2*n and 2*n+1, so index 0 is
 * unused and index 1 is the root of the tree.
 */
function prepare() {
    var child = weights.length - 1;
    // Walk from the last node back to index 2, adding each node's
    // (already complete) total to its parent.
    while (child > 1) {
        var parent = child >> 1;
        weights[parent] += weights[child];
        child -= 1;
    }
}
/**
 * Draws one node index from the weight tree held in the shared `weights`
 * array (index n is the parent of 2*n and 2*n+1; index 1 is the root and
 * each entry is expected to hold its subtree total, as produced by
 * prepare()).
 *
 * A random key in [0, root total) is walked down the tree: it descends
 * left if it falls within the left subtree's total, right if it falls
 * within the right subtree's total, and otherwise stops at the current node.
 *
 * @returns {number} The index of the selected node. When neither child
 *     range contains the key at the root (e.g. the root total is 0),
 *     this is 1.
 */
function sample() {
    var index = 1;
    var key = Math.random() * weights[index];
    for (;;) {
        var left = index << 1;
        var right = left + 1;
        // Declared locally; previously these leaked as implicit globals
        // (a ReferenceError in strict mode / ES modules).
        // Children past the end of the array count as weight 0.
        var leftWeight = weights[left] || 0;
        var rightWeight = weights[right] || 0;
        if (key < leftWeight) {
            index = left;
        } else {
            key -= leftWeight;
            if (key < rightWeight) {
                index = right;
            } else {
                // The key falls in neither child's range: the current
                // node itself is selected.
                return index;
            }
        }
    }
}
/**
 * Removes a node from the weight tree held in the shared `weights` array
 * by subtracting the node's own weight from its entry and from every
 * ancestor up to the root. No element is added to or deleted from the
 * array; only the stored totals change.
 *
 * @param {number} index - Index of the node to remove (1 is the root).
 */
function remove(index) {
    var left = index << 1;
    var right = left + 1;
    // Declared locally; previously these leaked as implicit globals
    // (a ReferenceError in strict mode / ES modules).
    // Children past the end of the array count as weight 0.
    var leftWeight = weights[left] || 0;
    var rightWeight = weights[right] || 0;
    // The node's own weight is its subtree total minus its children's totals.
    var w = weights[index] - leftWeight - rightWeight;
    // Stops after the root: 1 >> 1 is 0, so the unused slot 0 is never touched.
    while (index > 0) {
        weights[index] -= w;
        index = index >> 1;
    }
}
测试代码:
/**
 * Samples one node, removes it from the tree so it cannot be drawn again,
 * and logs the drawn index followed by the updated `weights` array.
 */
function retrieve() {
    var picked = sample();
    remove(picked);
    console.log(picked);
    console.log(weights);
}
// Demo: a four-node tree with weights 1..4 (slot 0 is the unused
// placeholder in front of the root at index 1).
// Declared with `var` so the assignment does not rely on implicit global
// creation, which throws a ReferenceError in strict mode / ES modules.
var weights = [0, 1, 2, 3, 4];
prepare();
console.log(weights);
// Four draws without replacement, one per node in the tree.
retrieve();
retrieve();
retrieve();
retrieve();