我正在尝试在R中编写基本的Vanilla TrueSkill (3.1)算法,但我得到了一些奇怪的结果。
我的代码如下:
# A simple test between two players repeatedly laying one another
betaSq = 0.1
obs = 10000
p1_skills = 0.333
p2_skills = 0
p1_draws = rnorm(obs, p1_skills, sqrt(betaSq))
p2_draws = rnorm(obs, p2_skills, sqrt(betaSq))
p1_pred_mu = rep(NA, obs+1)
p1_pred_sigmaSq = rep(NA, obs+1)
p2_pred_mu = rep(NA, obs+1)
p2_pred_sigmaSq = rep(NA, obs+1)
# Initial values
p1_pred_mu[1] = 0
p1_pred_sigmaSq[1] = 1
p2_pred_mu[1] = 0
p2_pred_sigmaSq[1] = 1
results = p1_draws > p2_draws
probs = rep(NA, obs)
# Run TrueSkill
for (i in seq(2,obs+1)) {
probs[i-1] = predictProb(p1_pred_mu[i-1], p1_pred_sigmaSq[i-1], p2_pred_mu[i-1], p2_pred_sigmaSq[i-1], betaSq)
out = updateSkill(p1_pred_mu[i-1], p1_pred_sigmaSq[i-1], p2_pred_mu[i-1], p2_pred_sigmaSq[i-1], betaSq, results[i-1])
# Now update based on the out
p1_pred_mu[i] = out$mu1
p1_pred_sigmaSq[i] = out$sigmaSq1
p2_pred_mu[i] = out$mu2
p2_pred_sigmaSq[i] = out$sigmaSq2
}
# Output results
dev.new()
mu = p1_pred_mu
lower = qnorm(0.05, p1_pred_mu, p1_pred_sigmaSq)
upper = qnorm(0.95, p1_pred_mu, p1_pred_sigmaSq)
plot(mu, ylim = c(min(lower), max(upper)), main = "p1")
lines(lower)
lines(upper)
dev.new()
mu = p2_pred_mu
lower = qnorm(0.05, p2_pred_mu, p2_pred_sigmaSq)
upper = qnorm(0.95, p2_pred_mu, p2_pred_sigmaSq)
plot(mu, ylim = c(min(lower), max(upper)), main = "p2")
lines(lower)
lines(upper)
a = filter(probs, rep(1, 20))/20
dev.new()
plot(a)
print(sprintf("Mean p1: %g", mean(p1_pred_mu)))
print(sprintf("Mean p2: %g", mean(p2_pred_mu)))
print(sprintf("Mean results: %g", mean(results)))
print(sprintf("Mean predicted results: %g", mean(probs)))
调用的函数是:
# Functions
updateSkill <- function(mu1, sigmaSq1, mu2, sigmaSq2, betaSq, result) {
# http://papers.nips.cc/paper/3331-trueskill-through-time-revisiting-the-history-of-chess.pdf
c = 2*betaSq + sigmaSq1 + sigmaSq2
if (result == 1) {
# Player 1 wins
v = dnorm((mu1-mu2)/c)/pnorm((mu1-mu2)/c)
w = v*(v+(mu1-mu2)/c)
mu1 = mu1 + (sigmaSq1/c)*v
mu2 = mu2 - (sigmaSq2/c)*v
sigmaSq1 = sigmaSq1 * sqrt(1 - (sigmaSq1/c^2)*w)
sigmaSq2 = sigmaSq2 * sqrt(1 - (sigmaSq2/c^2)*w)
} else if (result == 0) {
# Player 2 wins
v = dnorm((mu2-mu1)/c)/pnorm((mu2-mu1)/c)
w = v*(v+(mu2-mu1)/c)
mu1 = mu1 - (sigmaSq1/c)*v
mu2 = mu2 + (sigmaSq2/c)*v
sigmaSq1 = sigmaSq1 * sqrt(1 - (sigmaSq1/c^2)*w)
sigmaSq2 = sigmaSq2 * sqrt(1 - (sigmaSq2/c^2)*w)
}
return(list(mu1=mu1, mu2=mu2, sigmaSq1=sigmaSq1, sigmaSq2=sigmaSq2))
}
predictProb <- function(mu1, sigmaSq1, mu2, sigmaSq2, betaSq) {
# Try to predict the probability of player 1 beating player 2 using Trueskill model
mean1 = mu1
mean2 = mu2
var1 = sigmaSq1 + betaSq
var2 = sigmaSq2 + betaSq
# Now the dist of player1 - player2 is N(mean1 - mean2, sqrt(var1 + var2))
prob1Wins = pnorm(0, mean2 - mean1, sqrt(var1 + var2))
return(prob1Wins)
}
我讨厌发布大量的代码blob,但我真的无法弄清楚出错的地方。
该程序运行并且预测的技能(分布到N(mu,sigma))收敛。然而,他们给出的预测概率并没有收敛到结果的真实概率!
示例输出为:
[1] "Mean p1: 0.0762161"
[1] "Mean p2: -0.0762161"
[1] "Mean results: 0.7733"
[1] "Mean predicted results: 0.631424"
知道出了什么问题吗?
答案 0 :(得分:0)
这不起作用的原因是因为在updateSkills函数的第3行它应该读取
c = sqrt(2*betaSq + sigmaSq1 + sigmaSq2)
不
c = 2*betaSq + sigmaSq1 + sigmaSq2