我有一个数据库,其中包含每场比赛的参赛玩家以及每位玩家的得分。我正在尝试为我的预测模型创建一个评分(rating)变量,使用的是一篇博客文章(blog post)中的公式。
这是虚拟数据集:
# Dummy data set: four matches with ten players each, one row per
# (match, player) pair. `point` is the score the player obtained in that match.
df <- data.frame(
  matchid = rep(c(1, 2, 3, 4), each = 10),
  playerid = c(
    2, 3, 4, 5, 6, 7, 8, 9, 10, 11,             # match 1
    5, 2, 3, 4, 6, 12, 13, 14, 15, 16,          # match 2
    17, 18, 19, 20, 21, 22, 23, 24, 25, 26,     # match 3
    17, 19, 21, 18, 20, 22, 26, 24, 25, 23      # match 4
  ),
  point = c(
    52, 38, 34, 33, 16, 19, 16, 8, 10, 2,       # match 1
    38, 37, 31, 34, 21, 18, 18, 13, 9, -2,      # match 2
    45, 34, 37, 39, 12, 9, 7, -3, -1, -8,       # match 3
    47, 38, 31, 17, 26, 32, 28, 17, 16, 9       # match 4
  )
)
下面是我用 for 循环实现的尝试。对于包含 30000 场比赛的数据库,这个 for 循环运行得非常慢。请给我一些关于如何改进这个过程/循环的提示,我实在没有头绪。
## Multiplayer Elo-style rating update, processed one match at a time.
##
## Input : `df`, one row per (matchid, playerid) with a numeric `point`.
## Output: `players_ratings`         - data frame (playerid, rating) holding
##                                     every player's rating after all matches
##         `relative_rating_matches` - list with one data frame per match, in
##                                     order of first appearance, carrying each
##                                     player's rating *before* that match
##         `unique_matches`          - match ids in processing order
##
## Every player starts at `initial_rating`. After a match, a player's rating
## changes by k_factor * sum over opponents of (actual - expected result).
## Matches may have any number of players; only base R is used.

k_factor <- 20          # rating points at stake per pairwise comparison
initial_rating <- 1000
verbose <- TRUE         # set to FALSE on large data: printing dominates run time

## Ratings are kept in a plain numeric vector aligned with `player_ids`;
## updating a vector inside the loop is far cheaper than updating a data frame.
player_ids <- unique(df$playerid)
ratings <- rep(initial_rating, length(player_ids))
players_ratings <- data.frame(
  playerid = player_ids,
  rating = ratings,
  stringsAsFactors = FALSE
)

## Unique matches, in order of first appearance
unique_matches <- unique(df$matchid)

## Row indices of `df` for every match, computed once instead of re-filtering
## the whole table on each iteration. Grouping by position in `unique_matches`
## keeps match_rows[[k]] aligned with unique_matches[[k]].
match_rows <- split(seq_len(nrow(df)), match(df$matchid, unique_matches))

## One preallocated slot per match
relative_rating_matches <- vector("list", length(unique_matches))

### GENERATE RATING
for (index in seq_along(unique_matches)) {
  match_data <- df[match_rows[[index]], , drop = FALSE]
  rownames(match_data) <- NULL

  ## A player appearing twice in one match has no well-defined pairwise result.
  if (anyDuplicated(match_data$playerid) > 0) {
    stop(
      "match ", unique_matches[[index]],
      " lists the same player more than once",
      call. = FALSE
    )
  }

  ## Attach the current (pre-match) rating of each participant
  rating_idx <- match(match_data$playerid, player_ids)
  match_data$rating <- ratings[rating_idx]
  relative_rating_matches[[index]] <- match_data
  if (verbose) print(match_data)

  player_labels <- as.character(match_data$playerid)

  ## ACTUAL RESULTS MATRIX
  ## S[i, j] is the result of column player j against row player i:
  ## 1 if j scored more points, 0.5 on a tie, 0 if j scored fewer.
  point_gap <- outer(
    match_data$point, match_data$point,
    function(row_point, col_point) col_point - row_point
  )
  S <- (sign(point_gap) + 1) / 2
  dimnames(S) <- list(player_labels, player_labels)
  diag(S) <- 0
  if (verbose) print(S)

  ## EXPECTED WIN/LOSS MATRIX
  ## E[i, j] = 1 / (1 + 10^((rating_i - rating_j) / 400)): the expected result
  ## of column player j against row player i.
  rating_gap <- outer(match_data$rating, match_data$rating, "-")
  E <- 1 / (1 + 10^(rating_gap / 400))
  dimnames(E) <- list(player_labels, player_labels)
  diag(E) <- 0
  if (verbose) print(E)

  ## INCREMENTAL RATING: column sums give each player's total change
  increments <- colSums(k_factor * (S - E))
  if (verbose) {
    print(data.frame(
      playerid = player_labels,
      rating_change = increments,
      row.names = NULL
    ))
  }

  ## A missing point or rating yields NA increments; treat those as no change.
  increments[is.na(increments)] <- 0

  ## UPDATE RATINGS for the players of this match in one vectorised step
  ratings[rating_idx] <- ratings[rating_idx] + increments
}

## Publish the final ratings in the data frame callers expect
players_ratings$rating <- ratings