Question

我有一个数据库，其中包含每个游戏的玩家和玩家的得分。我正在尝试为我的预测模型创建一个评估变量。我正在使用a blogpost中的公式。

这是虚拟数据集：

df = data.frame(
matchid = c(1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4),
playerid = c(2,3,4,5,6,7,8,9,10,11,5,2,3,4,6,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,17,19,21,18,20,22,26,24,25,23),
point = c(52,38,34,33,16,19,16,8,10,2,38,37,31,34,21,18,18,13,9,-2,45,34,37,39,12,9,7,-3,-1,-8,47,38,31,17,26,32,28,17,16,9))

这是我尝试使用for循环的方法。对于30000个游戏数据库，for循环的运行速度非常慢。请给我一些有关如何改进此过程/循环的提示。我真的不知道。

## Initialize initial rating for each player 
players_ratings = data.frame(playerid = unique(df$playerid),rating = 1000, stringsAsFactors = FALSE)

## Initialize unique matches 
unique_matches = df$matchid %>% unique 

## Matches with rating 
relative_rating_matches = list(length(df))

### GENERATE RATING 
for(index in 1:length(unique_matches)){
  match = df %>% filter(matchid == unique_matches[[index]])
  position = index

  ## UPDATE RATING
  match = match %>% left_join(players_ratings,by = 'playerid')
  relative_rating_matches[[position]] = match

  print(match)

  ## BUILD ACTUAL RESULTS MATRIX      
  S = matrix(nrow = 10, ncol = 10)
  rownames(S) = match$playerid
  colnames(S) = match$playerid
  for(i in 1:nrow(S)) {
    for(j in 1:ncol(S)) {
      player_row_point = as.numeric(match %>% filter(playerid == rownames(S)[i]) %>% select(point))
      player_col_point = as.numeric(match %>% filter(playerid == colnames(S)[j]) %>% select(point))
      S[i,j] = ifelse(player_col_point == player_row_point,0.5,
                      ifelse(player_col_point > player_row_point,1,0))
    }
  }
  diag(S)= 0
  print(S)

  ## BUILD EXPECTED WIN/LOSS MATRIX 
  E = matrix(nrow = 10, ncol = 10)
  rownames(E) = match$playerid
  colnames(E) = match$playerid

  for(i in 1:nrow(E)) {
    for(j in 1:ncol(E)) {
      player_row_rating = as.numeric(match %>% filter(playerid == rownames(E)[i]) %>% select(rating))
      player_col_rating = as.numeric(match %>% filter(playerid == colnames(E)[j]) %>% select(rating))

      r = 1 + 10^((player_row_rating - player_col_rating)/400)
      expected_result = 1/r
      E[i,j] = expected_result
    }
  }
  diag(E) = 0
  print(E)

  ## GENERATE INCREMENTAL RATING
  R = 20 * (S-E)
  R = as.data.frame(colSums(R)) %>% rownames_to_column()

  print(R)

  ## UPDATE EXISTING RATING DATABASE
  for(i in 1:nrow(R)){
    player_id  = R[i,1]
    incre_rating = ifelse(is.na(R[i,2]),0,R[i,2])

    cur_rating = players_ratings[players_ratings$playerid == player_id,2]

    players_ratings[players_ratings$playerid == player_id,2] = cur_rating + incre_rating
  }
}

窗口功能/ R中的For循环

0 个答案: