Question

我有（1）一个评分参考表，以及（2）一个函数，该函数根据这些评分随机生成结果，并根据生成的结果更新评分。

尽管下面的可重现示例有更简单的解决方案，但预期的应用是根据对手的Elo评分来模拟对手之间的结果，并在每一轮之后更新评分，以使模拟“热”进行。

在这里，我有一个评分参考表ref，并使用函数genResult生成随机结果，再通过全局赋值更新该参考表。

# Reproducible reference table: ids "A".."E", each with a rating drawn
# uniformly between 100 and 200 and rounded to a whole number.
set.seed(123)
ref <- data.frame(
  id = LETTERS[1:5],
  rating = round(runif(n = 5, min = 100, max = 200))
)

# Simulate one random result and fold it back into the ratings table.
#
# Args:
#   ref: data.frame with columns `id` and `rating`.
#
# Returns:
#   A list of two data frames: the one-row result (`id`, `score`) and the
#   updated ratings table.
#
# Side effect: the updated table is also written back with `<<-`, i.e. to a
# `ref` in an enclosing environment (the global `ref` when run as a script).
genResult <- function(ref) {
  # floor(runif(1, 1, 5)) only yields 1..4, so "E" is never drawn.
  drawn_id <- LETTERS[floor(runif(1, 1, 5))]
  drawn_score <- round(rnorm(1, 0, 20))

  # Add the drawn score to the rating of the drawn id.
  hit <- ref$id == drawn_id
  ref$rating[hit] <- ref$rating[hit] + drawn_score

  outcome <- data.frame(id = drawn_id, score = drawn_score)

  # assign('ref', ref, envir=.GlobalEnv)
  ref <<- ref

  list(outcome, ref)
}

将此函数重复执行两次，可以看到ref已按预期更新。

# `genResult(ref)` is re-evaluated on each of the two runs, so the second run
# sees the ratings that the first run wrote back to `ref`.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
replicate(2, genResult(ref), simplify = FALSE)

返回结果如下，可以看到参考表在两次迭代中每次都得到了更新。

[[1]]
[[1]][[1]]
id score
1  A     1

[[1]][[2]]
id rating
1  A    130
2  B    179
3  C    141
4  D    188
5  E    194


[[2]]
[[2]][[1]]
id score
1  C    -2

[[2]][[2]]
id rating
1  A    130
2  B    179
3  C    139
4  D    188
5  E    194

现在假设我要重复执行上面的（已重复执行的）函数：使用动态更新的评分，模拟3个独立实例，每个实例包含5个结果，并且仅输出结果。我再次创建参考表ref，并定义一个同样使用全局赋值的类似函数：

# Rebuild the reference table from the same seed so the ratings start from
# the same values as before.
set.seed(123)
ref <- data.frame(
  id = LETTERS[seq_len(5)],
  rating = round(runif(5, min = 100, max = 200), digits = 0)
)

# Simulate one random result, write the updated ratings back with `<<-`,
# and return only the result.
#
# Args:
#   ref: data.frame with columns `id` and `rating`.
#
# Returns:
#   A one-row data.frame with columns `id` and `score`.
#
# Side effect: the updated ratings table is assigned to `ref` in an enclosing
# environment (the global `ref` when run as a script).
genResult2 <- function(ref) {
  # floor(runif(1, 1, 5)) only yields 1..4, so "E" is never drawn.
  picked <- LETTERS[floor(runif(1, 1, 5))]
  delta <- round(rnorm(1, 0, 20))

  is_picked <- ref$id == picked
  ref[is_picked, "rating"] <- ref[is_picked, "rating"] + delta

  ref <<- ref

  data.frame(id = picked, score = delta)
}

然后使用lapply循环，并将结果列表合并为一个数据框：

# Run 3 instances of 5 simulated results each and stack everything into one
# data frame, tagging every row with its instance number `i`.
lapply(1:3, function(i) {

  # Pass the global `ref` itself instead of a snapshot taken once per
  # instance. replicate() re-evaluates its expression on every run, so each
  # genResult2() call now starts from the ratings written back by the
  # previous call. With a fixed snapshot, all five runs started from the same
  # ratings and the updates never accumulated.
  replicate(5, genResult2(ref), simplify = FALSE) %>%
    plyr::rbind.fill() %>%
    mutate(i)

}) %>%
  plyr::rbind.fill()

返回：

id score i
1   A     1 1
2   C    -2 1
3   B     9 1
4   A    26 1
5   A    -9 1
6   D    10 2
7   D     8 2
8   C     5 2
9   A    36 2
10  C    17 2
11  B    14 3
12  B   -15 3
13  B    -4 3
14  A   -22 3
15  B   -13 3

现在，这似乎可以按预期工作，但（i）感觉确实很丑陋，并且（ii）我已经无数次读到，全局赋值可能而且终将导致意想不到的问题。

有人可以提出更好的解决方案吗？

Answer 1

如果要进行迭代，并且下一次迭代依赖于上一次迭代的结果，这通常表明您应该使用老式的for循环，而不是replicate或apply系列函数（另一种可能是使用Reduce，并将accumulate参数设置为TRUE）。

这与您发布的代码具有相同的输出；我使用了for循环，并让您的函数同时返回ref：

返回：

# Simulate one random result and return it together with the updated ratings.
# Nothing outside the function is modified; the caller must carry the
# returned table forward.
#
# Args:
#   ref: data.frame with columns `id` and `rating`.
#
# Returns:
#   A list of two data frames: the one-row result (`id`, `score`) and the
#   updated ratings table.
genResult3 <- function(ref) {
  # floor(runif(1, 1, 5)) only yields 1..4, so "E" is never drawn.
  chosen <- LETTERS[floor(runif(1, 1, 5))]
  change <- round(rnorm(1, 0, 20))

  is_chosen <- ref$id == chosen
  ref$rating[is_chosen] <- ref$rating[is_chosen] + change

  # The updated `ref` is part of the return value.
  list(data.frame(id = chosen, score = change), ref)
}

# Run 3 instances of 5 dependent iterations each. Within an instance every
# iteration starts from the ratings produced by the previous one.
lapply(1:3, function(i) {
  # Preallocate five empty slots. `list(5)` would instead create a
  # one-element list holding the number 5.
  res <- vector("list", 5)
  for (k in seq_along(res)) {
    gr <- genResult3(ref)
    res[[k]] <- gr[[1]] # get result
    # Update rating. This assignment creates a `ref` local to this function,
    # so the global `ref` is untouched and each instance starts from it.
    ref <- gr[[2]]
    res[[k]] <- left_join(res[[k]], ref, by = "id") # combine for output
  }
  plyr::rbind.fill(res) %>%
    mutate(i)

}) %>%
  plyr::rbind.fill()

Answer 2

您可以使用new.env()创建新环境并在那里进行计算：

将该想法应用到您的第一个函数中将得到以下结果：

# Build the reproducible starting table and print it.
set.seed(123)
ref1 <- data.frame(
  id = LETTERS[1:5],
  rating = round(runif(n = 5, min = 100, max = 200))
)
ref1

# Dedicated environment holding the working copy of the ratings, so that
# `ref1` itself is never modified.
refEnv <- new.env()
assign("ref", ref1, envir = refEnv)

# Simulate one random result and store the updated ratings in `refEnv`.
#
# Args:
#   ref: data.frame with columns `id` and `rating`.
#
# Returns:
#   A list of two data frames: the one-row result (`id`, `score`) and the
#   updated ratings table.
#
# Side effect: the updated table is stored as `ref` inside the environment
# `refEnv`, which must exist where this function can see it.
genResult <- function(ref) {
  # floor(runif(1, 1, 5)) only yields 1..4, so "E" is never drawn.
  drawn_id <- LETTERS[floor(runif(1, 1, 5))]
  drawn_score <- round(rnorm(1, 0, 20))

  hit <- ref$id == drawn_id
  ref[hit, "rating"] <- ref[hit, "rating"] + drawn_score

  outcome <- data.frame(id = drawn_id, score = drawn_score)

  assign(x = "ref", value = ref, envir = refEnv)

  list(outcome, ref)
}
# `refEnv$ref` is re-read on each run, so the second run starts from the
# ratings stored by the first. `FALSE` is spelled out because `F` is an
# ordinary, reassignable variable.
replicate(2, genResult(refEnv$ref), simplify = FALSE)

# Print the original table and the working copy held in `refEnv`.
ref1
refEnv$ref

您将看到原始的ref1未被触摸并且保持不变，而refEnv$ref包含了上次迭代的结果。

然后使用lapply将其应用到第二个函数：

# Rebuild the starting table from the same seed and print it.
set.seed(123)
ref1 <- data.frame(
  id = LETTERS[seq_len(5)],
  rating = round(runif(5, min = 100, max = 200), digits = 0)
)
ref1

# Fresh environment holding the working copy of the ratings.
refEnv <- new.env()
refEnv[["ref"]] <- ref1


# Simulate one random result, store the updated ratings in `refEnv`, and
# return only the result.
#
# Args:
#   ref: data.frame with columns `id` and `rating`.
#
# Returns:
#   A one-row data.frame with columns `id` and `score`.
#
# Side effect: the updated table is stored as `ref` inside the environment
# `refEnv`, which must exist where this function can see it.
genResult2 <- function(ref) {
  # floor(runif(1, 1, 5)) only yields 1..4, so "E" is never drawn.
  picked <- LETTERS[floor(runif(1, 1, 5))]
  delta <- round(rnorm(1, 0, 20))

  is_picked <- ref$id == picked
  ref$rating[is_picked] <- ref$rating[is_picked] + delta

  assign(x = "ref", value = ref, envir = refEnv)

  data.frame(id = picked, score = delta)
}

# Run 3 instances of 5 simulated results each. `refEnv$ref` is re-read on
# every call, so each result starts from the ratings stored by the previous
# call. Rows are tagged with their instance number `i`.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
lapply(1:3, function(i) {

  replicate(5, genResult2(refEnv$ref), simplify = FALSE) %>%
    plyr::rbind.fill() %>%
    mutate(i)

}) %>%
  plyr::rbind.fill()

# Print the original table.
ref1

在每次函数迭代时动态更新输入数据框，而无需全局分配

2 个答案: