structure(list(Score = c(2, 5, 1, 36, 69, 8, 54, 25, 2, 2, 2,
5, 5, 4, 1), ID = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2)), .Names = c("Score", "ID"), row.names = c(NA, -15L), class =
"data.frame")
假设我采用group_by(window = 3),上述数据的滚动平均值。
new %>% group_by(ID) %>% mutate(roll_mean = rollapply(ID,3,mean,align ='right',
fill=NA))
上述陈述的输出是,
structure(list(Score = c(2, 5, 1, 36, 69, 8, 54, 25, 2, 2, 2,
5, 5, 4, 1), ID = c(1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2), roll_mean = c(NA, NA, 1, 1, 1, 1, 1, NA, NA, 2, 2, 2, 2,
2, 2)), .Names = c("Score", "ID", "roll_mean"), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -15L), vars = "ID", labels
= structure(list(
ID = c(1, 2)), row.names = c(NA, -2L), class = "data.frame", vars = "ID",
drop = TRUE, .Names = "ID"), indices = list(
0:6, 7:14), drop = TRUE, group_sizes = 7:8, biggest_group_size = 8L)
我想通过滚动平均值来填写NA,无论窗口大小是多少。我希望输出为:
structure(list(Score = c(2L, 5L, 1L, 36L, 69L, 8L, 54L, 25L,
2L, 2L, 2L, 5L, 5L, 4L, 1L), ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), Roll3 = c(2, 3.5, 2.666666667,
14, 35.33333333, 37.66666667, 43.66666667, 25, 13.5, 9.666666667,
2, 3, 4, 4.666666667, 3.333333333)), .Names = c("Score", "ID",
"Roll3"), class = "data.frame", row.names = c(NA, -15L))
[注意:在我的原始数据中,我有2000万观察结果。如果可以使用data.table
执行上述操作,那真的很棒]