我在R中有以下数据框。它包含奥运会篮球锦标赛中每位球员的统计数据 样本表
每个游戏都由游戏栏中的数字表示。我想用最近3场比赛的平均值创建一个新专栏。在关注类似帖子中的示例时,我最大的下降是拥有游戏数字而不是其他方法似乎需要的实际日期。
非常感谢任何帮助。 感谢
编辑: 根据一些解决方案和建议澄清一点。对于每一行,我希望新列显示最近3场比赛的平均分钟数或分数。到目前为止,该建议使每一行显示游戏3,4的平均值。 5。
所以举个例子。 玩家A,游戏= 3 平均值=平均值(pts game1,pts game2,pts game3)
玩家B,游戏= 4 平均值=平均值(pts game2,pts game3,pts game4)
我希望能够清除它。 感谢
数据:
我很陌生。我希望这是共享数据的合适方法。
structure(list(Player = structure(c(1L, 2L, 6L, 8L, 17L, 21L,
23L, 24L, 24L, 24L, 24L, 25L, 26L, 15L, 20L, 20L, 12L, 15L, 11L,
5L, 15L, 16L, 14L, 9L, 20L, 11L, 18L, 4L, 12L, 9L, 4L, 9L, 20L,
12L, 5L, 13L, 22L, 7L, 11L, 20L, 4L, 5L, 10L, 11L, 14L, 19L,
3L, 7L, 14L, 5L), .Label = c("Adas Juskevicius", "Alex Abrines",
"Andrew Bogut", "Bojan Bogdanovic", "Boris Diaw", "Brock Motum",
"Dario Saric", "Dwight Lewis", "Facundo Campazzo", "Ike Diogu",
"Jianlian Yi", "Jonas Maciulis", "Kevin Durant", "Luis Scola",
"Mantas Kalnietis", "Matt Dellavedova", "Miguel Marriaga", "Milos Teodosic",
"Nikola Mirotic", "Pau Gasol", "Rafa Luz", "Ricky Rubio", "Roberto Acuna",
"Vaidas Kariniauskas", "Windi Graterol", "Zeljko Sakic"), class = "factor"),
Team = structure(c(8L, 6L, 2L, 12L, 12L, 3L, 1L, 8L, 8L,
8L, 8L, 12L, 5L, 8L, 6L, 6L, 8L, 8L, 4L, 7L, 8L, 2L, 1L,
1L, 6L, 4L, 10L, 5L, 8L, 1L, 5L, 1L, 6L, 8L, 7L, 11L, 6L,
5L, 4L, 6L, 5L, 7L, 9L, 4L, 1L, 6L, 2L, 5L, 1L, 7L), .Label = c("ARG",
"AUS", "BRZ", "CHN", "CRO", "ESP", "FRA", "LTU", "NGR", "SRB",
"USA", "VEN"), class = "factor"), Pos = structure(c(3L, 4L,
2L, 5L, 2L, 5L, 1L, 2L, 2L, 2L, 2L, 1L, 4L, 3L, 1L, 1L, 4L,
5L, 2L, 2L, 5L, 3L, 2L, 3L, 1L, 4L, 5L, 2L, 2L, 3L, 2L, 3L,
1L, 2L, 2L, 4L, 3L, 4L, 4L, 1L, 2L, 2L, 2L, 4L, 1L, 2L, 1L,
4L, 1L, 2L), .Label = c("C", "PF", "PG", "SF", "SG"), class = "factor"),
game = c(4L, 5L, 4L, 5L, 3L, 4L, 3L, 1L, 2L, 3L, 4L, 5L,
5L, 3L, 2L, 3L, 3L, 4L, 3L, 3L, 2L, 4L, 3L, 3L, 5L, 5L, 5L,
4L, 2L, 2L, 2L, 5L, 4L, 4L, 2L, 2L, 1L, 4L, 4L, 1L, 5L, 4L,
3L, 2L, 4L, 2L, 2L, 3L, 2L, 1L), Status = c(0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), Drafted = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85,
82, 80, 78, 77, 74, 68, 68, 68, 65, 64, 63, 62, 62, 61, 61,
60, 59, 59, 59, 58, 57, 57, 57, 56, 56, 56, 55, 55, 55, 55,
54, 54, 53, 53, 52, 51), Min = c(11.04, 1.44, 16.56, 2.88,
4.8, 1.92, 13.68, 3.84, 9.36, 2.64, 21.12, 17.04, 0.24, 36.48,
32.16, 23.28, 26.88, 17.28, 33.6, 28.56, 30.48, 19.92, 30.24,
25.92, 27.84, 34.8, 15.12, 36, 28.8, 29.04, 29.28, 21.36,
23.04, 18.72, 21.12, 25.2, 12.24, 27.12, 32.88, 31.92, 34.08,
18.24, 27.6, 32.64, 33.6, 32.88, 24.72, 34.8, 35.76, 31.44
), FIC = c(3.8, 1.5, 10.2, 1, 0, -1, 0.2, 0.5, -3.2, -1,
0.6, 4.5, -0.5, 15.6, 9.5, 11.1, 0.5, 7.8, 17, 16.8, 25.2,
10.5, 10, 6, 14.4, 6, 7.5, 15.5, 14.8, 6.2, 7.9, 3, 26.9,
0.8, 11.4, 16, -1, 4.9, 14.1, 18.5, 5.9, 6.5, 10, 10, 10,
8, 19, 9, 12.1, 7.5), FP = c(8, 4, 21.75, 2, 2.75, -0.5,
4.75, 1.5, 2.5, 1.25, 8.5, 13, 0, 35.25, 37, 32.25, 17, 18.5,
39.5, 34.25, 49, 19.25, 28.75, 20.25, 41.25, 27.5, 16.5,
39.25, 33.5, 29, 30.75, 13.25, 47.25, 9, 24.5, 28.5, 6.25,
19.5, 38.25, 40.25, 27.5, 17, 21.75, 37.5, 29, 21, 38.5,
30.75, 37.75, 25.75), FPM = c(0.72463768115942, 2.77777777777778,
1.31340579710145, 0.694444444444444, 0.572916666666667, -0.260416666666667,
0.347222222222222, 0.390625, 0.267094017094017, 0.473484848484848,
0.402462121212121, 0.762910798122066, 0, 0.966282894736842,
1.15049751243781, 1.38530927835052, 0.632440476190476, 1.07060185185185,
1.17559523809524, 1.19922969187675, 1.60761154855643, 0.96636546184739,
0.950727513227513, 0.78125, 1.48168103448276, 0.790229885057471,
1.09126984126984, 1.09027777777778, 1.16319444444444, 0.99862258953168,
1.05020491803279, 0.620318352059925, 2.05078125, 0.480769230769231,
1.16003787878788, 1.13095238095238, 0.51062091503268, 0.719026548672566,
1.16332116788321, 1.2609649122807, 0.806924882629108, 0.932017543859649,
0.78804347826087, 1.14889705882353, 0.863095238095238, 0.638686131386861,
1.55744336569579, 0.883620689655172, 1.05564876957494, 0.819020356234097
), PTS = c(5L, 2L, 15L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 6L, 9L,
0L, 17L, 13L, 16L, 10L, 16L, 18L, 11L, 21L, 6L, 12L, 10L,
19L, 20L, 7L, 28L, 21L, 10L, 18L, 10L, 23L, 4L, 7L, 16L,
0L, 7L, 20L, 26L, 22L, 10L, 7L, 19L, 14L, 6L, 9L, 15L, 23L,
9L), TPM = c(1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 2L, 0L, 0L, 2L, 2L, 0L, 0L, 1L, 0L, 1L, 2L, 1L, 0L,
1L, 3L, 3L, 1L, 1L, 1L, 5L, 0L, 1L, 2L, 0L, 1L, 2L, 3L, 4L,
0L, 0L, 3L, 2L, 0L, 1L, 3L, 3L, 1L), Ast = c(2L, 0L, 2L,
0L, 1L, 0L, 0L, 1L, 2L, 0L, 1L, 1L, 0L, 7L, 1L, 3L, 1L, 2L,
1L, 9L, 12L, 8L, 4L, 1L, 1L, 2L, 5L, 2L, 2L, 8L, 2L, 1L,
5L, 2L, 5L, 5L, 1L, 0L, 2L, 1L, 1L, 0L, 3L, 0L, 1L, 2L, 6L,
3L, 0L, 2L), Reb = c(0L, 0L, 3L, 0L, 1L, 0L, 1L, 0L, 0L,
1L, 2L, 2L, 0L, 5L, 10L, 7L, 6L, 0L, 10L, 9L, 4L, 1L, 7L,
3L, 13L, 2L, 0L, 3L, 4L, 4L, 7L, 1L, 5L, 0L, 4L, 2L, 1L,
6L, 9L, 9L, 2L, 4L, 7L, 6L, 10L, 8L, 12L, 7L, 9L, 5L), BLK = c(0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 4L, 2L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 2L, 2L, 0L, 0L, 1L, 0L, 1L,
0L, 2L, 0L, 3L, 1L, 0L, 1L, 2L, 0L, 0L, 0L, 1L, 1L, 0L, 1L,
3L, 1L, 1L, 2L), STL = c(0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 3L, 0L, 1L, 0L, 1L,
2L, 0L, 0L, 1L, 2L, 1L, 2L, 0L, 0L, 2L, 1L, 0L, 0L, 2L, 2L,
0L, 0L, 1L, 1L, 0L, 4L, 0L, 0L, 0L, 1L, 1L, 2L), TO = c(1L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 3L, 1L, 0L, 3L, 3L, 2L,
6L, 3L, 0L, 3L, 2L, 0L, 1L, 0L, 3L, 4L, 1L, 2L, 2L, 5L, 3L,
0L, 0L, 0L, 3L, 1L, 1L, 3L, 2L, 0L, 5L, 0L, 1L, 1L, 3L, 0L,
2L, 6L, 4L, 2L)), .Names = c("Player", "Team", "Pos", "game",
"Status", "Drafted", "Min", "FIC", "FP", "FPM", "PTS", "TPM",
"Ast", "Reb", "BLK", "STL", "TO"), row.names = c(NA, 50L), class = "data.frame")
答案 0 :(得分:4)
将dplyr与mtcars示例数据一起使用:
library(dplyr)
mtcars %>%
group_by(cyl) %>%
mutate(last3mean = mean(tail(mpg, 3)))
在您的情况下,请使用播放器和要聚合的列,而不是 cyl 和 mpg 。
使用data.table,(由@akrun建议):
data.table as.data.table(mtcars)[, .(last3mean = mean(tail(mpg,3))), by = cyl]
答案 1 :(得分:1)
您可以使用rollmeanr
包中的zoo
和dplyr
。这具有以下特征:不仅对玩家的最后三个游戏进行平均,而且为每个玩家计算最后三个游戏移动平均值。代码如下:
library(dplyr)
library(zoo)
avg.last.3 <- function (x) if (length(x) < 3) rep(NA, length(x)) else rollmeanr(x, 3, fill = NA) ## 1.
res <- df %>% group_by(Player) %>% arrange(game) %>% ## 2.
mutate(Avg.Pts=avg.last.3(PTS)) %>% ## 3.
ungroup() %>% arrange(Player,game) ## 4.
注意:
avg.last.3
,该函数应用窗口长度为rollmeanr
的函数3
。 rollmeanr
指定align="right"
以平均最后三个游戏,并且我们会将任何没有三天平均值的结果填入NA
。请注意,此函数中的if
条件是必需的,以便:
x
的长度至少是rollmeanr
所需的rollmeanr
窗口长度avg.last.3
返回一个与mutate
所需的输入长度相同的向量。group_by
Player
。由于我注意到game
列未必按每个Player
排序,因此我们按升序排序game
。mutate
创建新列Avg.Pts
,因为在列上应用了avg.last.3
函数,例如PTS
。ungroup
并显示按Player
排序后跟game
当然,您可以通过以下方式获得任意数量的列的平均值:
mutate(Avg.Pts=avg.last.3(PTS), Avg.Min=avg.last.3(Min), Avg.Ast=avg.last.3(Ast), ...)
仅对PTS
列进行平均的结果由(仅打印前六列加PTS
和Avg.Pts
)给出:
print(res[,c(colnames(res)[1:6],"PTS","Avg.Pts")],n=50)
### A tibble: 50 x 8
## Player Team Pos game Status Drafted PTS Avg.Pts
## <fctr> <fctr> <fctr> <int> <int> <dbl> <int> <dbl>
##1 Adas Juskevicius LTU PG 4 0 0 5 NA
##2 Alex Abrines ESP SF 5 0 0 2 NA
##3 Andrew Bogut AUS C 2 1 53 9 NA
##4 Bojan Bogdanovic CRO PF 2 1 59 18 NA
##5 Bojan Bogdanovic CRO PF 4 1 61 28 NA
##6 Bojan Bogdanovic CRO PF 5 1 55 22 22.666667
##7 Boris Diaw FRA PF 1 1 51 9 NA
##8 Boris Diaw FRA PF 2 1 57 7 NA
##9 Boris Diaw FRA PF 3 1 68 11 9.000000
##10 Boris Diaw FRA PF 4 1 55 10 9.333333
##11 Brock Motum AUS PF 4 0 0 15 NA
##12 Dario Saric CRO SF 3 1 53 15 NA
##13 Dario Saric CRO SF 4 1 56 7 NA
##14 Dwight Lewis VEN SG 5 0 0 0 NA
##15 Facundo Campazzo ARG PG 2 1 60 10 NA
##16 Facundo Campazzo ARG PG 3 1 64 10 NA
##17 Facundo Campazzo ARG PG 5 0 59 10 10.000000
##18 Ike Diogu NGR PF 3 1 55 7 NA
##19 Jianlian Yi CHN SF 2 1 55 19 NA
##20 Jianlian Yi CHN PF 3 1 74 18 NA
##21 Jianlian Yi CHN SF 4 1 56 20 19.000000
##22 Jianlian Yi CHN SF 5 1 62 20 19.333333
##23 Jonas Maciulis LTU PF 2 1 61 21 NA
##24 Jonas Maciulis LTU SF 3 1 78 10 NA
##25 Jonas Maciulis LTU PF 4 1 58 4 11.666667
##26 Kevin Durant USA SF 2 1 57 16 NA
##27 Luis Scola ARG C 2 1 52 23 NA
##28 Luis Scola ARG PF 3 1 65 12 NA
##29 Luis Scola ARG C 4 1 54 14 16.333333
##30 Mantas Kalnietis LTU SG 2 1 68 21 NA
##31 Mantas Kalnietis LTU PG 3 1 85 17 NA
##32 Mantas Kalnietis LTU SG 4 1 77 16 18.000000
##33 Matt Dellavedova AUS PG 4 1 68 6 NA
##34 Miguel Marriaga VEN PF 3 0 0 0 NA
##35 Milos Teodosic SRB SG 5 0 62 7 NA
##36 Nikola Mirotic ESP PF 2 1 54 6 NA
##37 Pau Gasol ESP C 1 1 56 26 NA
##38 Pau Gasol ESP C 2 1 82 13 NA
##39 Pau Gasol ESP C 3 1 80 16 18.333333
##40 Pau Gasol ESP C 4 1 59 23 17.333333
##41 Pau Gasol ESP C 5 1 63 19 19.333333
##42 Rafa Luz BRZ SG 4 0 0 0 NA
##43 Ricky Rubio ESP PG 1 1 57 0 NA
##44 Roberto Acuna ARG C 3 1 0 2 NA
##45 Vaidas Kariniauskas LTU PF 1 0 0 0 NA
##46 Vaidas Kariniauskas LTU PF 2 0 0 0 NA
##47 Vaidas Kariniauskas LTU PF 3 0 0 0 0.000000
##48 Vaidas Kariniauskas LTU PF 4 0 0 6 2.000000
##49 Windi Graterol VEN C 5 0 0 9 NA
##50 Zeljko Sakic CRO SF 5 0 0 0 NA
答案 2 :(得分:0)
首先按播放器分割数据框
playerDFs <- split(origdata, origdata["Player"])
然后对最后3场比赛进行分组
playerLast3 <- lapply(playerDFs, function(x) x[tail(order(x[["game"]]),3), ])
最后得到你的钱财
vapply(playerLast3, colMeans, numeric(ncol(origdata)))