在R

时间:2018-02-07 20:52:12

标签: r sorting user-defined-functions nested-loops lapply

我刚接触R不久,需要一些关于用户自定义函数的帮助。我想知道数据框中的每条观测,在同一数据框中由相似观测构成的子集里排在第几位。但我不知道如何在函数内部引用原始观测,从而提取它的排名。

以下是我的数据示例:

> dput(df)
structure(list(Name = c("Alex Abrines", "Steven Adams", "Cole Aldrich", 
"LaMarcus Aldridge", "Kyle Anderson", "Ryan Anderson", "Giannis Antetokounmpo", 
"Carmelo Anthony", "OG Anunoby", "Darrell Arthur", "Will Barton", 
"Bradley Beal", "Davis Bertans", "Nemanja Bjelica", "Malcolm Brogdon", 
"Aaron Brooks", "Dillon Brooks", "Lorenzo Brown", "Sterling Brown", 
"Reggie Bullock", "Jimmy Butler", "Dwight Buycks", "Clint Capela", 
"Wilson Chandler", "Torrey Craig", "Jamal Crawford", "Deyonta Davis", 
"Matthew Dellavedova", "DeMar DeRozan", "Gorgui Dieng", "Andre Drummond", 
"James Ennis", "Kenneth Faried", "Raymond Felton", "Terrance Ferguson", 
"Bryn Forbes", "Tim Frazier", "Langston Galloway", "Marc Gasol", 
"Pau Gasol", "Paul George", "Marcus Georges-Hunt", "Taj Gibson", 
"Manu Ginobili", "Marcin Gortat", "Jerami Grant", "Danny Green", 
"Gerald Green", "JaMychal Green", "Blake Griffin", "James Harden", 
"Gary Harris", "Andrew Harrison", "Myke Henry", "John Henson", 
"Nene Hilario", "Darrun Hilliard", "Josh Huestis", "Serge Ibaka", 
"Stanley Johnson", "Nikola Jokic", "Tyus Jones", "Luke Kennard", 
"Sean Kilpatrick", "Joffrey Lauvergne", "Kyle Lowry", "Trey Lyles", 
"Ian Mahinmi", "Thon Maker", "Jarell Martin", "Luc Mbah a Moute", 
"Ben McLemore", "Jodie Meeks", "Khris Middleton", "Patty Mills", 
"Eric Moreland", "Markieff Morris", "Emmanuel Mudiay", "Shabazz Muhammad", 
"Xavier Munford", "Dejounte Murray", "Jamal Murray", "Lucas Nogueira", 
"Kelly Oubre", "Tony Parker", "Patrick Patterson", "Brandon Paul", 
"Chris Paul", "Marshall Plumlee", "Jakob Poeltl", "Otto Porter", 
"Norman Powell", "Willie Reed", "Tomas Satoransky", "Mike Scott", 
"Wayne Selden", "Pascal Siakam", "Ish Smith", "Tony Snell", "Jeff Teague", 
"Anthony Tolliver", "Karl-Anthony Towns", "P.J. Tucker", "Jonas Valanciunas", 
"Rashad Vaughn", "Russell Westbrook", "Andrew Wiggins", "D.J. Wilson", 
"Delon Wright"), Pos = structure(c(5L, 1L, 1L, 1L, 3L, 2L, 3L, 
2L, 2L, 2L, 4L, 4L, 2L, 2L, 4L, 4L, 5L, 4L, 4L, 5L, 3L, 4L, 1L, 
2L, 5L, 4L, 1L, 4L, 5L, 1L, 1L, 2L, 2L, 4L, 5L, 4L, 4L, 4L, 1L, 
1L, 2L, 4L, 2L, 4L, 1L, 2L, 5L, 5L, 2L, 2L, 4L, 4L, 4L, 2L, 1L, 
1L, 4L, 2L, 1L, 2L, 1L, 4L, 4L, 4L, 1L, 4L, 2L, 1L, 1L, 2L, 2L, 
4L, 4L, 3L, 4L, 1L, 2L, 4L, 3L, 4L, 4L, 4L, 1L, 2L, 4L, 2L, 4L, 
4L, 1L, 1L, 2L, 4L, 1L, 4L, 2L, 5L, 2L, 4L, 5L, 4L, 1L, 1L, 2L, 
1L, 4L, 4L, 3L, 2L, 4L), .Label = c("C", "PF", "SF", "PG", "SG"
), class = "factor"), Date = structure(c(1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "2018-02-01 *", class = "factor"), 
    Tm = structure(c(7L, 7L, 6L, 8L, 8L, 3L, 5L, 7L, 9L, 1L, 
    1L, 10L, 8L, 6L, 5L, 6L, 4L, 9L, 5L, 2L, 6L, 2L, 3L, 1L, 
    1L, 6L, 4L, 5L, 9L, 6L, 2L, 4L, 1L, 7L, 7L, 8L, 10L, 2L, 
    4L, 8L, 7L, 6L, 6L, 8L, 10L, 7L, 8L, 3L, 4L, 2L, 3L, 1L, 
    4L, 4L, 5L, 3L, 8L, 7L, 9L, 2L, 1L, 6L, 2L, 5L, 8L, 9L, 1L, 
    10L, 5L, 4L, 3L, 4L, 10L, 5L, 8L, 2L, 10L, 1L, 6L, 5L, 8L, 
    1L, 9L, 10L, 8L, 7L, 8L, 3L, 5L, 9L, 10L, 9L, 2L, 10L, 10L, 
    4L, 9L, 2L, 5L, 6L, 2L, 6L, 3L, 9L, 5L, 7L, 6L, 5L, 9L), .Label = c("DEN", 
    "DET", "HOU", "MEM", "MIL", "MIN", "OKC", "SAS", "TOR", "WAS"
    ), class = "factor"), Opp = structure(c(1L, 1L, 5L, 3L, 3L, 
    8L, 6L, 1L, 10L, 7L, 7L, 9L, 3L, 5L, 6L, 5L, 2L, 10L, 6L, 
    4L, 5L, 4L, 8L, 7L, 7L, 5L, 2L, 6L, 10L, 5L, 4L, 2L, 7L, 
    1L, 1L, 3L, 9L, 4L, 2L, 3L, 1L, 5L, 5L, 3L, 9L, 1L, 3L, 8L, 
    2L, 4L, 8L, 7L, 2L, 2L, 6L, 8L, 3L, 1L, 10L, 4L, 7L, 5L, 
    4L, 6L, 3L, 10L, 7L, 9L, 6L, 2L, 8L, 2L, 9L, 6L, 3L, 4L, 
    9L, 7L, 5L, 6L, 3L, 7L, 10L, 9L, 3L, 1L, 3L, 8L, 6L, 10L, 
    9L, 10L, 4L, 9L, 9L, 2L, 10L, 4L, 6L, 5L, 4L, 5L, 8L, 10L, 
    6L, 1L, 5L, 6L, 10L), .Label = c("DEN", "DET", "HOU", "MEM", 
    "MIL", "MIN", "OKC", "SAS", "TOR", "WAS"), class = "factor"), 
    MP = c(29L, 32L, 3L, 34L, 30L, 29L, 36L, 34L, 21L, 1L, 36L, 
    38L, 13L, 14L, 10L, 3L, 32L, 11L, 24L, 35L, 40L, 19L, 35L, 
    34L, 22L, 17L, 15L, 25L, 38L, 13L, 28L, 15L, 10L, 14L, 4L, 
    18L, 17L, 4L, 33L, 20L, 36L, 6L, 33L, 20L, 26L, 25L, 28L, 
    30L, 20L, 35L, 37L, 38L, 34L, 22L, 32L, 13L, 8L, 12L, 35L, 
    36L, 37L, 17L, 21L, 18L, 2L, 35L, 15L, 19L, 13L, 28L, 35L, 
    10L, 9L, 35L, 24L, 5L, 32L, 14L, 3L, 7L, 24L, 34L, 3L, 23L, 
    17L, 15L, 2L, 30L, 5L, 16L, 29L, 26L, 5L, 28L, 19L, 31L, 
    13L, 29L, 29L, 28L, 22L, 33L, 31L, 29L, 4L, 39L, 30L, 4L, 
    13L), Player.ID = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 
    8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 
    20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 
    32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 42L, 41L, 43L, 
    44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 53L, 52L, 54L, 55L, 
    56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 
    68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 
    80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 
    92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L, 101L, 102L, 
    103L, 104L, 105L, 106L, 107L, 108L, 109L), .Label = c("abrinal01", 
    "adamsst01", "aldrico01", "aldrila01", "anderky01", "anderry01", 
    "antetgi01", "anthoca01", "anunoog01", "arthuda01", "bartowi01", 
    "bealbr01", "bertada01", "bjeline01", "brogdma01", "brookaa01", 
    "brookdi01", "brownlo01", "brownst02", "bullore01", "butleji01", 
    "buyckdw01", "capelca01", "chandwi01", "craigto01", "crawfja01", 
    "davisde01", "dellama01", "derozde01", "dienggo01", "drumman01", 
    "ennisja01", "farieke01", "feltora01", "fergute01", "forbebr01", 
    "fraziti01", "gallola01", "gasolma01", "gasolpa01", "georgma01", 
    "georgpa01", "gibsota01", "ginobma01", "gortama01", "grantje01", 
    "greenda02", "greenge01", "greenja01", "griffbl01", "hardeja01", 
    "harrian01", "harriga01", "henrymy01", "hensojo01", "hilarne01", 
    "hillida01", "huestjo01", "ibakase01", "johnsst04", "jokicni01", 
    "jonesty01", "kennalu01", "kilpase01", "lauvejo01", "lowryky01", 
    "lylestr01", "mahinia01", "makerth01", "martija01", "mbahalu01", 
    "mclembe01", "meeksjo01", "middlkh01", "millspa02", "moreler01", 
    "morrima02", "mudiaem01", "muhamsh01", "munfoxa02", "murrade01", 
    "murraja01", "noguelu01", "oubreke01", "parketo01", "pattepa01", 
    "paulbr01", "paulch01", "plumlma02", "poeltja01", "porteot01", 
    "powelno01", "reedwi02", "satorto01", "scottmi01", "seldewa01", 
    "siakapa01", "smithis01", "snellto01", "teaguje01", "tollian01", 
    "townska01", "tuckepj01", "valanjo01", "vaughra01", "westbru01", 
    "wiggian01", "wilsodj01", "wrighde01"), class = "factor"), 
    Game.ID = structure(c(7L, 7L, 6L, 8L, 8L, 3L, 5L, 7L, 9L, 
    1L, 1L, 10L, 8L, 6L, 5L, 6L, 4L, 9L, 5L, 2L, 6L, 2L, 3L, 
    1L, 1L, 6L, 4L, 5L, 9L, 6L, 2L, 4L, 1L, 7L, 7L, 8L, 10L, 
    2L, 4L, 8L, 7L, 6L, 6L, 8L, 10L, 7L, 8L, 3L, 4L, 2L, 3L, 
    1L, 4L, 4L, 5L, 3L, 8L, 7L, 9L, 2L, 1L, 6L, 2L, 5L, 8L, 9L, 
    1L, 10L, 5L, 4L, 3L, 4L, 10L, 5L, 8L, 2L, 10L, 1L, 6L, 5L, 
    8L, 1L, 9L, 10L, 8L, 7L, 8L, 3L, 5L, 9L, 10L, 9L, 2L, 10L, 
    10L, 4L, 9L, 2L, 5L, 6L, 2L, 6L, 3L, 9L, 5L, 7L, 6L, 5L, 
    9L), .Label = c("2018-02-01 * DEN", "2018-02-01 * DET", "2018-02-01 * HOU", 
    "2018-02-01 * MEM", "2018-02-01 * MIL", "2018-02-01 * MIN", 
    "2018-02-01 * OKC", "2018-02-01 * SAS", "2018-02-01 * TOR", 
    "2018-02-01 * WAS"), class = "factor")), .Names = c("Name", 
"Pos", "Date", "Tm", "Opp", "MP", "Player.ID", "Game.ID"), class = "data.frame", row.names = c(NA, 
109L))

我想写一个函数,对于每个观察:

> df[1, ]
          Name Pos         Date  Tm Opp MP Player.ID          Game.ID
1 Alex Abrines  SG 2018-02-01 * OKC DEN 29 abrinal01 2018-02-01 * OKC

取出df$Game.ID与该观测相同的所有观测(包括它本身),构成一个子集。

> df[df$Game.ID == '2018-02-01 * OKC', ]
                 Name Pos         Date  Tm Opp MP Player.ID          Game.ID
1        Alex Abrines  SG 2018-02-01 * OKC DEN 29 abrinal01 2018-02-01 * OKC
2        Steven Adams   C 2018-02-01 * OKC DEN 32 adamsst01 2018-02-01 * OKC
8     Carmelo Anthony  PF 2018-02-01 * OKC DEN 34 anthoca01 2018-02-01 * OKC
34     Raymond Felton  PG 2018-02-01 * OKC DEN 14 feltora01 2018-02-01 * OKC
35  Terrance Ferguson  SG 2018-02-01 * OKC DEN  4 fergute01 2018-02-01 * OKC
41        Paul George  PF 2018-02-01 * OKC DEN 36 georgpa01 2018-02-01 * OKC
46       Jerami Grant  PF 2018-02-01 * OKC DEN 25 grantje01 2018-02-01 * OKC
58       Josh Huestis  PF 2018-02-01 * OKC DEN 12 huestjo01 2018-02-01 * OKC
86  Patrick Patterson  PF 2018-02-01 * OKC DEN 15 pattepa01 2018-02-01 * OKC
106 Russell Westbrook  PG 2018-02-01 * OKC DEN 39 westbru01 2018-02-01 * OKC

然后返回原始观测的df$MP值的排名:

> df[1, c('MP')]
[1] 29

即该值在新子集排序中所处的位置。

> xx <- data.frame(cbind(sort(df[df$Game.ID == '2018-02-01 * OKC', c('MP')], decreasing = TRUE), rownames(data.table(sort(df[df$Game.ID == '2018-02-01 * OKC', c('MP')], decreasing = TRUE)))))
> xx
   X1 X2
1  39  1
2  36  2
3  34  3
4  32  4
5  29  5
6  25  6
7  15  7
8  14  8
9  12  9
10  4 10
> colnames(xx) <- c('MP', 'Depth.Chart')
> yy <- df[df$Game.ID == '2018-02-01 * OKC', ]
> yy
                 Name Pos         Date  Tm Opp MP Player.ID
1        Alex Abrines  SG 2018-02-01 * OKC DEN 29 abrinal01
2        Steven Adams   C 2018-02-01 * OKC DEN 32 adamsst01
8     Carmelo Anthony  PF 2018-02-01 * OKC DEN 34 anthoca01
34     Raymond Felton  PG 2018-02-01 * OKC DEN 14 feltora01
35  Terrance Ferguson  SG 2018-02-01 * OKC DEN  4 fergute01
41        Paul George  PF 2018-02-01 * OKC DEN 36 georgpa01
46       Jerami Grant  PF 2018-02-01 * OKC DEN 25 grantje01
58       Josh Huestis  PF 2018-02-01 * OKC DEN 12 huestjo01
86  Patrick Patterson  PF 2018-02-01 * OKC DEN 15 pattepa01
106 Russell Westbrook  PG 2018-02-01 * OKC DEN 39 westbru01
             Game.ID
1   2018-02-01 * OKC
2   2018-02-01 * OKC
8   2018-02-01 * OKC
34  2018-02-01 * OKC
35  2018-02-01 * OKC
41  2018-02-01 * OKC
46  2018-02-01 * OKC
58  2018-02-01 * OKC
86  2018-02-01 * OKC
106 2018-02-01 * OKC
> zz <- merge(yy, xx, all.x = TRUE)
> zz
   MP              Name Pos         Date  Tm Opp Player.ID
1   4 Terrance Ferguson  SG 2018-02-01 * OKC DEN fergute01
2  12      Josh Huestis  PF 2018-02-01 * OKC DEN huestjo01
3  14    Raymond Felton  PG 2018-02-01 * OKC DEN feltora01
4  15 Patrick Patterson  PF 2018-02-01 * OKC DEN pattepa01
5  25      Jerami Grant  PF 2018-02-01 * OKC DEN grantje01
6  29      Alex Abrines  SG 2018-02-01 * OKC DEN abrinal01
7  32      Steven Adams   C 2018-02-01 * OKC DEN adamsst01
8  34   Carmelo Anthony  PF 2018-02-01 * OKC DEN anthoca01
9  36       Paul George  PF 2018-02-01 * OKC DEN georgpa01
10 39 Russell Westbrook  PG 2018-02-01 * OKC DEN westbru01
            Game.ID Depth.Chart
1  2018-02-01 * OKC          10
2  2018-02-01 * OKC           9
3  2018-02-01 * OKC           8
4  2018-02-01 * OKC           7
5  2018-02-01 * OKC           6
6  2018-02-01 * OKC           5
7  2018-02-01 * OKC           4
8  2018-02-01 * OKC           3
9  2018-02-01 * OKC           2
10 2018-02-01 * OKC           1

最后,我需要提取与原始观测对应的zz$Depth.Chart的值,即5。

> zz[zz$MP == 29, c('Depth.Chart')]
[1] 5
Levels: 1 10 2 3 4 5 6 7 8 9

我想定义一个函数,对数据框中的每条观测执行上述繁琐而杂乱的步骤,并返回由结果组成的向量。如何才能引用与当前正在处理的观测相对应的df$MP值,而不必像上面那样显式写出29?以下是我的一些尝试,但都没有成功。

> f1 <- function(col1, df, col2){
+   lapply(col1, function(i){
+     df2 <- df[col1 == i, col2]
+     df3 <- data.frame(cbind(sort(df2, decreasing = TRUE), rownames(data.table(sort(df2, decreasing = TRUE)))))
+     df3[i, 2]
+   })}
> f1(df$Game.ID, df, c('MP'))[1:10]
[[1]]
[1] 7
Levels: 1 10 2 3 4 5 6 7 8 9

[[2]]
[1] 7
Levels: 1 10 2 3 4 5 6 7 8 9

[[3]]
[1] 6
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9

[[4]]
[1] 8
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9

[[5]]
[1] 8
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9

[[6]]
[1] 3
Levels: 1 2 3 4 5 6 7 8

[[7]]
[1] 5
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9

[[8]]
[1] 7
Levels: 1 10 2 3 4 5 6 7 8 9

[[9]]
[1] 9
Levels: 1 10 11 2 3 4 5 6 7 8 9

[[10]]
[1] 1
Levels: 1 10 2 3 4 5 6 7 8 9

> f1 <- function(col1, df, col2){
+   lapply(col1, function(i){
+     df2 <- df[col1 == i, col2]
+     df3 <- data.frame(cbind(sort(df2, decreasing = TRUE), rownames(data.table(sort(df2, decreasing = TRUE)))))
+     df3[df3$X1 == i, 2]
+   })}
> f1(df$Game.ID, df, c('MP'))
 Hide Traceback

 Rerun with Debug
 Error in Ops.factor(df3$X1, i) : level sets of factors are different 
7.
stop("level sets of factors are different") 
6.
Ops.factor(df3$X1, i) 
5.
`[.data.frame`(df3, df3$X1 == i, 2) 
4.
df3[df3$X1 == i, 2] 
3.
FUN(X[[i]], ...) 
2.
lapply(col1, function(i) {
    df2 <- df[col1 == i, col2]
    df3 <- data.frame(cbind(sort(df2, decreasing = TRUE), rownames(data.table(sort(df2, 
        decreasing = TRUE))))) ... 
1.
f1(df$Game.ID, df, c("MP")) 

> f1 <- function(col1, df, col2){
+   lapply(col1, function(i){
+     df2 <- df[col1 == i, col2]
+     df3 <- data.frame(cbind(sort(df2, decreasing = TRUE), rownames(data.table(sort(df2, decreasing = TRUE)))))
+     df3[col2 == i, 2]
+   })}
> f1(df$Game.ID, df, c('MP'))[1:10]
[[1]]
factor(0)
Levels: 1 10 2 3 4 5 6 7 8 9

[[2]]
factor(0)
Levels: 1 10 2 3 4 5 6 7 8 9

[[3]]
factor(0)
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9

[[4]]
factor(0)
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9

[[5]]
factor(0)
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9

[[6]]
factor(0)
Levels: 1 2 3 4 5 6 7 8

[[7]]
factor(0)
Levels: 1 10 11 12 13 2 3 4 5 6 7 8 9

[[8]]
factor(0)
Levels: 1 10 2 3 4 5 6 7 8 9

[[9]]
factor(0)
Levels: 1 10 11 2 3 4 5 6 7 8 9

[[10]]
factor(0)
Levels: 1 10 2 3 4 5 6 7 8 9

我想我还没有完全理解R如何处理函数内部的变量i,或者说该如何正确地引用它。我在这个论坛上找到了Python中函数内嵌套函数的通用示例,但没有找到R的示例。非常感谢任何帮助。

修改

以下是我数据的简单子集:

> dput(df)
structure(list(MP = c(29L, 32L, 3L, 34L, 14L, 3L, 40L, 17L, 13L, 
14L, 4L, 36L, 6L, 33L, 25L, 12L, 17L, 3L, 15L, 28L, 33L, 39L, 
30L), Player.ID = structure(c(1L, 2L, 3L, 8L, 14L, 16L, 21L, 
26L, 30L, 34L, 35L, 42L, 41L, 43L, 46L, 58L, 62L, 79L, 86L, 100L, 
102L, 106L, 107L), .Label = c("abrinal01", "adamsst01", "aldrico01", 
"aldrila01", "anderky01", "anderry01", "antetgi01", "anthoca01", 
"anunoog01", "arthuda01", "bartowi01", "bealbr01", "bertada01", 
"bjeline01", "brogdma01", "brookaa01", "brookdi01", "brownlo01", 
"brownst02", "bullore01", "butleji01", "buyckdw01", "capelca01", 
"chandwi01", "craigto01", "crawfja01", "davisde01", "dellama01", 
"derozde01", "dienggo01", "drumman01", "ennisja01", "farieke01", 
"feltora01", "fergute01", "forbebr01", "fraziti01", "gallola01", 
"gasolma01", "gasolpa01", "georgma01", "georgpa01", "gibsota01", 
"ginobma01", "gortama01", "grantje01", "greenda02", "greenge01", 
"greenja01", "griffbl01", "hardeja01", "harrian01", "harriga01", 
"henrymy01", "hensojo01", "hilarne01", "hillida01", "huestjo01", 
"ibakase01", "johnsst04", "jokicni01", "jonesty01", "kennalu01", 
"kilpase01", "lauvejo01", "lowryky01", "lylestr01", "mahinia01", 
"makerth01", "martija01", "mbahalu01", "mclembe01", "meeksjo01", 
"middlkh01", "millspa02", "moreler01", "morrima02", "mudiaem01", 
"muhamsh01", "munfoxa02", "murrade01", "murraja01", "noguelu01", 
"oubreke01", "parketo01", "pattepa01", "paulbr01", "paulch01", 
"plumlma02", "poeltja01", "porteot01", "powelno01", "reedwi02", 
"satorto01", "scottmi01", "seldewa01", "siakapa01", "smithis01", 
"snellto01", "teaguje01", "tollian01", "townska01", "tuckepj01", 
"valanjo01", "vaughra01", "westbru01", "wiggian01", "wilsodj01", 
"wrighde01"), class = "factor"), Game.ID = structure(c(7L, 7L, 
6L, 7L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 6L, 6L, 7L, 7L, 6L, 6L, 
7L, 6L, 6L, 7L, 6L), .Label = c("2018-02-01 * DEN", "2018-02-01 * DET", 
"2018-02-01 * HOU", "2018-02-01 * MEM", "2018-02-01 * MIL", "2018-02-01 * MIN", 
"2018-02-01 * OKC", "2018-02-01 * SAS", "2018-02-01 * TOR", "2018-02-01 * WAS"
), class = "factor")), .Names = c("MP", "Player.ID", "Game.ID"
), row.names = c(1L, 2L, 3L, 8L, 14L, 16L, 21L, 26L, 30L, 34L, 
35L, 41L, 42L, 43L, 46L, 58L, 62L, 79L, 86L, 100L, 102L, 106L, 
107L), class = "data.frame")

1 个答案:

答案 0 :(得分:1)

您只在流程中的一个小步骤里用到了data.table,但其实应该用它来完成整个过程。它可以非常方便地“按组”进行操作,在本例中就是按Game.ID分组使用rank()。使用您的小样本数据:

library(data.table)
setDT(df)
df[, Depth.Chart := rank(-MP), by = Game.ID]
df
#     MP Player.ID          Game.ID Depth.Chart
#  1: 29 abrinal01 2018-02-01 * OKC         5.0
#  2: 32 adamsst01 2018-02-01 * OKC         4.0
#  3:  3 aldrico01 2018-02-01 * MIN        12.0
#  4: 34 anthoca01 2018-02-01 * OKC         3.0
#  5: 14 bjeline01 2018-02-01 * MIN         8.0
#  6:  3 brookaa01 2018-02-01 * MIN        12.0
#  7: 40 butleji01 2018-02-01 * MIN         1.0
#  8: 17 crawfja01 2018-02-01 * MIN         6.5
#  9: 13 dienggo01 2018-02-01 * MIN         9.0
# 10: 14 feltora01 2018-02-01 * OKC         8.0
# 11:  4 fergute01 2018-02-01 * OKC        10.0
# 12: 36 georgpa01 2018-02-01 * OKC         2.0
# 13:  6 georgma01 2018-02-01 * MIN        10.0
# 14: 33 gibsota01 2018-02-01 * MIN         2.5
# 15: 25 grantje01 2018-02-01 * OKC         6.0
# 16: 12 huestjo01 2018-02-01 * OKC         9.0
# 17: 17 jonesty01 2018-02-01 * MIN         6.5
# 18:  3 muhamsh01 2018-02-01 * MIN        12.0
# 19: 15 pattepa01 2018-02-01 * OKC         7.0
# 20: 28 teaguje01 2018-02-01 * MIN         5.0
# 21: 33 townska01 2018-02-01 * MIN         2.5
# 22: 39 westbru01 2018-02-01 * OKC         1.0
# 23: 30 wiggian01 2018-02-01 * MIN         4.0
#     MP Player.ID          Game.ID Depth.Chart
默认情况下,rank会对并列的值取平均排名(例如上面的6.5和2.5);其他处理并列的方式请参阅?rank。