
时间:2015-09-30 00:13:15

标签: r dataframe combinations



       Team      Player    K   D    A    LH Points Salary    PPS
  4     ATN  ExoticDeer  6.1 3.3  6.4 306.9 22.209   1622 1.3692
  2     ATN     Supreme  6.8 5.3  7.1 229.4 21.954   1578 1.3913
  1     ATN        sasu  3.6 6.4 11.0  95.7 19.357   1244 1.5560
  3     ATN eL lisasH 2  2.6 6.1  7.9  29.7 12.037    998 1.2061
  5     ATN       Nisha  2.7 5.6  7.5  48.2 12.282    955 1.2861
  11     CL Swiftending  6.0 5.8  7.8 360.5 22.285   1606 1.3876
  13     CL     Pajkatt 13.3 7.5  9.3 326.8 37.248   1489 2.5015
  15     CL  SexyBamboe  6.3 8.5  9.3 168.0 20.660   1256 1.6449
  14     CL         EGM  2.8 6.0 13.5  78.8 21.988    989 2.2233
  12     CL       Saksa  2.5 6.5 10.5  59.8 15.898    967 1.6441
  51 DBEARS         Ace  7.0 3.4  6.9 195.6 23.596   1578 1.4953
  31 DBEARS    HesteJoe  5.4 5.4  6.1 176.7 16.927   1512 1.1195
  61 DBEARS      Miggel  2.8 6.8 11.0 141.8 17.818   1212 1.4701
  21 DBEARS        Noia  3.0 6.0  8.0  36.1 13.161    970 1.3568
  41 DBEARS        Ryze  2.7 4.7  6.7  74.6 12.166    937 1.2984
  8      GB Keyser Soze  6.0 5.0  5.6 316.0 19.120   1602 1.1935
  9      GB      Madara  5.4 5.3  6.6 334.5 19.405   1577 1.2305
  10     GB     SkyLark  1.8 5.3  7.0  71.8 10.218   1266 0.8071
  7      GB         MNT  2.3 5.9  6.1  85.6  9.316   1007 0.9251
  6      GB   SKANKS224  1.4 7.6  7.4  52.5  7.565    954 0.7930

我遵循这篇文章中描述的一般概念:I want to generate combinations of 5 names from a column in an R data frame, whose values in a different column add up to a certain number or less


## Enumerate every combination of 8 rows for Player, Points and Salary.
## combn() returns a matrix with one column per combination, so xx is a
## list of three matrices that share the same column order.
xx <- with(
  FantasyPlayers,
  lapply(list(as.character(Player), Points, Salary), combn, 8)
)
## Collapse each combination (column) to a single value:
## player names are joined into one comma-separated string,
## numeric columns are summed; then name the results after the
## source columns (Player, Points, Salary).
yy <- setNames(
  lapply(xx, function(x) {
    if (typeof(x) == "character") apply(x, 2, toString) else colSums(x)
  }),
  names(FantasyPlayers)[c(2, 7, 8)]
)
## coerce to data.frame: one row per 8-player combination
newdf <- as.data.frame(yy)




# Sample data as a structure() literal (looks like dput() output): a
# data.frame with 20 rows and 9 columns
# (Team, Player, K, D, A, LH, Points, Salary, PPS).
# Player is a factor with 21 levels; the level "DruidzOzoneShoc" is not
# referenced by any row. Row names are the character labels given in
# row.names, not 1..20.
# NOTE(review): the value is not assigned here; the surrounding snippets
# appear to expect it under the name FantasyPlayers -- confirm and assign
# before running them.
structure(list(Team = c("ATN", "ATN", "ATN", "ATN", "ATN", "CL", 
"CL", "CL", "CL", "CL", "DBEARS", "DBEARS", "DBEARS", "DBEARS", 
"DBEARS", "GB", "GB", "GB", "GB", "GB"), Player = structure(c(2L, 
5L, 4L, 1L, 3L, 15L, 12L, 14L, 11L, 13L, 16L, 18L, 19L, 20L, 
21L, 6L, 7L, 10L, 8L, 9L), .Label = c("eL lisasH 2", "ExoticDeer", 
"Nisha", "sasu", "Supreme", "Keyser Soze", "Madara", "MNT", "SKANKS224", 
"SkyLark", "EGM", "Pajkatt", "Saksa", "SexyBamboe", "Swiftending", 
"Ace", "DruidzOzoneShoc", "HesteJoe", "Miggel", "Noia", "Ryze"
), class = "factor"), K = c(6.1, 6.8, 3.6, 2.6, 2.7, 6, 13.3, 
6.3, 2.8, 2.5, 7, 5.4, 2.8, 3, 2.7, 6, 5.4, 1.8, 2.3, 1.4), D = c(3.3, 
5.3, 6.4, 6.1, 5.6, 5.8, 7.5, 8.5, 6, 6.5, 3.4, 5.4, 6.8, 6, 
4.7, 5, 5.3, 5.3, 5.9, 7.6), A = c(6.4, 7.1, 11, 7.9, 7.5, 7.8, 
9.3, 9.3, 13.5, 10.5, 6.9, 6.1, 11, 8, 6.7, 5.6, 6.6, 7, 6.1, 
7.4), LH = c(306.9, 229.4, 95.7, 29.7, 48.2, 360.5, 326.8, 168, 
78.8, 59.8, 195.6, 176.7, 141.8, 36.1, 74.6, 316, 334.5, 71.8, 
85.6, 52.5), Points = c(22.209, 21.954, 19.357, 12.037, 12.282, 
22.285, 37.248, 20.66, 21.988, 15.898, 23.596, 16.927, 17.818, 
13.161, 12.166, 19.12, 19.405, 10.218, 9.316, 7.565), Salary = c(1622, 
1578, 1244, 998, 955, 1606, 1489, 1256, 989, 967, 1578, 1512, 
1212, 970, 937, 1602, 1577, 1266, 1007, 954), PPS = c(1.3692, 
1.3913, 1.556, 1.2061, 1.2861, 1.3876, 2.5015, 1.6449, 2.2233, 
1.6441, 1.4953, 1.1195, 1.4701, 1.3568, 1.2984, 1.1935, 1.2305, 
0.8071, 0.9251, 0.793)), .Names = c("Team", "Player", "K", "D", 
"A", "LH", "Points", "Salary", "PPS"), class = "data.frame", row.names = c("4", 
"2", "1", "3", "5", "11", "13", "15", "14", "12", "51", "31", 
"61", "21", "41", "8", "9", "10", "7", "6"))

2 个答案:

答案 0 :(得分:4)




# Every 8-player roster as one column of a character matrix.
xx <- with(FantasyPlayers, combn(as.character(Player), 8))
# Reshape to long form: the row index becomes jersey_no, the column index
# becomes team_no and the cell value becomes Player; then convert the
# result to a data.table in place.
# NOTE(review): needs data.table attached plus an array method for melt()
# (presumably reshape2's melt.array) -- confirm which packages are loaded.
mxx <- melt(xx, varnames = c("jersey_no", "team_no"), value.name = "Player")
setDT(mxx)

#     jersey_no team_no      Player
#  1:         1       1  ExoticDeer
#  2:         2       1     Supreme
#  3:         3       1        sasu
#  4:         4       1 eL lisasH 2
#  5:         5       1       Nisha
#  6:         6       1 Swiftending
#  7:         7       1     Pajkatt
#  8:         8       1  SexyBamboe
#  9:         1       2  ExoticDeer
# 10:         2       2     Supreme

8位玩家的小组共享team_no,并按其jersey_no编入索引。查看?melt.array以了解其工作原理。 setDT只是将生成的data.frame转换为data.table,以便于合并。


# Attach each player's stats to every (team_no, jersey_no) slot by joining
# on Player. The `on =` join argument only exists for data.table, so coerce
# FantasyPlayers first; with a plain data.frame the call would fail.
FantasyTeams <- as.data.table(FantasyPlayers)[mxx, on = "Player"]

#          Team      Player   K   D    A    LH Points Salary    PPS jersey_no team_no
#       1:  ATN  ExoticDeer 6.1 3.3  6.4 306.9 22.209   1622 1.3692         1       1
#       2:  ATN     Supreme 6.8 5.3  7.1 229.4 21.954   1578 1.3913         2       1
#       3:  ATN        sasu 3.6 6.4 11.0  95.7 19.357   1244 1.5560         3       1
#       4:  ATN eL lisasH 2 2.6 6.1  7.9  29.7 12.037    998 1.2061         4       1
#       5:  ATN       Nisha 2.7 5.6  7.5  48.2 12.282    955 1.2861         5       1
#      ---                                                                           
# 1007756:   GB Keyser Soze 6.0 5.0  5.6 316.0 19.120   1602 1.1935         4  125970
# 1007757:   GB      Madara 5.4 5.3  6.6 334.5 19.405   1577 1.2305         5  125970
# 1007758:   GB     SkyLark 1.8 5.3  7.0  71.8 10.218   1266 0.8071         6  125970
# 1007759:   GB         MNT 2.3 5.9  6.1  85.6  9.316   1007 0.9251         7  125970
# 1007760:   GB   SKANKS224 1.4 7.6  7.4  52.5  7.565    954 0.7930         8  125970



过滤出来自同一Team的玩家不超过三名的team_no ...

# For each roster (team_no), count players per Team and keep the team_no
# values where the largest count is at most 3. The unnamed j expression
# gets the default column name V1.
my_teams <- FantasyTeams[
  , max(table(Team)) <= 3, by = team_no
][V1 == TRUE, team_no]

V1是分配给构造变量max(table(Team)) <= 3的默认名称。这不是一件闪电般快速的事情,但现在你已经排除了一些团队,后来的子集化步骤应该更快:

# Among the rosters that passed the per-Team limit, keep the team_no values
# whose combined Salary is strictly below 10000 (V1 is the default name of
# the unnamed j expression).
my_new_teams <- FantasyTeams[
  team_no %in% my_teams,
  sum(Salary) < 10000,
  by = team_no
][V1 == TRUE][["team_no"]]




# Long format: one row per (team_no, jersey_no) slot of every kept roster.
mxx[list(team_no = my_new_teams), on = "team_no"]


# One row per kept roster, with its players collapsed into a single string.
mxx[
  list(team_no = my_new_teams),
  list(roster = toString(Player)),
  on = "team_no",
  by = .EACHI
]


# Same roster string plus the roster's total Salary and total Points.
FantasyTeams[
  list(team_no = my_new_teams),
  list(
    roster     = toString(Player),
    tot_salary = sum(Salary),
    tot_points = sum(Points)
  ),
  on = "team_no",
  by = .EACHI
]

#        team_no                                                              roster tot_salary tot_points
#     1:    3716      ExoticDeer, Supreme, sasu, Swiftending, EGM, Saksa, Noia, Ryze       9913    149.018
#     2:    3720       ExoticDeer, Supreme, sasu, Swiftending, EGM, Saksa, Noia, MNT       9983    146.168
#     3:    3721 ExoticDeer, Supreme, sasu, Swiftending, EGM, Saksa, Noia, SKANKS224       9930    144.417
#     4:    3725       ExoticDeer, Supreme, sasu, Swiftending, EGM, Saksa, Ryze, MNT       9950    145.173
#     5:    3726 ExoticDeer, Supreme, sasu, Swiftending, EGM, Saksa, Ryze, SKANKS224       9897    143.422
#    ---                                                                                                  
# 40202:  125663         EGM, Saksa, Miggel, Noia, Ryze, Keyser Soze, MNT, SKANKS224       8638    117.032
# 40203:  125664                EGM, Saksa, Miggel, Noia, Ryze, Madara, SkyLark, MNT       8925    119.970
# 40204:  125665          EGM, Saksa, Miggel, Noia, Ryze, Madara, SkyLark, SKANKS224       8872    118.219
# 40205:  125666              EGM, Saksa, Miggel, Noia, Ryze, Madara, MNT, SKANKS224       8613    117.317
# 40206:  125667             EGM, Saksa, Miggel, Noia, Ryze, SkyLark, MNT, SKANKS224       8302    108.130

要了解by=.EACHI正在做什么,需要一些背景知识。这里的合并语法是DT[i, j, on=cols, by=.EACHI]

  • 如果省略j和by,则只会进行合并,就像FantasyTeams的构造一样。
  • 如果省略by,但包含j,则合并后会计算j
  • 如果by=.EACHI,则会对i中的每一行分别计算j

答案 1 :(得分:3)


# Split each comma-separated roster string back into its player names.
splt.names <- strsplit(as.character(newdf$Player), ", ")
# Row positions of each roster's players within FantasyPlayers.
indices <- lapply(splt.names, function(x) match(x, FantasyPlayers$Player))
# TRUE for a roster in which some Team supplies more than 3 players.
# vapply() returns a logical vector directly and errors if any result is
# not a single logical value, instead of relying on lapply() + unlist().
exclude <- vapply(
  indices,
  function(x) any(table(FantasyPlayers$Team[x]) > 3),
  logical(1)
)
# Keep only the rosters that are not excluded.
newdf2 <- newdf[!exclude, ]

首先按", "拆分Player列。然后将玩家名称与FantasyPlayers的Player列匹配。对于得到的indices,主要工作由any(table(FantasyPlayers$Team[x]) > 3)完成:它检查是否有某个团队的计数超过3,即同一团队出现了4名或更多玩家。