按组提取(子集)特定的行

时间:2019-07-18 12:05:52

标签: r subset slice apply

我的数据框如下:

    Group     V2   V3   V4
      1        D    F    W 
      1        T    A    L 
      1        P    F    P 
      2        T    F    L 
      2        R    R    O 
      2        D    Y    L 
      2        D    F    I 
      ...     

我有这样的列表:

 [1]  1 3
 [2]  4
 [3]  2 3 4

列表的每个元素指明了我要在对应组中保留的行(按组内的行号)。也就是说,我只想在数据框中保留 Group == 1 的第 1 行和第 3 行;第 2 组的第 4 行;第 3 组的第 2、3、4 行,依此类推。

我已经尽力尝试了,但还没有找到一种简单的做法,尽管我很确定一定可以用 apply 或类似的函数来实现。

3 个答案:

答案 0 :(得分:1)

可以这样做:

# Split df into one data frame per Group, keep from each piece the row
# positions listed in the matching element of l1, then stack the pieces.
do.call(
  rbind,
  Map(
    function(group_rows, keep) group_rows[keep, ],
    split(df, df$Group),
    l1
  )
)
#    Group V2 V3 V4
#1.1     1  D  F  W
#1.3     1  P  F  P
#2       2  D  F  I

其中

# Row positions to keep within each group, one list element per group.
l1 <- list(
  c(1, 3),
  4
)

答案 1 :(得分:1)

使用以下与问题中类似的 data.frame 和列表:

# Example data: 11 rows in three groups (3, 4 and 4 rows), parsed from an
# inline whitespace-separated table whose first line holds the column names.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
df <- read.table(text = "Group     V2   V3   V4
      1        D    F    W 
      1        T    A    L 
      1        P    F    P 
      2        T    F    L 
      2        R    R    O 
      2        D    Y    L 
      2        D    F    I
      3        E    F    I
      3        F    F    I
      3        G    F    I
      3        T    F    I", header = TRUE)
# Row positions to keep within each group, one list element per group.
l <- list(c(1, 3), 4, 2:4)

# For each position i in l, take the rows of df where Group == i, keep the
# within-group row positions stored in l[[i]], and stack the pieces with rbind.
do.call(
  rbind,
  Map(
    function(i, keep) df[df$Group == i, ][keep, ],
    seq_along(l),
    l
  )
)
#   Group V2 V3 V4
#1      1  D  F  W
#3      1  P  F  P
#7      2  D  F  I
#9      3  F  F  I
#10     3  G  F  I
#11     3  T  F  I

使用简单的 data.table 方法也能得到相同的结果:

library(data.table)
# Convert df to a data.table so the grouped `[` syntax can be used.
dt <- as.data.table(df)
# Variant 1: index l by .GRP, data.table's group counter.
dt[, .SD[l[[.GRP]]], by = Group]

# Variant 2: index l by the current group's own Group value, taken from .BY.
dt[, .SD[l[[.BY$Group]]], by = Group]

#   Group V2 V3 V4
#1:     1  D  F  W
#2:     1  P  F  P
#3:     2  D  F  I
#4:     3  F  F  I
#5:     3  G  F  I
#6:     3  T  F  I

答案 2 :(得分:0)

使用 tidyverse 的一种方案:

library(tidyverse)
# Split df into one tibble per Group, keep from each tibble the row positions
# given by the matching element of l, and row-bind the pieces into one tibble.
map2_df(
  group_split(df, Group),
  l,
  function(grp, keep) slice(grp, keep)
)
# A tibble: 6 x 4
#  Group V2    V3    V4   
#  <int> <fct> <fct> <fct>
#1     1 D     F     W    
#2     1 P     F     P    
#3     2 D     F     I    
#4     3 F     F     I    
#5     3 G     F     I    
#6     3 T     F     I    

数据

# Example data written out explicitly: an integer Group column and three
# factor columns, each with its levels listed in alphabetical order.
df <- data.frame(
  Group = rep(1:3, times = c(3L, 4L, 4L)),
  V2 = factor(
    c("D", "T", "P", "T", "R", "D", "D", "E", "F", "G", "T"),
    levels = c("D", "E", "F", "G", "P", "R", "T")
  ),
  V3 = factor(
    c("F", "A", "F", "F", "R", "Y", "F", "F", "F", "F", "F"),
    levels = c("A", "F", "R", "Y")
  ),
  V4 = factor(
    c("W", "L", "P", "L", "O", "L", "I", "I", "I", "I", "I"),
    levels = c("I", "L", "O", "P", "W")
  )
)

# Row positions to keep within each group, one list element per group.
l <- list(c(1, 3), 4, 2:4)