基于其他两个数据帧中唯一的值提取数据帧的子集

时间:2017-03-10 14:57:58

标签: r dataframe

我有三个数据框 df1、df2 和 df3。我想找出 df2 中 col1 的值既不在 df1 的 col1 中、也不在 df3 的 col1 中的那些行。

df1 <- data.frame(col1=c('A','C','E'),col2=c(4,8,2))
df1
df2 <- data.frame(col1=c('A','B','C','E','G','I'),col2=c(4,8,2,6,1,9))
df2
df3 <- data.frame(col1=LETTERS[3:26],col2=sample(3:26))
df3

# Expected output
#  col1 col2
#2    B    8

# Flag, for every row of df2, whether its col1 value also occurs in df1$col1.
in_df1 <- df2$col1 %in% df1$col1

# Count non-matching (FALSE) and matching (TRUE) rows; deparse.level = 0 keeps
# the printed table free of a variable-name header.
table(in_df1, deparse.level = 0)
# FALSE  TRUE 
#    3     3

# Rows of df2 whose col1 is present in df1.
df2[in_df1, ]
#  col1 col2
#1    A    4
#3    C    2
#4    E    6

# Rows of df2 whose col1 is absent from df1.
df2[!in_df1, ]
#  col1 col2
#2    B    8
#5    G    1
#6    I    9

# Same three steps, this time comparing df2$col1 against df3$col1.
in_df3 <- df2$col1 %in% df3$col1

table(in_df3, deparse.level = 0)
#FALSE  TRUE 
#    2     4

# Rows of df2 whose col1 is present in df3.
df2[in_df3, ]
#  col1 col2
#3    C    2
#4    E    6
#5    G    1
#6    I    9

# Rows of df2 whose col1 is absent from df3.
df2[!in_df3, ]
#  col1 col2
#1    A    4
#2    B    8

我尝试过的做法:

# Attempt 1 (returns the wrong rows): the inner mask is built only from the
# df2$col1 values that are absent from df1 (B, G, I), so it has length 3.
# Tested against df3 and negated it becomes TRUE, FALSE, FALSE. Used as a row
# index on the 6-row df2, that short logical vector is recycled, which picks
# rows 1 and 4 instead of the intended row 2.
df2[!df2$col1[!df2$col1 %in% df1$col1] %in% df3$col1,]
#  col1 col2
#1    A    4
#4    E    6

上面的方法是错误的(结果与预期不符)。下面的写法可以得到正确的结果:

# Attempt 2 (correct result): first drop the df2 rows whose col1 occurs in
# df1, then index the remaining rows with a mask built from those same
# remaining col1 values tested against df3. The mask and the rows it indexes
# now have the same length, but the df1 filter has to be written out twice.
df2[!df2$col1 %in% df1$col1,][!df2$col1[!df2$col1 %in% df1$col1] %in% df3$col1,]
#  col1 col2
#2    B    8

如何避免重复索引?

有没有比下面更好的方法?

# Single-index alternative: build one logical mask that is TRUE when col1
# occurs in df1 or in df3, negate it, and subset df2 once.
df2[!(df2$col1 %in% df1$col1 | df2$col1 %in% df3$col1),]
#  col1 col2
#2    B    8

这种方法虽然结果正确,但我想知道是否有更好的写法。

1 个答案:

答案 0 :(得分:2)

我们可以使用 dplyr 的 anti_join:先用 bind_rows 把 df1 和 df3 合并,再从 df2 中去掉 col1 能在合并结果中匹配到的行。

library(dplyr)

# Stack df1 and df3 into one reference table, then keep only the rows of df2
# whose col1 has no match in that reference table.
anti_join(df2, bind_rows(df1, df3), by = "col1")
#  col1 col2
#1    B    8