根据另一个数据框中的两个条件对数据框取子集

时间:2019-07-30 02:09:55

标签: r dplyr plyr

我似乎无法根据另一个数据框中的两列来获取数据框(DF1)的子集。

我尝试了以下形式的变化:

# Attempt 1: subset DF1 to the rows whose First.Name is matched by %in%.
# NOTE(review): the right-hand side of %in% is the whole data frame DF2, not a
# single column of it (e.g. DF2$Participant.ID) -- presumably not intended.
test = subset(DF1, First.Name %in% DF2)

# Attempt 2: group DF1 by First.Name, then keep the rows whose Date occurs
# anywhere in DF2$Date and whose First.Name occurs anywhere in
# DF2$Participant.ID, and finally drop the grouping again.
# NOTE(review): the two filters are evaluated independently of each other, so
# a row's date and name are not required to come from the same row of DF2.
test2 = DF1 %>%
  group_by(First.Name) %>%
  filter(Date %in% DF2$Date) %>% 
  filter(First.Name %in% DF2$Participant.ID) %>% 
  ungroup()

我原来的DF:

# Literal definition of the first data frame (called DF1 in the text): six
# rows, all for participant 5021, carrying the tibble classes.
# Every factor code is written as an integer literal (note 10L in Date): a
# single bare 10 would coerce the whole code vector to double and produce a
# factor that is not backed by integer codes.
structure(
  list(
    First.Name = c(5021, 5021, 5021, 5021, 5021, 5021),
    Last.Name = structure(
      c(7L, 7L, 7L, 7L, 7L, 7L),
      .Label = c(
        "116", "G103", "G104", "G105", "G106", "G107", "G108", "G109",
        "G110", "G111", "G112", "G113", "G114", "G115", "G116", "G117",
        "G118", "G119", "G120", "G121", "G122", "G123", "G124", "Post",
        "Post1", "Pre"
      ),
      class = "factor"
    ),
    User.ID = c(29610L, 29610L, 29610L, 29610L, 29610L, 29610L),
    Date = structure(
      c(6L, 7L, 8L, 9L, 10L, 1L),
      .Label = c(
        "8/14/2018", "8/2/2018", "8/3/2018", "8/4/2018", "8/5/2018",
        "8/6/2018", "8/7/2018", "8/8/2018", "8/9/2018", "8/10/2018"
      ),
      class = "factor"
    ),
    Day.of.Week = structure(
      c(3L, 10L, 12L, 9L, 2L, 10L),
      .Label = c(
        "friday   ", "Friday   ", "Monday   ", "saturday ", "Saturday ",
        "sunday   ", "Sunday   ", "thursday ", "Thursday ", "Tuesday  ",
        "wednesday", "Wednesday"
      ),
      class = "factor"
    )
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)


下面是我想用来与第一个数据框进行匹配的DF2:

# Literal definition of the second data frame (called DF2 in the text): a
# plain data.frame with 25 rows, a factor column Date and a numeric column
# Participant.ID. It appears to be dput() output, so it is left as generated.
structure(list(Date = structure(c(12L, 12L, 12L, 12L, 12L, 18L, 
18L, 18L, 22L, 22L, 21L, 21L, 24L, 21L, 28L, 27L, 28L, 28L, 28L, 
27L, 27L, 27L, 27L, 1L, 1L), .Label = c("10/19/2018", "10/24/2018", 
"11/1/2018", "11/12/2018", "11/6/2018", "2/14/2019", "2/20/2019", 
"2/26/2019", "2/8/2019", "3/13/2018", "3/14/2018", "3/26/2018", 
"4/15/2019", "4/23/2019", "4/24/2019", "5/17/2019", "5/9/2019", 
"6/1/2018", "6/11/2019", "6/13/2018", "6/21/2018", "6/22/2018", 
"6/26/2018", "6/29/2018", "6/4/2018", "8/15/2018", "8/28/2018", 
"8/9/2018"), class = "factor"), Participant.ID = c(5001, 5002, 
5003, 5004, 5005, 5006, 5007, 5009, 5010, 5011, 5013, 5014, 5015, 
5016, 5017, 5018, 5019, 5020, 5021, 5022, 5023, 5024, 5026, 5027, 
5028)), row.names = c(NA, 25L), class = "data.frame")

我希望根据DF2中的日期和参与者编号(Participant.ID)来选择DF1中对应的行。非常感谢任何帮助。

2 个答案:

答案 0 :(得分:0)

也许我们需要做一个inner_join

library(dplyr)
# Keep only the DF1 rows that have a partner row in DF2 with the same Date and
# with DF2$Participant.ID equal to DF1$First.Name (both keys must match on the
# same DF2 row).
inner_join(DF1, DF2, by = c("Date", "First.Name" = "Participant.ID"))

或使用data.table

library(data.table)
# setDT() converts DF1 to a data.table by reference; DF2 is then joined to it
# on Date and First.Name == Participant.ID.
# nomatch = NULL drops DF2 rows that have no partner in DF1, making this an
# inner join; without it every DF2 row is returned, padded with NA.
setDT(DF1)[DF2, on = .(Date, First.Name = Participant.ID), nomatch = NULL]

答案 1 :(得分:0)

在基础R(base R)中,我们可以使用merge

# Base-R inner join: rows of DF1 and DF2 are paired when First.Name equals
# Participant.ID and the Date values are equal. The objects are named DF1/DF2
# as elsewhere on this page (R names are case-sensitive).
merge(DF1, DF2,
      by.x = c("First.Name", "Date"),
      by.y = c("Participant.ID", "Date"))

或使用%in%过滤行

# Keep the DF1 rows whose (First.Name, Date) pair occurs as a
# (Participant.ID, Date) pair in DF2. The two columns are pasted into a single
# key per row: testing each column with its own %in% would also keep rows whose
# name and date both appear in DF2 but on different rows.
DF1[paste(DF1$First.Name, DF1$Date) %in%
      paste(DF2$Participant.ID, DF2$Date), ]