我似乎无法基于不同数据帧中的两列来获取DF的子集。
我尝试了以下形式的变化:
# Attempt 1: `%in% DF2` tests membership against the data frame DF2 as a whole
# rather than against one of its columns (e.g. DF2$Participant.ID).
test = subset(DF1, First.Name %in% DF2)
# Attempt 2: the two filter() calls test Date and First.Name independently, so
# a row is kept when its date appears in any DF2 row and its name appears in
# any DF2 row -- not necessarily together in the same DF2 row.
test2 = DF1 %>%
group_by(First.Name) %>%
filter(Date %in% DF2$Date) %>%
filter(First.Name %in% DF2$Participant.ID) %>%
ungroup()
我原来的DF:
# DF1 (dput output): six rows for participant 5021, one row per date.
# The Date factor codes must all be integer literals; a bare `10` would make
# c() coerce the whole code vector to double, which is not a valid factor.
structure(list(First.Name = c(5021, 5021, 5021, 5021, 5021, 5021
), Last.Name = structure(c(7L, 7L, 7L, 7L, 7L, 7L), .Label = c("116",
"G103", "G104", "G105", "G106", "G107", "G108", "G109", "G110",
"G111", "G112", "G113", "G114", "G115", "G116", "G117", "G118",
"G119", "G120", "G121", "G122", "G123", "G124", "Post", "Post1",
"Pre"), class = "factor"), User.ID = c(29610L, 29610L, 29610L,
29610L, 29610L, 29610L), Date = structure(c(6L,7L,8L,9L,10L,1L), .Label = c("8/14/2018", "8/2/2018","8/3/2018", "8/4/2018", "8/5/2018",
"8/6/2018", "8/7/2018", "8/8/2018", "8/9/2018","8/10/2018"), class = "factor"), Day.of.Week = structure(c(3L,
10L, 12L, 9L, 2L, 10L), .Label = c("friday ", "Friday ",
"Monday ", "saturday ", "Saturday ", "sunday ", "Sunday ",
"thursday ", "Thursday ", "Tuesday ", "wednesday", "Wednesday"
), class = "factor")), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
我想将它与DF2进行匹配,DF2开头部分的数据如下:
# DF2 (dput output): 25 rows, one per Participant.ID, each with a Date stored
# as a factor of m/d/yyyy strings. These (Date, Participant.ID) pairs are the
# keys used to select rows from DF1.
structure(list(Date = structure(c(12L, 12L, 12L, 12L, 12L, 18L,
18L, 18L, 22L, 22L, 21L, 21L, 24L, 21L, 28L, 27L, 28L, 28L, 28L,
27L, 27L, 27L, 27L, 1L, 1L), .Label = c("10/19/2018", "10/24/2018",
"11/1/2018", "11/12/2018", "11/6/2018", "2/14/2019", "2/20/2019",
"2/26/2019", "2/8/2019", "3/13/2018", "3/14/2018", "3/26/2018",
"4/15/2019", "4/23/2019", "4/24/2019", "5/17/2019", "5/9/2019",
"6/1/2018", "6/11/2019", "6/13/2018", "6/21/2018", "6/22/2018",
"6/26/2018", "6/29/2018", "6/4/2018", "8/15/2018", "8/28/2018",
"8/9/2018"), class = "factor"), Participant.ID = c(5001, 5002,
5003, 5004, 5005, 5006, 5007, 5009, 5010, 5011, 5013, 5014, 5015,
5016, 5017, 5018, 5019, 5020, 5021, 5022, 5023, 5024, 5026, 5027,
5028)), row.names = c(NA, 25L), class = "data.frame")
我希望基于DF2中的日期和名称,来选择DF1中的行。任何帮助表示赞赏。
答案 0 :(得分:0)
也许我们需要做一个inner_join
library(dplyr)
# Keep only the DF1 rows whose (Date, First.Name) pair also occurs in DF2.
# DF1$First.Name is matched against DF2$Participant.ID; Date has the same
# name in both tables.
inner_join(DF1, DF2, by = c("Date", "First.Name" = "Participant.ID"))
或使用data.table
library(data.table)
# setDT() converts DF1 to a data.table by reference; the join then matches
# DF1$First.Name to DF2$Participant.ID and Date to Date.
# nomatch = NULL drops DF2 rows that have no partner in DF1 (inner join);
# without it every DF2 row is returned, padded with NA.
setDT(DF1)[DF2, on = .(Date, First.Name = Participant.ID), nomatch = NULL]
答案 1 :(得分:0)
在base R(基础R)中,我们可以使用merge
# Inner join on the (participant, date) pair using the question's objects
# DF1 and DF2 (R is case-sensitive, so `df1`/`df2` would not be found).
merge(DF1, DF2, by.x = c("First.Name", "Date"), by.y = c("Participant.ID", "Date"))
或使用%in%过滤行
# Match on the (participant, date) PAIR. Testing each column with its own
# %in% would keep a row whenever its ID and its date each occur somewhere in
# DF2, even if they never occur together in the same DF2 row.
# paste() turns the factor Date into its label, so differing factor levels
# between the two tables do not matter.
df1_key <- paste(DF1$First.Name, DF1$Date, sep = "_")
df2_key <- paste(DF2$Participant.ID, DF2$Date, sep = "_")
DF1[df1_key %in% df2_key, ]