我有以下问题:
我有两个这样的数据集:
# Lookup table: one (date, company) pair per row whose return we want.
# The ticker "APL US EQUTIY" was misspelled; it is corrected to "APL US EQUITY"
# so the company key matches the spelling used in `b` (otherwise a join on
# company can never find that row).
a <- data.frame(date = c("12.01.2016", "13.01.2016", "14.01.2016"),
                company = c("ABBN VX EQUITY", "APL US EQUITY", "PAY US EQUITY"))
# Wide table of returns: three (date, company, return) column triples side by
# side, one triple per company. Because the names repeat, data.frame() makes
# them unique as date/company/return, date.1/company.1/return.1 and
# date.2/company.2/return.2.
# The opening quote before 12.01.2016 was missing in each date vector, which
# made the whole statement unparseable; it is restored here.
b <- data.frame(date = c("07.01.2016", "08.01.2016", "09.01.2016", "10.01.2016", "11.01.2016", "12.01.2016", "13.01.2016", "14.01.2016", "15.01.2016", "16.01.2016"),
                company = c("ABBN VX EQUITY", "ABBN VX EQUITY", "ABBN VX EQUITY", "ABBN VX EQUITY", "ABBN VX EQUITY", "ABBN VX EQUITY", "ABBN VX EQUITY", "ABBN VX EQUITY", "ABBN VX EQUITY", "ABBN VX EQUITY"),
                return = c("10%", "15%", "3%", "8%", "12%", "3%", "10%", "4%", "9%", "22%"),
                date = c("07.01.2016", "08.01.2016", "09.01.2016", "10.01.2016", "11.01.2016", "12.01.2016", "13.01.2016", "14.01.2016", "15.01.2016", "16.01.2016"),
                company = c("APL US EQUITY", "APL US EQUITY", "APL US EQUITY", "APL US EQUITY", "APL US EQUITY", "APL US EQUITY", "APL US EQUITY", "APL US EQUITY", "APL US EQUITY", "APL US EQUITY"),
                return = c("11%", "5%", "19%", "4%", "8%", "10%", "7%", "3%", "9%", "11%"),
                date = c("07.01.2016", "08.01.2016", "09.01.2016", "10.01.2016", "11.01.2016", "12.01.2016", "13.01.2016", "14.01.2016", "15.01.2016", "16.01.2016"),
                company = c("PAY US EQUITY", "PAY US EQUITY", "PAY US EQUITY", "PAY US EQUITY", "PAY US EQUITY", "PAY US EQUITY", "PAY US EQUITY", "PAY US EQUITY", "PAY US EQUITY", "PAY US EQUITY"),
                return = c("11%", "5%", "19%", "4%", "8%", "10%", "7%", "3%", "9%", "11%"))
现在我想匹配这两个数据框，得到如下输出:
# Desired result: the rows of `a` plus the single matching return from `b`.
# Fixes relative to the original snippet:
#   * added the missing closing parenthesis of data.frame();
#   * corrected the ticker typo "APL US EQUTIY" -> "APL US EQUITY";
#   * return values now agree with `b`: ABBN on 12.01.2016 is "3%",
#     APL on 13.01.2016 is "7%", PAY on 14.01.2016 is "3%".
c <- data.frame(date = c("12.01.2016", "13.01.2016", "14.01.2016"),
                company = c("ABBN VX EQUITY", "APL US EQUITY", "PAY US EQUITY"),
                return = c("3%", "7%", "3%"))
我尝试使用 dplyr 中的 left_join，但在按两个值（日期和公司）匹配之后，它总是把所有的 return 列都返回给我，而我只想要每个（日期、公司）组合对应的那一个 return 值。
非常感谢
答案 0 :(得分:2)
我们可以使用 dplyr 和 tidyr 中的函数，a2 即为最终输出。请注意，您最初发布的示例数据框包含拼写错误（例如公司名 "APL US EQUTIY"，以及日期 12.01.2016 前缺少引号）。我还建议不要把列转换为因子（factor）。请参阅 Data Preparation 部分（下方代码末尾对 a 和 b 的定义），了解我是如何构建示例数据框的。
# Load packages
library(dplyr)
library(tidyr)
# Clean data frame b
# `b` stores three (date, company, return) column triples side by side, named
# date/company/return, date.1/company.1/return.1 and date.2/company.2/return.2.
# Stack them into one long table with a single date, company and return column
# so it can be joined on (date, company).
b2 <- b %>%
  # Give the first triple an explicit suffix so every column is "<name>.<group>".
  rename(date.0 = "date", company.0 = "company", return.0 = "return") %>%
  # ".value" turns the part before the dot into the output column name; the
  # part after the dot identifies the triple. Each triple keeps its own date
  # column, so the three date columns are not assumed to be identical, and no
  # cross product of return/company columns has to be built and filtered.
  pivot_longer(
    everything(),
    names_to = c(".value", "group"),
    names_sep = "\\."
  ) %>%
  # Keep the triples in blocks (all rows of the first triple, then the second,
  # then the third), matching the row order of the earlier gather()-based code.
  arrange(group) %>%
  select("date", "company", "return") %>%
  # pivot_longer() returns a tibble; keep b2 a plain data.frame as before.
  as.data.frame()
# Look up the return for every (date, company) pair listed in `a`.
# Both columns are join keys, so each row of `a` picks up only the return of
# its own company on its own date.
a2 <- left_join(a, b2, by = c("date", "company"))
# Data Preparation
# Lookup keys: one (date, company) pair per row, stored as plain character
# columns rather than factors.
a <- data.frame(
  date = c("12.01.2016", "13.01.2016", "14.01.2016"),
  company = c("ABBN VX EQUITY", "APL US EQUITY", "PAY US EQUITY"),
  stringsAsFactors = FALSE
)
# Wide table of returns: three (date, company, return) column triples, one per
# company, all covering the same ten dates. data.frame() makes the repeated
# names unique (date.1, company.1, return.1, date.2, ...). Columns are kept as
# character vectors, not factors.
b <- local({
  # The ten dates are shared by all three triples.
  dates <- c(
    "07.01.2016", "08.01.2016", "09.01.2016", "10.01.2016", "11.01.2016",
    "12.01.2016", "13.01.2016", "14.01.2016", "15.01.2016", "16.01.2016"
  )
  n_dates <- length(dates)

  data.frame(
    date = dates,
    company = rep("ABBN VX EQUITY", n_dates),
    return = c("10%", "15%", "3%", "8%", "12%", "3%", "10%", "4%", "9%", "22%"),
    date = dates,
    company = rep("APL US EQUITY", n_dates),
    return = c("11%", "5%", "19%", "4%", "8%", "10%", "7%", "3%", "9%", "11%"),
    date = dates,
    company = rep("PAY US EQUITY", n_dates),
    return = c("11%", "5%", "19%", "4%", "8%", "10%", "7%", "3%", "9%", "11%"),
    stringsAsFactors = FALSE
  )
})