我有以下数据:
# Parent table `data`: two ids, each with a FromDate/ToDate pair.
id <- c("1", "3")
Outcome <- rep("Balanced", 2)
FromDate <- as.Date(c("2016-01-01", "2016-01-01"), format = "%Y-%m-%d")
ToDate <- as.Date(c("2017-01-01", "2017-01-01"), format = "%Y-%m-%d")
# Length-one value; data.frame() recycles it to every row.
type <- "ccc"
data <- data.frame(id, Outcome, FromDate, ToDate, type)
R> data
id Outcome FromDate ToDate type
1 Balanced 2016-01-01 2017-01-01 ccc
3 Balanced 2016-01-01 2017-01-01 ccc
# Purchase table `buy`: refno is later joined against data$id.
# Quantities are kept as character strings, exactly as in the question.
refno <- c("1", "2", "1", "1")
sedolnumber <- c("ABC123", "XYZ12", "ABC123", "ZZZ123")
order_placement_date <- as.Date(
  c("2016-02-01", "2017-02-05", "2017-02-01", "2016-04-01"),
  format = "%Y-%m-%d"
)
units_buyed <- c("1000", "200", "1000", "1000")
buy <- data.frame(refno, sedolnumber, order_placement_date, units_buyed)
R> buy
refno sedolnumber order_placement_date units_buyed
1 ABC123 2016-02-01 1000
2 XYZ12 2017-02-05 200
1 ABC123 2017-02-01 1000
1 ZZZ123 2016-04-01 1000
# Sales table `sell`. Note that refno and sedolnumber are reassigned here,
# replacing the vectors that were used to build `buy`.
refno <- rep("1", 2)
sedolnumber <- rep("ABC123", 2)
sell_placement_date <- as.Date(
  c("2016-05-01", "2017-05-01"),
  format = "%Y-%m-%d"
)
units_sold <- rep("500", 2)
sell <- data.frame(refno, sedolnumber, sell_placement_date, units_sold)
R> sell
refno sedolnumber sell_placement_date units_sold
1 ABC123 2016-05-01 500
1 ABC123 2017-05-01 500
我想按条件把这三个表连接(join)起来,然后再添加一列 Units_Retained,其值为 buy.units_buyed 减去 sell.units_sold:
如果 units_sold 为 null,则 Units_Retained 列应显示 units_buyed 的值;如果 units_buyed 和 units_sold 均为 null,则应显示 0。
我是用 sqldf 包实现的。R 中是否有函数可以在不使用 sqldf 的情况下实现同样的效果?data 表是我的父表,如果在 buy 和 sell 表中找不到匹配记录,相应的列应显示为空值。
# Reference implementation using sqldf (console transcript; `R>` is the prompt).
# `data` is LEFT JOINed to `buy` and then to `sell`. The date-window tests sit
# inside the ON clauses, so a `data` row with no in-window match is still
# returned, with NULL/NA in the buy/sell columns.
# Units_Retained is a plain subtraction and is therefore NULL/NA whenever
# either units_buyed or units_sold is missing.
R>sqldf("SELECT a.id,a.outcome,a.FromDate,a.ToDate,a.type,b.sedolnumber,b.order_placement_date,b.units_buyed,c.units_sold,c.sell_placement_date,(b.units_buyed-c.units_sold) as Units_Retained
FROM data a LEFT JOIN buy b ON (a.id=b.refno AND b.order_placement_date>=a.FromDate AND b.order_placement_date<a.ToDate)
LEFT JOIN sell c ON(a.id=c.refno AND c.sell_placement_date>=a.FromDate AND c.sell_placement_date<a.ToDate AND b.sedolnumber=c.sedolnumber) ")
R> id Outcome FromDate ToDate type sedolnumber order_placement_date units_buyed units_sold sell_placement_date Units_Retained
1 Balanced 2016-01-01 2017-01-01 ccc ABC123 2016-02-01 1000 500 2016-05-01 500
1 Balanced 2016-01-01 2017-01-01 ccc ZZZ123 2016-04-01 1000 <NA> <NA> NA
3 Balanced 2016-01-01 2017-01-01 ccc <NA> <NA> <NA> <NA> <NA> NA
答案 0 :(得分:3)
您可以使用 dplyr 的动词函数:
library(dplyr)
# Units still held: units bought minus units sold, element by element, with a
# missing (NA) quantity on either side counted as 0.
#
# units_buyed, units_sold: quantity vectors; each is passed through
#   as.character() and then as.numeric(), so character and factor input work.
# Returns a numeric vector of bought - sold.
get_units_retained <- function(units_buyed, units_sold) {
  # Convert one quantity vector to numeric and replace NA with 0.
  na_to_zero <- function(qty) {
    qty <- as.numeric(as.character(qty))
    replace(qty, is.na(qty), 0)
  }
  na_to_zero(units_buyed) - na_to_zero(units_sold)
}
# Rebuild the sqldf result: keep every row of `data` and attach its in-window
# purchases and sales, then add Units_Retained.
#
# The date-window tests are part of the join conditions (as in the SQL ON
# clauses) rather than a filter() applied after the joins. A post-join filter
# removes a `data` row whose only purchases fall outside its window, and
# removes a purchase whose only sales fall outside the window; a LEFT JOIN
# keeps both, with NA in the unmatched columns.
#
# Requires dplyr >= 1.1.0 for join_by() with inequality conditions.
data %>%
  left_join(
    buy,
    by = join_by(
      id == refno,
      FromDate <= order_placement_date,
      ToDate > order_placement_date
    )
  ) %>%
  left_join(
    sell,
    by = join_by(
      id == refno,
      sedolnumber,
      FromDate <= sell_placement_date,
      ToDate > sell_placement_date
    ),
    # SQL never treats NULL = NULL as a match; keep an NA sedolnumber (no
    # purchase found) from pairing with an NA sedolnumber in `sell`.
    na_matches = "never"
  ) %>%
  mutate(Units_Retained = get_units_retained(units_buyed, units_sold))
答案 1 :(得分:0)
如果数据量非常大,可以使用 data.table:
# data.table version of the LEFT JOIN logic.
#
# Both joins are non-equi joins driven by the parent table, with the default
# nomatch = NA, so every row of `data` is kept and unmatched buy/sell columns
# are NA. Inner joins (nomatch = 0) followed by a date filter would drop
# parent rows that have no in-window purchase or sale.
library(data.table)

data <- as.data.table(data)
buy <- as.data.table(buy)
sell <- as.data.table(sell)

# The joins below use `on =` and do not need keys; the keys are still set so
# the three tables end up keyed and sorted as before.
setkey(data, id)
setkey(buy, refno)
setkey(sell, refno, sedolnumber)

# Step 1: purchases placed inside [FromDate, ToDate) for each `data` row.
# In a non-equi join the x join columns are filled from i, so the original
# purchase date is selected explicitly through the x. prefix.
with_buys <- buy[
  data,
  on = .(
    refno == id,
    order_placement_date >= FromDate,
    order_placement_date < ToDate
  ),
  .(
    id = i.id,
    Outcome = i.Outcome,
    FromDate = i.FromDate,
    ToDate = i.ToDate,
    type = i.type,
    sedolnumber = x.sedolnumber,
    order_placement_date = x.order_placement_date,
    units_buyed = x.units_buyed
  )
]

# Step 2: sales of the same sedolnumber placed inside the same window.
dd <- sell[
  with_buys,
  on = .(
    refno == id,
    sedolnumber,
    sell_placement_date >= FromDate,
    sell_placement_date < ToDate
  ),
  .(
    id = i.id,
    Outcome = i.Outcome,
    FromDate = i.FromDate,
    ToDate = i.ToDate,
    type = i.type,
    sedolnumber = i.sedolnumber,
    order_placement_date = i.order_placement_date,
    units_buyed = i.units_buyed,
    units_sold = x.units_sold,
    sell_placement_date = x.sell_placement_date
  )
]

# Units_Retained = bought - sold, with a missing quantity counted as 0, so a
# purchase without a sale keeps its full quantity and a row with neither is 0.
dd[, Units_Retained := {
  bought <- as.numeric(as.character(units_buyed))
  sold <- as.numeric(as.character(units_sold))
  bought[is.na(bought)] <- 0
  sold[is.na(sold)] <- 0
  bought - sold
}]