根据日期范围合并多个数据帧

时间:2017-10-31 08:01:36

标签: r dataframe merge

  • 我有这个数据

    # Parent table `data`: two ids, each with a FromDate/ToDate pair.
    id <- c("1", "3")
    Outcome <- rep("Balanced", times = 2)
    FromDate <- as.Date(rep("2016-01-01", times = 2), format = "%Y-%m-%d")
    ToDate <- as.Date(rep("2017-01-01", times = 2), format = "%Y-%m-%d")
    type <- "ccc"  # length-1 value; data.frame() recycles it to every row
    data <- data.frame(id, Outcome, FromDate, ToDate, type)
    R> data
    
       id  Outcome    FromDate     ToDate     type  
    
       1   Balanced   2016-01-01   2017-01-01  ccc  
       3   Balanced   2016-01-01   2017-01-01  ccc
    
    # Buy orders: `refno` refers to `data$id`; units are stored as strings.
    refno <- c("1", "2", "1", "1")
    sedolnumber <- c("ABC123", "XYZ12", "ABC123", "ZZZ123")
    order_placement_date <- as.Date(
      c("2016-02-01", "2017-02-05", "2017-02-01", "2016-04-01"),
      format = "%Y-%m-%d"
    )
    units_buyed <- c("1000", "200", "1000", "1000")
    buy <- data.frame(refno, sedolnumber, order_placement_date, units_buyed)
    
    R> buy   
    refno sedolnumber order_placement_date units_buyed
       1      ABC123           2016-02-01        1000
       2       XYZ12           2017-02-05         200
       1      ABC123           2017-02-01        1000
       1      ZZZ123           2016-04-01        1000
    
    # Sell orders: same key columns as `buy` (`refno`, `sedolnumber`).
    # Note: `refno` and `sedolnumber` are reassigned here.
    refno <- c("1", "1")
    sedolnumber <- c("ABC123", "ABC123")
    sell_placement_date <- as.Date(
      c("2016-05-01", "2017-05-01"),
      format = "%Y-%m-%d"
    )
    units_sold <- c("500", "500")
    sell <- data.frame(refno, sedolnumber, sell_placement_date, units_sold)
    
    R> sell   
    refno sedolnumber sell_placement_date units_sold
      1      ABC123          2016-05-01        500  
      1      ABC123          2017-05-01        500
    
  • 我想根据以下条件连接(join)这三个表,然后再添加一列 Units_Retained,其值为 buy.units_buyed - sell.units_sold:

    1. data.id = buy.refno
    2. buy.order_placement_date >= data.FromDate AND buy.order_placement_date < data.ToDate
    3. data.id = sell.refno
    4. buy.sedolnumber = sell.sedolnumber
    5. sell.sell_placement_date >= data.FromDate AND sell.sell_placement_date < data.ToDate
  • 如果 units_sold 为 null,则 Units_Retained 列应显示 units_buyed 的值;如果 units_buyed 和 units_sold 均为 null,则应显示 0。

  • 我是用 sqldf 包实现的。R 中是否有函数可以在不使用 sqldf 的情况下实现同样的效果? data 表是我的父表,如果在买入表和卖出表中找不到匹配记录,则相应列应显示空值。

    R>sqldf("SELECT a.id,a.outcome,a.FromDate,a.ToDate,a.type,b.sedolnumber,b.order_placement_date,b.units_buyed,c.units_sold,c.sell_placement_date,(b.units_buyed-c.units_sold) as Units_Retained 
     FROM data a LEFT JOIN buy b ON (a.id=b.refno AND b.order_placement_date>=a.FromDate AND b.order_placement_date<a.ToDate) 
     LEFT JOIN sell c ON(a.id=c.refno AND c.sell_placement_date>=a.FromDate AND c.sell_placement_date<a.ToDate AND b.sedolnumber=c.sedolnumber)  ") 
    
    
    
    R>  id  Outcome   FromDate     ToDate   type sedolnumber order_placement_date units_buyed units_sold sell_placement_date Units_Retained
        1    Balanced 2016-01-01 2017-01-01  ccc      ABC123           2016-02-01        1000        500          2016-05-01            500
        1    Balanced 2016-01-01 2017-01-01  ccc      ZZZ123           2016-04-01        1000       <NA>                <NA>             NA
        3    Balanced 2016-01-01 2017-01-01  ccc        <NA>                 <NA>        <NA>       <NA>                <NA>             NA
    

2 个答案:

答案 0 :(得分:3)

您可以使用 dplyr 的动词函数(verbs):

library(dplyr)

#' Units retained after sales
#'
#' Computes `units_buyed - units_sold` element-wise, treating a missing
#' value on either side as 0. A missing `units_sold` therefore yields
#' `units_buyed`, and two missing values yield 0.
#'
#' Written in base R only, so it does not need dplyr to be attached.
#'
#' @param units_buyed Units bought; numeric, character or factor holding
#'   numbers.
#' @param units_sold Units sold; same accepted types as `units_buyed`.
#' @return A numeric vector of retained units.
get_units_retained <- function(units_buyed, units_sold) {
  # Go through as.character() first: calling as.numeric() directly on a
  # factor would return the internal level codes, not the printed numbers.
  bought <- as.numeric(as.character(units_buyed))
  sold <- as.numeric(as.character(units_sold))

  # A missing quantity counts as zero units.
  bought[is.na(bought)] <- 0
  sold[is.na(sold)] <- 0

  bought - sold
}


# Left-join `data` to the buys and sells that fall inside each row's
# [FromDate, ToDate) window, then compute Units_Retained.
#
# The date conditions are part of the join itself (as in the SQL ON
# clauses) rather than a filter() applied afterwards. Filtering after the
# join would drop a `data` row whose buys all lie outside its window, and a
# buy row whose sells all lie outside it, instead of keeping them with NA.
#
# Requires dplyr >= 1.1.0 for join_by() with inequality conditions.
data %>%
  left_join(
    buy,
    by = join_by(
      id == refno,
      FromDate <= order_placement_date,
      ToDate > order_placement_date
    )
  ) %>%
  left_join(
    sell,
    by = join_by(
      id == refno,
      sedolnumber,
      FromDate <= sell_placement_date,
      ToDate > sell_placement_date
    )
  ) %>%
  mutate(Units_Retained = get_units_retained(units_buyed, units_sold))

答案 1 :(得分:0)

如果数据量非常大,可以使用 data.table:

# library() fails immediately if data.table is not installed; require()
# would only return FALSE and let a later call fail instead.
library(data.table)

# Convert the inputs to data.tables (the original objects are overwritten).
data <- as.data.table(data)
buy <- as.data.table(buy)
sell <- as.data.table(sell)

# Keys define the columns used by the X[Y] joins below.
setkey(data, id)
setkey(buy, refno)
setkey(sell, refno, sedolnumber)

# NOTE: nomatch = 0 drops rows without a match, so both joins are INNER
# joins. Ids without a buy, and buys without a sell, do not appear in `dd`
# (unlike the LEFT JOIN result of the sqldf query).
dd <- setkey(
  data[buy, nomatch = 0],  # data joined to buy on id = refno
  id,
  sedolnumber
)[
  sell,                    # ... joined to sell on (id, sedolnumber)
  nomatch = 0
][
  # Keep only rows whose buy and sell dates both lie in [FromDate, ToDate).
  order_placement_date >= FromDate & order_placement_date < ToDate &
    sell_placement_date >= FromDate & sell_placement_date < ToDate
][
  ,
  # Unit columns may be character/factor, so convert through as.character().
  Units_Retained := as.numeric(as.character(units_buyed)) -
    as.numeric(as.character(units_sold))
]