
时间:2014-12-14 16:38:43

标签: r data.table


# Input test data
# Sample holdings: one row per (portfolio, security, report date).
# The object is assembled by hand (dput-style), so it only carries the
# data.table class attribute; the list names double as the column names.
portolios <- structure(
  list(
    portfolioid = rep(1L, 6L),
    secid       = c("A", "B", "A", "C", "C", "A"),
    reportdate  = c(
      "2010-03-31", "2010-03-31", "2010-06-30",
      "2010-06-30", "2010-07-15", "2010-08-31"
    ),
    report_type = c("Full", "Full", "Full", "Full", "Partial", "Full"),
    shares      = c(100L, 100L, 130L, 50L, 75L, 80L)
  ),
  row.names = c(NA, -6L),
  class = c("data.table", "data.frame")
)

   portfolioid secid reportdate report_type shares
1:           1     A 2010-03-31        Full    100
2:           1     B 2010-03-31        Full    100
3:           1     A 2010-06-30        Full    130
4:           1     C 2010-06-30        Full     50
5:           1     C 2010-07-15     Partial     75
6:           1     A 2010-08-31        Full     80


7:           1     B 2010-06-30        Full      0
8:           1     C 2010-08-31        Full      0

业务上的问题是:在 report_type 为 Full 的报告中,有时不会报告已清仓的头寸(shares = 0),因此必须根据之前的报告补全(估算)缺失的 secid。


# Expected result: the holdings with the two imputed zero-share rows added,
# plus the change in shares relative to the previous report of each secid.
# The list names double as the column names.
changes <- structure(
  list(
    portfolioid = rep(1L, 8L),
    secid       = c("A", "B", "A", "B", "C", "C", "A", "C"),
    reportdate  = c(
      "2010-03-31", "2010-03-31", "2010-06-30", "2010-06-30",
      "2010-06-30", "2010-07-15", "2010-08-31", "2010-08-31"
    ),
    report_type = c(
      "Full", "Full", "Full", "Full",
      "Full", "Partial", "Full", "Full"
    ),
    shares      = c(100L, 100L, 130L, 0L, 50L, 75L, 80L, 0L),
    change      = c(100L, 100L, 30L, -100L, 50L, 25L, -50L, -75L)
  ),
  row.names = c(NA, -8L),
  class = c("data.table", "data.frame")
)

   portfolioid secid reportdate report_type shares change
1:           1     A 2010-03-31        Full    100    100
2:           1     B 2010-03-31        Full    100    100
3:           1     A 2010-06-30        Full    130     30
4:           1     B 2010-06-30        Full      0   -100
5:           1     C 2010-06-30        Full     50     50
6:           1     C 2010-07-15     Partial     75     25
7:           1     A 2010-08-31        Full     80    -50
8:           1     C 2010-08-31        Full      0    -75

我卡在如何为外连接 portolios[i] 创建 i 这一步。我不想使用 i <- CJ(reportdate, secid),因为并非每个 reportdate 都存在每个 secid,这样做会产生太多不必要的记录,也不能正确表示需要填充的数据。



我想将 secid 向前滚动:当某个 secid 出现在之前的报告(Partial 或 Full)中、却在 Full 报告中缺失时,设置 shares := 0。我认为应该使用 roll = 1,但不确定应在何处以及如何实现。


How to Calculate a rolling statistic in R using data.table on unevenly spaced data

我确信自己缺少某些基本的理解,或者不知道某个可以创建所需 i 的 CJ() 技巧。

1 个答案:

答案 0 :(得分:1)



# Convert the report dates, in place, to data.table's IDate class.
portolios[, reportdate := as.IDate(reportdate)]

# Distinct report dates in ascending order.
uniq.dts <- local({
  distinct_dates <- unique(portolios[["reportdate"]])
  distinct_dates[order(distinct_dates)]
})



# NOTE(review): this join expression is truncated in the source. The rest of
# the `j` block and the closing `}` / `]` are missing, so it cannot run as
# written and the intended result of `impute` cannot be confirmed from here.
impute <- portolios[portolios, {
      # Logical vector: TRUE for each entry of uniq.dts later than max(reportdate).
      tmp = max(reportdate) < uniq.dts;


# Append the imputed rows to the reported holdings. fill = TRUE pads any
# column that is missing from one of the two tables with NA.
portolios <- rbindlist(list(portolios, impute), fill = TRUE)

# Order data by secid and reportdate
portolios <- portolios[order(secid, reportdate)]

# Lag data by group
# shift() returns the previous row's shares within each secid, with NA for the
# first row of the group. The earlier c(NA, lag(shares)) was one element longer
# than the group, because stats::lag() does not move the values of a plain
# vector; current data.table versions reject an RHS of that length.
portolios[, prev.shares := shift(shares, n = 1L, type = "lag"), by = secid]

# Calculate change WHEN a previous share amount exists
# The first report of a secid has no previous value, so the whole position
# counts as the change. The expression is element-wise, so no grouping is
# needed.
portolios[, change := ifelse(is.na(prev.shares), shares, shares - prev.shares)]

   portfolioid secid reportdate report_type shares prev.shares change
1:           1     A 2010-03-31        Full    100          NA    100
2:           1     B 2010-03-31        Full    100          NA    100
3:           1     A 2010-06-30        Full    130         100     30
4:           1     B 2010-06-30        Full      0         100   -100
5:           1     C 2010-06-30        Full     50          NA     50
6:           1     C 2010-07-15     Partial     75          50     25
7:           1     A 2010-08-31        Full     80         130    -50
8:           1     C 2010-08-31        Full      0          75    -75