我在此data.table
....
取值
Instrument Date
1: DBC 2006-02-08
2: IEF 2002-08-02
3: VEU 2007-03-10
4: VNQ 2004-10-03
5: VTI 2001-06-17
和另一个data.table
.....
DT
Instrument Date Open High Low Close Volume Adjusted
1: DBC 2006-02-06 24.55 24.66 24.10 24.20 771500 22.69
2: DBC 2006-02-07 24.05 24.20 23.45 23.50 674400 22.04
3: DBC 2006-02-08 23.49 23.67 23.33 23.40 335400 21.94
4: DBC 2006-02-09 23.56 23.82 23.51 23.62 705900 22.15
5: DBC 2006-02-10 23.65 23.69 23.10 23.19 276700 21.75
6: IEF 2002-07-31 82.05 82.58 82.05 82.52 32600 55.22
7: IEF 2002-08-01 82.54 82.90 82.52 82.86 71400 55.44
8: IEF 2002-08-02 83.02 83.70 82.90 83.50 120300 55.87
9: IEF 2002-08-05 83.68 83.92 83.53 83.92 159300 56.15
10: IEF 2002-08-06 83.51 83.51 83.03 83.24 90100 55.70
11: VEU 2007-03-08 50.71 51.10 50.71 50.96 113600 43.05
12: VEU 2007-03-09 51.15 51.36 51.02 51.30 139100 43.34
13: VEU 2007-03-12 51.25 51.59 51.20 51.54 108000 43.54
14: VEU 2007-03-13 51.20 51.23 50.17 50.19 216900 42.40
15: VEU 2007-03-14 50.00 50.26 49.39 50.25 154900 42.45
16: VNQ 2004-10-01 50.35 51.25 50.25 51.18 129800 32.84
17: VNQ 2004-10-04 51.39 51.55 51.39 51.40 8100 32.98
18: VNQ 2004-10-05 51.40 51.50 51.35 51.39 11900 32.97
19: VNQ 2004-10-06 51.50 51.63 51.49 51.63 9300 33.12
20: VNQ 2004-10-07 51.65 51.65 51.10 51.10 7900 32.78
21: VTI 2001-06-15 110.85 112.01 110.35 111.33 1067400 44.58
22: VTI 2001-06-18 111.63 111.83 110.62 110.62 282600 44.29
23: VTI 2001-06-19 112.30 112.33 110.47 110.93 1777600 44.42
24: VTI 2001-06-20 110.75 112.15 110.75 112.10 476000 44.88
25: VTI 2001-06-21 112.00 113.45 111.75 113.00 240400 45.24
Instrument Date Open High Low Close Volume Adjusted
...,我希望将 Date 从S
作为每个乐器的起点进行分组:
可重复数据:
S <- structure(list(Instrument = c("DBC", "IEF", "VEU", "VNQ", "VTI"), Date = structure(c(13187, 11901, 13582, 12694, 11490), class = "Date")), row.names =c(NA, -5L), class = c("data.table", "data.frame"), .Names = c("Instrument", "Date"), sorted = c("Instrument", "Date"))
DT <- structure(list(Instrument = c("DBC", "DBC", "DBC", "DBC", "DBC", "IEF", "IEF", "IEF", "IEF", "IEF", "VEU", "VEU", "VEU", "VEU", "VEU", "VNQ", "VNQ", "VNQ", "VNQ", "VNQ", "VTI", "VTI", "VTI", "VTI", "VTI"), Date = structure(c(13185, 13186, 13187, 13188, 13189, 11899,11900, 11901, 11904, 11905, 13580, 13581, 13584, 13585, 13586, 12692, 12695, 12696, 12697, 12698, 11488, 11491, 11492, 11493, 11494), class = c("IDate", "Date")), Open = c(24.55, 24.05, 23.49, 23.56, 23.65, 82.05, 82.54, 83.02, 83.68, 83.51, 50.71, 51.15, 51.25,51.2, 50, 50.35, 51.39, 51.4, 51.5, 51.65, 110.85, 111.63, 112.3, 110.75, 112), High = c(24.66, 24.2, 23.67, 23.82, 23.69, 82.58, 82.9, 83.7, 83.92, 83.51, 51.1, 51.36, 51.59,51.23, 50.26, 51.25, 51.55, 51.5, 51.63, 51.65, 112.01, 111.83, 112.33, 112.15, 113.45), Low = c(24.1, 23.45, 23.33, 23.51, 23.1, 82.05, 82.52, 82.9, 83.53, 83.03, 50.71, 51.02, 51.2, 50.17, 49.39, 50.25, 51.39, 51.35, 51.49, 51.1, 110.35, 110.62, 110.47, 110.75, 111.75), Close = c(24.2, 23.5, 23.4, 23.62, 23.19, 82.52, 82.86, 83.5, 83.92, 83.24, 50.96, 51.3, 51.54, 50.19, 50.25, 51.18, 51.4, 51.39, 51.63, 51.1, 111.33, 110.62, 110.93, 112.1, 113), Volume = c(771500, 674400, 335400, 705900, 276700, 32600, 71400, 120300, 159300, 90100, 113600, 139100, 108000, 216900, 154900, 129800, 8100, 11900, 9300, 7900,1067400, 282600, 1777600, 476000, 240400), Adjusted = c(22.69, 22.04, 21.94, 22.15, 21.75, 55.22, 55.44, 55.87, 56.15, 55.7, 43.05, 43.34, 43.54, 42.4, 42.45, 32.84, 32.98, 32.97, 33.12, 32.78, 44.58, 44.29, 44.42, 44.88, 45.24)), .Names = c("Instrument", "Date", "Open", "High", "Low", "Close", "Volume", "Adjusted"), row.names = c(NA, -25L), class = c("data.table", "data.frame"), sorted = c("Instrument", "Date"))
答案 0 :(得分:2)
您可以合并2套,然后按仪器过滤:
setkey(S,Instrument)
setkey(DT,Instrument)
merge(DT,S)[,.SD[Date.x>=Date.y],Instrument]
Instrument Date.x Open High Low Close Volume Adjusted Date.y
1: DBC 2006-02-08 23.49 23.67 23.33 23.40 335400 21.94 2006-02-08
2: DBC 2006-02-09 23.56 23.82 23.51 23.62 705900 22.15 2006-02-08
3: DBC 2006-02-10 23.65 23.69 23.10 23.19 276700 21.75 2006-02-08
4: IEF 2002-08-02 83.02 83.70 82.90 83.50 120300 55.87 2002-08-02
5: IEF 2002-08-05 83.68 83.92 83.53 83.92 159300 56.15 2002-08-02
6: IEF 2002-08-06 83.51 83.51 83.03 83.24 90100 55.70 2002-08-02
7: VEU 2007-03-12 51.25 51.59 51.20 51.54 108000 43.54 2007-03-10
8: VEU 2007-03-13 51.20 51.23 50.17 50.19 216900 42.40 2007-03-10
9: VEU 2007-03-14 50.00 50.26 49.39 50.25 154900 42.45 2007-03-10
10: VNQ 2004-10-04 51.39 51.55 51.39 51.40 8100 32.98 2004-10-03
11: VNQ 2004-10-05 51.40 51.50 51.35 51.39 11900 32.97 2004-10-03
12: VNQ 2004-10-06 51.50 51.63 51.49 51.63 9300 33.12 2004-10-03
13: VNQ 2004-10-07 51.65 51.65 51.10 51.10 7900 32.78 2004-10-03
14: VTI 2001-06-18 111.63 111.83 110.62 110.62 282600 44.29 2001-06-17
15: VTI 2001-06-19 112.30 112.33 110.47 110.93 1777600 44.42 2001-06-17
16: VTI 2001-06-20 110.75 112.15 110.75 112.10 476000 44.88 2001-06-17
17: VTI 2001-06-21 112.00 113.45 111.75 113.00 240400 45.24 2001-06-17
答案 1 :(得分:2)
以下是@agstudy和@Arun
提供的三种解决方案的总结和基准测试<强>基准强>
require(rbenchmark)
setkey(DT, Instrument, Date) # initial condition
setkey(S, Instrument, Date) # initial condition
agstudy <- function() {
setkey(DT, Instrument)
setkey(S, Instrument)
out <- merge(DT,S)[,.SD[Date.x>=Date.y],Instrument]
out[,Date.y:=NULL]
setnames(out,"Date.x", "Date")
setkey(out, Instrument, Date)
return(out)
}
Arun <- function(){
setkey(S, Instrument)
out <- DT[S, .SD[Date >= i.Date]]
setkey(out, Instrument, Date)
return(out)
}
DanielKrizian <- function(){
setkey(S, Instrument)
out <- DT[S][Date>=Date.1]
setkey(out, Instrument, Date)
out[,Date.1:=NULL]
return(out)
}
setkey(DT, Instrument, Date) # initial condition
setkey(S, Instrument, Date) # initial condition
benchmark(ag <- agstudy(),replications=1000)
setkey(DT, Instrument, Date)
setkey(S, Instrument, Date)
benchmark(ar <- Arun(),replications=1000)
setkey(DT, Instrument, Date)
setkey(S, Instrument, Date)
benchmark(dk <- DanielKrizian(),replications=1000)
<强>输出强>
test replications elapsed relative user.self sys.self user.child sys.child
ag <- agstudy() 1000 9.39 1 8.55 0.07 NA NA
ar <- Arun() 1000 7.16 1 6.53 0.07 NA NA
dk <- DanielKrizian() 1000 2.95 1 2.7 0 NA NA
检查强>
all(identical(ag, ar),identical(ag,dk),identical(ar,dk))
[1]TRUE