当我合并两个 data.table 时,两个表中的 join_time 相差 1 小时,无法对齐。
我研究过 data.table 的滚动连接(rolling join),该方法可以正常工作,但无法保留我想要的第一个 data.table 中的所有行。
## Convert the input data frames to data.tables.
data_frame1_select1_DT <- data.table(data_frame1_select1)
atr_results1_DT <- data.table(atr_results1)

## Add a dedicated join_time column to each table to join on.
## NOTE(review): in the dput() output shown further down, Time carries
## tzone = "" while Trade_Time_Stamp carries tzone = "UTC". In the merged
## result, rows are paired where the underlying numeric values are equal, so
## paired rows can print with different clock times -- confirm that both
## columns were parsed in the intended time zone before joining.
data_frame1_select1_DT[, join_time := Trade_Time_Stamp]
atr_results1_DT[, join_time := Time]

## Key both tables on join_time.
setkey(data_frame1_select1_DT, join_time)
setkey(atr_results1_DT, join_time)

## Merge both data.tables on join_time. all = TRUE keeps the rows of both
## tables, including those without a match on the other side. TRUE is spelled
## out because T is an ordinary variable that can be reassigned.
test_DT1 <- merge(
  atr_results1_DT,
  data_frame1_select1_DT,
  by = "join_time",
  all = TRUE,
  allow.cartesian = TRUE
)
test_DT1

## Sort the merged data.table by join_time in descending order.
test_DT2 <- test_DT1[order(-join_time)]
atr_results1_DT:
structure(list(atr = c(0.0501716533252073, 0.0472899885644636,
0.0519628473409685, 0.0531110120065448), Date = structure(c(1516060800,
1516060800, 1516060800, 1516060800), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), Time = structure(c(1516122000, 1516120200,
1516118400, 1516113000), class = c("POSIXct", "POSIXt"), tzone = ""),
join_time = structure(c(1516122000, 1516120200, 1516118400,
1516113000), class = c("POSIXct", "POSIXt"), tzone = "")), class =
c("data.table",
"data.frame"), row.names = c(NA, -4L), .internal.selfref = <pointer:
0x0000000002571ef0>)
data_frame1_select1_DT:
structure(list(Trade_Date = structure(c(1516060800, 1516060800
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), Trade_Time_Stamp =
structure(c(1516122000,
1516120200), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
B_S = c("S", "B"), Lots_Total = c(5, 2), Avg_Price = c(63.8,
63.79), join_time = structure(c(1516122000, 1516120200), class =
c("POSIXct",
"POSIXt"), tzone = "UTC")), class = c("data.table", "data.frame"
), row.names = c(NA, -2L), .internal.selfref = <pointer:
0x0000000002571ef0>)
结果:合并把第一行匹配到了 join_time 为 18:00 的行,而 data_frame1_select1_DT 中对应的 join_time 是 17:00。合并把第二行匹配到了 join_time 为 17:30 的行,而 data_frame1_select1_DT 中对应的 join_time 是 16:30。我不明白为什么 data_frame1_select1_DT 的 join_time 被忽略了:
structure(list(join_time = structure(c(1516122000, 1516120200,
1516118400, 1516113000), class = c("POSIXct", "POSIXt"), tzone = ""),
atr = c(0.0501716533252073, 0.0472899885644636, 0.0519628473409685,
0.0531110120065448), Date = structure(c(1516060800, 1516060800,
1516060800, 1516060800), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
Time = structure(c(1516122000, 1516120200, 1516118400, 1516113000
), class = c("POSIXct", "POSIXt"), tzone = ""), Trade_Date =
structure(c(1516060800,
1516060800, NA, NA), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
Trade_Time_Stamp = structure(c(1516122000, 1516120200, NA,
NA), class = c("POSIXct", "POSIXt"), tzone = "UTC"), B_S = c("S",
"B", NA, NA), Lots_Total = c(5, 2, NA, NA), Avg_Price = c(63.8,
63.79, NA, NA)), class = c("data.table", "data.frame"), row.names = c(NA,
-4L), .internal.selfref = <pointer: 0x0000000002571ef0>)