data.table中合并的问题

时间:2019-01-14 12:29:02

标签: r data.table

当我合并两个 data.table 时,两个表中的 join_time 相差 1 小时,无法对齐。

我研究过 data.table 的滚动连接(rolling join),该方法可以运行,但不能像我希望的那样保留第一个 data.table 中的所有行。

## Convert the input data frames to data.tables.
data_frame1_select1_DT <- data.table(data_frame1_select1)
atr_results1_DT <- data.table(atr_results1)

## Return a POSIXct vector with its display time zone set to UTC.
## Only the "tzone" attribute changes; the underlying instant (seconds since
## the epoch) is left untouched, so no timestamp is shifted.
as_utc <- function(x) {
  stopifnot(inherits(x, "POSIXct"))
  attr(x, "tzone") <- "UTC"
  x
}

## Create a separate date/time column in each table to join on.
## `Time` carries tzone = "" (printed in the session's local zone) while
## `Trade_Time_Stamp` carries tzone = "UTC". The join matches on the underlying
## instant either way, but the merged column would inherit the local zone and
## print shifted against the trade table. Normalising both key columns to UTC
## makes the printed join_time agree with Trade_Time_Stamp.
## NOTE(review): this assumes `Time` already holds the correct instants and
## only its display zone differs - confirm against how `Time` was parsed.
data_frame1_select1_DT[, join_time := as_utc(Trade_Time_Stamp)]
atr_results1_DT[, join_time := as_utc(Time)]

## Key both tables on the join column.
setkey(data_frame1_select1_DT, join_time)
setkey(atr_results1_DT, join_time)

## Full outer join on the time stamp: all = TRUE keeps unmatched rows from
## both tables (filled with NA); allow.cartesian = TRUE permits the result to
## grow when a key value is duplicated.
test_DT1 <- merge(
  atr_results1_DT,
  data_frame1_select1_DT,
  by = "join_time",
  all = TRUE,
  allow.cartesian = TRUE
)
test_DT1

## Sort the merged table by join_time in descending order.
test_DT2 <- test_DT1[order(-join_time)]

atr_results1_DT:

structure(list(atr = c(0.0501716533252073, 0.0472899885644636, 
0.0519628473409685, 0.0531110120065448), Date = structure(c(1516060800, 
1516060800, 1516060800, 1516060800), class = c("POSIXct", "POSIXt"
), tzone = "UTC"), Time = structure(c(1516122000, 1516120200, 
1516118400, 1516113000), class = c("POSIXct", "POSIXt"), tzone = ""), 
join_time = structure(c(1516122000, 1516120200, 1516118400, 
1516113000), class = c("POSIXct", "POSIXt"), tzone = "")), class = 
c("data.table", 
"data.frame"), row.names = c(NA, -4L), .internal.selfref = <pointer: 
0x0000000002571ef0>)

data_frame1_select1_DT:

structure(list(Trade_Date = structure(c(1516060800, 1516060800
), class = c("POSIXct", "POSIXt"), tzone = "UTC"), Trade_Time_Stamp = 
structure(c(1516122000, 
1516120200), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
B_S = c("S", "B"), Lots_Total = c(5, 2), Avg_Price = c(63.8, 
63.79), join_time = structure(c(1516122000, 1516120200), class = 
c("POSIXct", 
"POSIXt"), tzone = "UTC")), class = c("data.table", "data.frame"
), row.names = c(NA, -2L), .internal.selfref = <pointer: 
0x0000000002571ef0>)

结果:合并后第 1 行的 join_time 显示为 18:00,而 data_frame1_select1_DT 中对应的 join_time 是 17:00;合并后第 2 行的 join_time 显示为 17:30,而 data_frame1_select1_DT 中对应的 join_time 是 16:30。我不明白为什么 data_frame1_select1_DT 的 join_time 会被忽略:

structure(list(join_time = structure(c(1516122000, 1516120200, 
1516118400, 1516113000), class = c("POSIXct", "POSIXt"), tzone = ""), 
atr = c(0.0501716533252073, 0.0472899885644636, 0.0519628473409685, 
0.0531110120065448), Date = structure(c(1516060800, 1516060800, 
1516060800, 1516060800), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
Time = structure(c(1516122000, 1516120200, 1516118400, 1516113000
), class = c("POSIXct", "POSIXt"), tzone = ""), Trade_Date = 
structure(c(1516060800, 
1516060800, NA, NA), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
Trade_Time_Stamp = structure(c(1516122000, 1516120200, NA, 
NA), class = c("POSIXct", "POSIXt"), tzone = "UTC"), B_S = c("S", 
"B", NA, NA), Lots_Total = c(5, 2, NA, NA), Avg_Price = c(63.8, 
63.79, NA, NA)), class = c("data.table", "data.frame"), row.names = c(NA, 
-4L), .internal.selfref = <pointer: 0x0000000002571ef0>)

0 个答案:

没有答案