合并两个xts以在某些时间戳创建NA

时间:2016-10-23 15:22:50

标签: r merge xts

我有一个不规则的xts系列,我希望在适当的时间内包括NA。例如,09:15:00,99:16:00,等等,如果这些适当的时间丢失了。我是通过创建另一个带有时间戳的NA系列的系列来实现的。 这是我的系列:

dput(data1)
structure(c(108.4, 108.35, 108.35, 108.4, 108.2, 108.35, 108.25, 
108.25, 108.25, 108.25, 108.25, 108.25, 108.35, 108.3, 108.25, 
108.3, 108.25, 108.25, 108.2, 108.15, 108.2, 108.1, 108.25, 108.25, 
108.1, 108.2, 108.25, 108.15, 108.15, 108.1, 108.2, 108.1, 108.25, 
108.3, 108.25, 108.2, 108.2, 108.2, 108.1, 108.1, 108.1, 108.1, 
108.1, 108.15, 108.15, 108.05, 108.1, 108, 108, 108, 108.1, 108.05, 
108, 108.05, 108, 108.05, 107.9, 107.95, 107.85, 107.85, 107.85, 
107.85, 107.9, 107.9, 107.9, 108, 107.85, 107.9, 107.9, 107.9, 
108, 107.9, 108, 108, 108, 107.95, 107.95, 108, 108, 108, 108, 
107.95, 108, 108, 108, 108.05, 108.05, 108.1, 108.05, 108.1, 
108.1, 108.1, 108.1, 108.1, 108.15, 108.05, 108.1, 108.05, 108.05, 
108.05), .indexCLASS = c("POSIXct", "POSIXt"), .indexTZ = "Asia/Calcutta", tclass = c("POSIXct", 
"POSIXt"), tzone = "Asia/Calcutta", index = structure(c(1459481851, 
1459482301, 1459482303, 1459482304, 1459482305, 1459482306, 1459482307, 
1459482308, 1459482309, 1459482310, 1459482311, 1459482312, 1459482313, 
1459482314, 1459482315, 1459482316, 1459482317, 1459482318, 1459482319, 
1459482320, 1459482321, 1459482322, 1459482323, 1459482324, 1459482325, 
1459482326, 1459482327, 1459482328, 1459482329, 1459482330, 1459482331, 
1459482332, 1459482333, 1459482334, 1459482335, 1459482336, 1459482337, 
1459482339, 1459482340, 1459482341, 1459482342, 1459482343, 1459482344, 
1459482345, 1459482347, 1459482348, 1459482349, 1459482350, 1459482351, 
1459482352, 1459482353, 1459482354, 1459482355, 1459482356, 1459482357, 
1459482358, 1459482360, 1459482361, 1459482362, 1459482363, 1459482364, 
1459482365, 1459482366, 1459482367, 1459482368, 1459482369, 1459482370, 
1459482371, 1459482372, 1459482374, 1459482375, 1459482376, 1459482377, 
1459482378, 1459482379, 1459482380, 1459482381, 1459482382, 1459482384, 
1459482385, 1459482386, 1459482387, 1459482388, 1459482389, 1459482390, 
1459482392, 1459482393, 1459482394, 1459482395, 1459482396, 1459482397, 
1459482398, 1459482399, 1459482401, 1459482402, 1459482403, 1459482404, 
1459482405, 1459482406, 1459482407), tzone = "Asia/Calcutta", tclass = c("POSIXct", 
"POSIXt")), .Dim = c(100L, 1L), .Dimnames = list(NULL, "SYMBOL"), class = c("xts", "zoo"))

NA系列:

    dput(NA.df)
structure(c(NA, NA, NA), class = c("xts", "zoo"), .indexCLASS = c("POSIXct", 
"POSIXt"), tclass = c("POSIXct", "POSIXt"), .indexTZ = structure("Asia/Calcutta", .Names = "TZ"), tzone = structure("Asia/Calcutta", .Names = "TZ"), index = structure(c(1459741500, 
1459741560, 1459741620), tzone = structure("Asia/Calcutta", .Names = "TZ"), tclass = c("POSIXct", 
"POSIXt")), .Dim = c(3L, 1L), .Dimnames = list(NULL, "Empty"))

这是我的代码:

data2<-merge(data1,NA.df, all=TRUE)[,1:ncol(data1)]

结果如下:

dput(data2)
structure(c(108.4, 108.35, 108.35, 108.4, 108.2, 108.35, 108.25, 
108.25, 108.25, 108.25, 108.25, 108.25, 108.35, 108.3, 108.25, 
108.3, 108.25, 108.25, 108.2, 108.15, 108.2, 108.1, 108.25, 108.25, 
108.1, 108.2, 108.25, 108.15, 108.15, 108.1, 108.2, 108.1, 108.25, 
108.3, 108.25, 108.2, 108.2, 108.2, 108.1, 108.1, 108.1, 108.1, 
108.1, 108.15, 108.15, 108.05, 108.1, 108, 108, 108, 108.1, 108.05, 
108, 108.05, 108, 108.05, 107.9, 107.95, 107.85, 107.85, 107.85, 
107.85, 107.9, 107.9, 107.9, 108, 107.85, 107.9, 107.9, 107.9, 
108, 107.9, 108, 108, 108, 107.95, 107.95, 108, 108, 108, 108, 
107.95, 108, 108, 108, 108.05, 108.05, 108.1, 108.05, 108.1, 
108.1, 108.1, 108.1, 108.1, 108.15, 108.05, 108.1, 108.05, 108.05, 
108.05, NA, NA, NA), index = structure(c(1459481851, 1459482301, 
1459482303, 1459482304, 1459482305, 1459482306, 1459482307, 1459482308, 
1459482309, 1459482310, 1459482311, 1459482312, 1459482313, 1459482314, 
1459482315, 1459482316, 1459482317, 1459482318, 1459482319, 1459482320, 
1459482321, 1459482322, 1459482323, 1459482324, 1459482325, 1459482326, 
1459482327, 1459482328, 1459482329, 1459482330, 1459482331, 1459482332, 
1459482333, 1459482334, 1459482335, 1459482336, 1459482337, 1459482339, 
1459482340, 1459482341, 1459482342, 1459482343, 1459482344, 1459482345, 
1459482347, 1459482348, 1459482349, 1459482350, 1459482351, 1459482352, 
1459482353, 1459482354, 1459482355, 1459482356, 1459482357, 1459482358, 
1459482360, 1459482361, 1459482362, 1459482363, 1459482364, 1459482365, 
1459482366, 1459482367, 1459482368, 1459482369, 1459482370, 1459482371, 
1459482372, 1459482374, 1459482375, 1459482376, 1459482377, 1459482378, 
1459482379, 1459482380, 1459482381, 1459482382, 1459482384, 1459482385, 
1459482386, 1459482387, 1459482388, 1459482389, 1459482390, 1459482392, 
1459482393, 1459482394, 1459482395, 1459482396, 1459482397, 1459482398, 
1459482399, 1459482401, 1459482402, 1459482403, 1459482404, 1459482405, 
1459482406, 1459482407, 1459516500, 1459516560, 1459516620), tzone = "Asia/Calcutta", tclass = c("POSIXct", 
"POSIXt")), class = c("xts", "zoo"), .indexCLASS = c("POSIXct", 
"POSIXt"), .indexTZ = "Asia/Calcutta", tclass = c("POSIXct", 
"POSIXt"), tzone = "Asia/Calcutta", .Dim = c(103L, 1L), .Dimnames = list(
    NULL, "SYMBOL"))

但它在9:15:,9:16:00没有NA,依此类推。请帮我弄清楚我在代码中的错误。感谢

1 个答案:

答案 0 :(得分:1)

您的输出是:

> data2<-merge(data1, NA.df)
> tail(data2)
                    SYMBOL Empty
2016-04-01 09:16:45 108.05    NA
2016-04-01 09:16:46 108.05    NA
2016-04-01 09:16:47 108.05    NA
2016-04-04 09:15:00     NA    NA
2016-04-04 09:16:00     NA    NA
2016-04-04 09:17:00     NA    NA

由于你想要NA.df中的NA,要看清楚发生了什么,请尝试这一点,默认情况下将0放在与SYMBOL Empty不匹配的行中而不是默认的NA

> data2<-merge(data1, NA.df, fill = 0)
> tail(data2)
                    SYMBOL Empty
2016-04-01 09:16:45 108.05     0
2016-04-01 09:16:46 108.05     0
2016-04-01 09:16:47 108.05     0
2016-04-04 09:15:00   0.00    NA
2016-04-04 09:16:00   0.00    NA
2016-04-04 09:17:00   0.00    NA

我认为你想要的可能是让NA在同一列中。为此,您需要具有相同的列名称(此处为SYMBOL),并使用rbind,而不是mergemerge将创建两列,就像{{1} ,因为它认为它们是不同的系列,即使列名相同)。

cbind

您的colnames(NA.df) <- "SYMBOL" data2<-rbind(data1, NA.df) > tail(data2, 6) SYMBOL 2016-04-01 09:16:45 108.05 2016-04-01 09:16:46 108.05 2016-04-01 09:16:47 108.05 2016-04-04 09:15:00 NA 2016-04-04 09:16:00 NA 2016-04-04 09:17:00 NA 帧作为您要执行的操作的错误时间戳。

NA.df