R 中 zoo 包的 na.approx

时间:2012-01-29 22:56:13

标签: r xts zoo

我参照以下链接中 zoo 包的 na.approx 文档,尝试使用 times:

http://www.oga-lab.net/RGM2/func.php?rd_id=zoo:na.approx

这是我的代码:

library(tseries)
library(xts)
library(quantmod)
library(ggplot2)
# Read the tab-separated tick file; it has no header row.
# NOTE(review): with header = FALSE, read.table() names the columns V1, V2, ...,
# so x$A2 / x$A4 / x$A5 below only resolve if the columns are renamed
# elsewhere -- confirm.
x = read.table("test.dat", header = FALSE, sep="\t", skip=0)

# Join the date field with the time field, zero-padding the time to four
# digits so it can be parsed as %H%M.
dt<-sprintf("%s %04d",x$A2,x$A4)
dt<-as.POSIXlt(dt,format="%Y-%m-%d %H%M")

# Pair each timestamp with its price and build an xts series indexed by time.
y <- data.frame(dt,x$A5)
colnames(y) <- c("date","price")
z <- xts(y[,2],y[,1])
# Aggregate to one-minute bars and give the four columns OHLC names.
core <- to.minutes(z, OHLC=TRUE, drop.time=FALSE)
colnames(core) <- c("Open","High","Low","Close")
# Build the target grid of timestamps, then interpolate the bars onto it.
# NOTE(review): times() is not provided by any package attached above, hence
# "could not find function"; seq() on date-times takes `by` as a number of
# seconds, a difftime, or a unit string such as "min".
tseq <- seq(start(core),end(core), by = times("00:01:00"))
core <- na.approx(core, xout = tseq)

这是错误消息:

> tseq <- seq(start(core),end(core), by = times("00:01:00"))
Error in seq.POSIXt(start(core), end(core), by = times("00:01:00")) : 
  could not find function "times"

我该如何解决?

如果我把它换成 'time',tseq 就会按秒而不是按分钟递增。为什么?

下面是数据文件:

M11 2011-03-10  0   104 365 T   N   N   1
M11 2011-03-10  0   113 365 T   N   N   1
M11 2011-03-10  0   113 365 T   N   N   2
M11 2011-03-10  0   113 365 T   N   N   1
M11 2011-03-10  0   113 365 T   N   N   1
M11 2011-03-10  0   114 360 T   N   N   1
M11 2011-03-10  0   114 360 T   N   N   10
M11 2011-03-10  0   114 360 T   N   N   4
M11 2011-03-10  0   114 360 T   N   N   20
M11 2011-03-10  0   114 360 T   N   N   10
M11 2011-03-10  0   114 360 T   N   N   5
M11 2011-03-10  0   114 360 T   N   N   1
M11 2011-03-10  0   114 360 T   N   N   4
M11 2011-03-10  0   114 360 T   N   N   2
M11 2011-03-10  0   115 355 T   N   N   8
M11 2011-03-10  0   115 355 T   N   N   12
M11 2011-03-10  0   115 355 T   N   N   4
M11 2011-03-10  0   115 355 T   N   N   12
M11 2011-03-10  0   115 355 T   N   N   5
M11 2011-03-10  0   115 355 T   N   N   9
M11 2011-03-10  0   115 355 T   N   N   1
M11 2011-03-10  0   115 355 T   N   N   3
M11 2011-03-10  0   115 355 T   N   N   1
M11 2011-03-10  0   115 355 T   N   N   1
M11 2011-03-10  0   115 355 T   N   N   1
M11 2011-03-10  0   115 350 T   N   N   1
M11 2011-03-10  0   115 350 T   N   N   1
M11 2011-03-10  0   115 345 T   N   N   2
M11 2011-03-10  0   115 345 T   N   N   2
M11 2011-03-10  0   118 345 T   N   N   1
M11 2011-03-10  0   118 345 T   N   N   1
M11 2011-03-10  0   118 345 T   N   N   3
M11 2011-03-10  0   118 345 T   N   N   2
M11 2011-03-10  0   119 345 T   N   N   1
M11 2011-03-10  0   119 345 T   N   N   1
M11 2011-03-10  0   120 345 T   N   N   2
M11 2011-03-10  0   122 350 T   N   N   1
M11 2011-03-10  0   124 355 T   N   N   1
M11 2011-03-10  0   126 355 T   N   N   1

编辑:我已按 G.Grothendieck 发布的答案修改了代码,现在运行的内容如下:

# Parse the whitespace-separated tick rows held in the string `Lines`.
DF <- read.table(text = Lines)
# Column 2 is the date and column 4 the time; %04d zero-pads the time so that
# "%H%M" can parse it.
tt <- as.POSIXct(sprintf("%s %04d", DF[[2]], DF[[4]]), format = "%Y-%m-%d %H%M")
# Series of column 5 (presumably the price -- confirm) indexed by timestamp.
x <- xts(DF[[5]], tt)
# Collapse the ticks into one-minute OHLC bars.
xm <- to.minutes(x)
# One timestamp every 60 seconds from the first bar to the last.
tseq <- seq(start(xm), end(xm), by = 60)
# Interpolate the bars onto that regular grid.
xm.x <- na.approx(xm, xout = tseq)

但是,当我运行以下代码时,我收到以下错误:

> apply.daily(xm.x,str(xm.x))
An ‘xts’ object from 2011-03-10 to 2011-06-08 containing:
  Data: num [1:129541, 1:4] 350 350 350 350 350 ...
 - attr(*, "dimnames")=List of 2
  ..$ : NULL
  ..$ : chr [1:4] "x.Open" "x.High" "x.Low" "x.Close"
  Indexed by objects of class: [POSIXct,POSIXt] TZ: 
  xts Attributes:  
 NULL
Error in get(as.character(FUN), mode = "function", envir = envir) : 
  object 'FUN' of mode 'function' was not found

我的错误在哪里?

编辑:用统计函数替换str并遇到错误:

# Fit a 4th-degree polynomial trend to the Close values of one window of rows
# and summarise the fit.
#
# Args:
#   d: a window of rows (matrix or data frame) that has a `Close` column.
#
# Returns:
#   A 1 x 3 character matrix with columns `ctr` (always "X"), `r_sq` (the
#   R-squared of the fit) and `price` (Close of the last row). Because `ctr`
#   is text, as.matrix() turns every cell into a string.
testFun <- function(d) {
  # rt numbers the rows so that the last row sits at 0.
  d <- data.frame(d, rt = seq(1 - nrow(d), 0))
  statfit <- lm(Close ~ poly(rt, 4), d)
  smry <- summary(statfit)
  r_sq <- smry$r.squared
  s <- coef(statfit)
  ctr <- "X"
  nrows <- nrow(d)
  price <- d$Close[nrows]
  # sroot and sfun are computed but do not feed into the returned value.
  # NOTE(review): poly() defaults to orthogonal polynomials, so s[2..5] are
  # not the coefficients of rt, rt^2, ...; these derivative formulas would
  # need poly(rt, 4, raw = TRUE) -- confirm intent.
  sroot <- polyroot(c(s[2], 2 * s[3], 3 * s[4], 4 * s[5]))
  sfun <- function(x) {
    2 * s[3] + 6 * s[4] * x + 12 * s[5] * x * x
  }
  ret_val <- data.frame(ctr, r_sq, price)
  # Fixed: the original read `as. matrix`, which does not parse.
  ret_val <- as.matrix(ret_val)
  ret_val
}
        debug(testFun)

    wfun <- function(w) {
  # Apply testFun over right-aligned windows of 20 rows, handing it all
  # columns of each window at once. try() captures any error instead of
  # raising it, and the result is returned invisibly.
  invisible(
    try(
      rollapply(
        w,
        width = 20,
        FUN = testFun,
        by.column = FALSE,
        align = "right"
      ),
      silent = TRUE
    )
  )
}



 # Disabled weekend filter, kept for reference.
 # NOTE(review): as.POSIXct() yields timestamps, not weekday numbers, so
 # `%in% c(0,6)` would not detect weekends; as.POSIXlt(x)$wday gives the
 # 0-6 weekday -- fix before re-enabling.
 #      is.weekend <- function(x) {
   #    w <- as.POSIXct(x)
    #   w %in% c(0,6)
    #}
    #apply.daily(xm[!is.weekend(index(xm))],wfun)
# Run wfun once per day of xm.
apply.daily(xm,wfun)

debug: d <- data.frame(d, rt = seq(1 - nrow(d), 0))
debug: statfit <- lm(Close ~ poly(rt, 4), d)
debug: smry <- summary(statfit)
debug: r_sq <- smry$r.squared
debug: s <- coef(statfit)
debug: ctr <- "X"
debug: nrows <- nrow(d)
debug: price <- d$Close[nrows]
debug: sroot <- polyroot(c(s[2], 2 * s[3], 3 * s[4], 4 * s[5]))
debug: sfun <- function(x) {
    ret = 2 * p[3] + 6 * p[4] * x + 12 * p[5] * x * x
    ret
}
debug: ret_val <- data.frame(ctr, r_sq, price)
debug: ret_val <- as.matrix(ret_val)
debug: ret_val
exiting from: FUN(data[posns, ], ...)
Error in coredata.xts(x) : currently unsupported data type
Calls: print ... coredata.xts -> structure -> coredata -> coredata.xts -> .Call
Execution halted
编辑:我意识到我的错误是因为我的函数testFun返回了一个zoo对象而apply.daily需要一个xts对象。

tr <- try(rollapply(w,width=20,FUN=testFun,by.column=FALSE, align="right"), silent = TRUE)
tr
str(tr)

2011-04-07 14:59:00 2011-04-07 15:00:00 2011-04-07 15:01:00 2011-04-07 15:02:00
               TRUE                TRUE                TRUE                TRUE
2011-04-07 15:03:00 2011-04-07 15:04:00 2011-04-07 15:05:00 2011-04-07 15:06:00
               TRUE                TRUE                TRUE                TRUE
2011-04-07 15:07:00 2011-04-07 15:08:00 2011-04-07 15:09:00 2011-04-07 15:10:00
               TRUE                TRUE                TRUE                TRUE
2011-04-07 15:11:00 2011-04-07 15:12:00 2011-04-07 15:13:00 2011-04-07 15:14:00
               TRUE                TRUE                TRUE                TRUE
‘zoo’ series from 2011-03-10 00:19:00 to 2011-04-07 15:14:00
  Data: logi [1:41156] TRUE TRUE TRUE TRUE TRUE TRUE ...
  Index:  POSIXct[1:41156], format: "2011-03-10 00:19:00" "2011-03-10 00:20:00" ...
function (x)
{
    inherits(x, "xts") && is.numeric(.index(x)) && !is.null(indexClass(x))
}
<environment: namespace:xts>

result<-apply.daily(xm,FUN=rollapply(xm,width=20,FUN=testFun,by.column=FALSE, align="right"))

我收到以下错误:

Error in get(as.character(FUN), mode = "function", envir = envir) :
  object 'FUN' of mode 'function' was not found
Calls: apply.daily -> period.apply -> match.fun -> get
Execution halted

但是,如果我修改testFun代码以返回xts对象,我会收到错误。 这是对函数的修订:

ret_val <- data.frame(ctr,r_sq,price)
        ret_val <- as. matrix(ret_val)
ret_val<-as.xts(ret_val)
        ret_val
    }

它产生的错误是:

Error in as.POSIXlt.character(x, tz, ...) :
  character string is not in a standard unambiguous format
Calls: rollapply ... as.POSIXct.default -> as.POSIXct -> as.POSIXlt -> as.POSIXlt.character
Execution halted

有人可以告诉我如何让我的 testFun 返回一个 xts 对象而不是 zoo 对象,以便我可以使用 apply.daily 吗?

2 个答案:

答案 0 :(得分:3)

试试这个。

library(quantmod)

Lines <- "M11 2011-03-10  0   104 365 T   N   N   1
...other lines from post...
M11 2011-03-10  0   126 355 T   N   N   1"

# Parse the whitespace-separated tick rows held in the string `Lines`.
DF <- read.table(text = Lines)
# Column 2 is the date and column 4 the time; %04d zero-pads the time so that
# "%H%M" can parse it.
tt <- as.POSIXct(sprintf("%s %04d", DF[[2]], DF[[4]]), format = "%Y-%m-%d %H%M")
# Series of column 5 (presumably the price -- confirm) indexed by timestamp.
x <- xts(DF[[5]], tt)
# Collapse the ticks into one-minute OHLC bars.
xm <- to.minutes(x)
# One timestamp every 60 seconds from the first bar to the last.
tseq <- seq(start(xm), end(xm), by = 60)
# Interpolate the bars onto that regular grid.
xm.x <- na.approx(xm, xout = tseq)

答案 1 :(得分:3)

这是解决方法。基本上,您应该在 seq 中使用 by = "mins" 的写法。并且要始终留意所用时间对象的类:不能混用 POSIXlt 和 POSIXct。

library(xts)

# Small inline OHLC sample with minutes missing between the rows.
my.data <- read.table(
  header = TRUE,
  sep = ",",
  as.is = TRUE,
  text = "Date,Open,High,Low,Close
2011-08-11 03:00:00,421.20,421.20,421.20,421.20
2011-08-11 03:02:00,421.50,421.40,421.20,422.50
2011-08-11 03:05:00,421.60,421.60,421.50,421.50"
)

# Index the price columns by the parsed timestamps (POSIXlt throughout).
bar_times <- as.POSIXlt(my.data[, 1], format = "%Y-%m-%d %H:%M:%S")
core <- xts(my.data[, -1], order.by = bar_times)

# Minute-by-minute grid from the first bar to the last, in the same time class.
tseq <- as.POSIXlt(seq(from = start(core), to = end(core), by = "mins"))

# Fill the missing minutes by interpolation and print the result.
core <- na.approx(core, xout = tseq)
core

                        Open     High   Low    Close
2011-08-11 03:00:00 421.2000 421.2000 421.2 421.2000
2011-08-11 03:01:00 421.3500 421.3000 421.2 421.8500
2011-08-11 03:02:00 421.5000 421.4000 421.2 422.5000
2011-08-11 03:03:00 421.5333 421.4667 421.3 422.1667
2011-08-11 03:04:00 421.5667 421.5333 421.4 421.8333
2011-08-11 03:05:00 421.6000 421.6000 421.5 421.5000