Question

我想编写一个类似于zoo / xts中rollapply的函数，但要满足我自己的需求。我先用一些非常简单的示例数据编写了代码，一切运行正常。但当我在edhec数据上运行它时，却收到了错误。我不清楚原因，猜测与if语句有关。有人能帮我诊断出错的原因吗？

#rm(list=ls()) #Clear environment
cat("\014") #CTRL + L

library(xts)
library(lubridate)

# Vectorised parity test: TRUE wherever `x` leaves no remainder modulo 2.
is.even <- function(x) {
  remainder <- x %% 2
  remainder == 0
}

# Round `x` up to the nearest multiple of `to` (default 2); values that are
# already a multiple of `to` are returned unchanged.
roundUp <- function(x, to = 2) {
  whole_steps <- x %/% to
  # TRUE (counted as 1) whenever a remainder is left over, adding one step.
  has_remainder <- as.logical(x %% to)
  to * (whole_steps + has_remainder)
}

# Split a time series into a list of multi-year subsets and print that list.
#
# Subsets start at the first year found in `data`; each one spans `window`
# calendar years and consecutive subsets start `slide` years apart.
#
# Arguments:
#   data   - time series passed to nyears(), .indexyear() and index()
#            (an xts object in the examples that accompany this code).
#   window - number of years covered by each subset.
#   slide  - number of years between the starts of consecutive subsets.
#
# Returns the value of print(subsetlist), i.e. the list of subsets.
functionTest <- function(data, window, slide){

  nyears_t = nyears(data)

  # Intended to run only when the number of years is odd.
  # NOTE(review): `nyears_t == FALSE` is evaluated first, so is.even() receives
  # a logical rather than the year count. For any non-zero count that logical
  # is FALSE, and FALSE %% 2 == 0 is TRUE, so this branch is always taken.
  if(is.even(nyears_t == FALSE)) {
    nyears_t <- roundUp(nyears_t)
    data_extend <- data

    # .indexyear() is offset from 1900 (hence the + 1900); the extra + 1 moves
    # to the year after the indexed observation.
    # NOTE(review): length(data) is the total number of elements, not the
    # number of rows. For a multi-column object it indexes past the end of
    # .indexyear(data) and yields NA, which then breaks
    # start_extend:end_extend below.
    start_extend <- .indexyear(data)[length(data)]+ 1900 + 1
    end_extend <- start_extend + length(data) - 1
    # Re-date the copy with the consecutive years start_extend..end_extend.
    index(data_extend) <- update(index(data),year=start_extend:end_extend)

    # Append the re-dated copy after the original observations.
    data <- rbind(data, data_extend)

    warning("WARNING! The function has looped to the start of the timeseries. The final list(s) 
            will contain years that do not exist in the dataset. Please modify.")
  }

  # Number of subsets to produce.
  nslides = nyears_t/slide

  # First calendar year present in the (possibly extended) data.
  year_1 = (.indexyear(data)[1]+1900)

  # One row per subset: first and last calendar year of that subset.
  start <- seq(from = year_1, by = slide, length.out = nslides)
  end <- start + window - 1 

  mat <- matrix(c(start, end), ncol = 2, dimnames = list(c(1:nslides), c("start", "end")))

  # Collect one subset per row of `mat`.
  subsetlist <- vector('list')

  for(i in 1:nslides){
    # "start/end" range string used to subset `data` by year.
    subset <- data[paste0(mat[i,1], "/", mat[i,2])]
    subsetlist[[i]] <- subset
  }
  print(subsetlist)
}

我在编写上述函数时使用的示例代码：

# Toy series used while developing functionTest(): one observation per year
# from 2000 to 2008, with the values 1..9.
a <- seq(
  from = as.POSIXct("2000", format = "%Y"),
  to = as.POSIXct("2008", format = "%Y"),
  by = "year"
)
a <- as.xts(seq_along(a), order.by = a)
a

functionTest(data = a, window = 3, slide = 2)

我正在测试并收到错误的示例代码：

> data(edhec, package = "PerformanceAnalytics")
> edhec <- edhec[,1:3]
> edhec <- edhec["/2007"]
> head(edhec)
           Convertible Arbitrage CTA Global Distressed Securities
1997-01-31                0.0119     0.0393                0.0178
1997-02-28                0.0123     0.0298                0.0122
1997-03-31                0.0078    -0.0021               -0.0012
1997-04-30                0.0086    -0.0170                0.0030
1997-05-31                0.0156    -0.0015                0.0233
1997-06-30                0.0212     0.0085                0.0217
> functionTest(data = edhec, window = 3, slide = 2)
 Show Traceback

 Rerun with Debug
 Error in start_extend:end_extend : NA/NaN argument 
>

更新

根据Joshua Ulrich的建议对if语句做了如下更新后（参见下面的代码），代码现在可以运行了。但是，if语句仍然存在问题——无论数据集中的年数是偶数还是奇数，它似乎都会执行。虽然这不会影响函数结果的准确性，但在大型数据集上可能会带来问题。如果有人对此有任何想法，将不胜感激。除此之外一切都很好！谢谢。

# Extend the series by appending a copy of `data` whose index is shifted
# forward by nyears(data) years; intended to run only for an odd year count.
# NOTE(review): `nyears_t == FALSE` is evaluated first, so is.even() receives a
# logical instead of the year count; `is.even(nyears_t) == FALSE` would test
# the count itself.
if(is.even(nyears_t == FALSE)) {
    nyears_t <- roundUp(nyears_t)
    data_extend <- data

    # Neither of these two values is used again within this fragment.
    start_extend <- .indexyear(data)[nrow(data)] + 1900 + 1
    end_extend <- start_extend + nyears(data) - 1

    # Shift every timestamp forward by the number of years in the data.
    dates <- index(data)
    tmp <- as.POSIXlt(dates)
    tmp$year <- tmp$year + nyears(data)
    # `tz` is not defined in this fragment; presumably it comes from the
    # enclosing function's arguments - confirm.
    dates2 <- as.POSIXct(tmp, tz = tz)
    index(data_extend) <- dates2

    # Append the shifted copy after the original observations.
    data <- rbind(data, data_extend)

    warning("WARNING! The function has looped to the start of the timeseries. The final list(s) 
            will contain years that do not exist in the dataset. Please modify.")
  }

Answer 1

在矩阵上调用length（这是xts / zoo对象的coredata）给出了元素的总数（即底层向量的长度）。您应该使用nrow代替。

# Index with nrow(): it counts rows, whereas length() on a multi-column
# object counts every element.
start_extend <- .indexyear(data)[nrow(data)] + 1900 + 1
end_extend <- start_extend + nrow(data) - 1

如果您不确定data是矩阵还是向量，那么您应该使用NROW而不是nrow。在向量上调用nrow会返回NULL，而当x是向量时，NROW(x)会返回length(x)。

Answer 2

我已经提出了现在具有预期效果的完整答案。谢谢@Joshua的帮助 - 我认为如果没有它我就不能修复代码。为了在大数据上运行它，我不得不进行一些额外的更改。

供感兴趣的人参考，这是我的完整工作代码（不包括我的其他自定义函数）：

# Split an xts series into a list of multi-year windows and print that list.
#
# Windows start at the first calendar year found in `data`; each one spans
# `window` calendar years and consecutive windows start `slide` years apart.
# When the series covers an odd number of years, a copy of the series shifted
# forward by that many years is appended first, so the trailing windows wrap
# around to the start of the series.
#
# Arguments:
#   data   - xts object to split.
#   window - number of calendar years covered by each window.
#   slide  - number of years between the starts of consecutive windows.
#   tz     - time zone used for the shifted index and for the window
#            boundaries (default "GMT").
#
# Returns the list of per-window subsets invisibly, after printing it.
bootOffset <- function(data, window, slide, tz = "GMT") {
  nyears_t <- nyears(data)

  # Odd year counts are rounded up to the next even number and the series is
  # padded with a time-shifted copy of itself.
  if (!is.even(nyears_t)) {
    shift_years <- nyears_t
    nyears_t <- roundUp(nyears_t)

    # Move every timestamp forward by the original number of years.
    shifted <- as.POSIXlt(index(data))
    shifted$year <- shifted$year + shift_years

    data_extend <- data
    index(data_extend) <- as.POSIXct(shifted, tz = tz)
    data <- rbind(data, data_extend)
  }

  # seq(length.out = ) rounds a fractional length up; round the same way here
  # so the number of windows agrees everywhere. Previously a `slide` that did
  # not divide the year count produced a dimnames length mismatch error.
  nslides <- ceiling(nyears_t / slide)

  # .indexyear() is offset from 1900.
  year_1 <- .indexyear(data)[1] + 1900

  start_years <- seq(from = year_1, by = slide, length.out = nslides)
  end_years <- start_years + window - 1

  # Build the window boundaries in `tz` so the result does not depend on the
  # session's local time zone. `window(...)` below still resolves to the
  # generic function: R skips the numeric `window` argument when looking up a
  # function to call.
  subsetlist <- lapply(seq_len(nslides), function(i) {
    window(
      data,
      start = as.POSIXct(paste0(start_years[i], "-01-01"), tz = tz),
      end = as.POSIXct(paste0(end_years[i], "-12-31"), tz = tz)
    )
  })

  print(subsetlist)
  invisible(subsetlist)
}

确认这些结果符合要求：

# Load the EDHEC data set, keep its first three columns, and cut one series
# ending in 2008 and one ending in 2007.
data(edhec, package = "PerformanceAnalytics")
edhec <- edhec[, 1:3]
edhec08 <- edhec["/2008"]
edhec07 <- edhec["/2007"]

# Keep the results: boot08 and boot07 are inspected with bootOffset.Check()
# further down, so the return values must be assigned.
boot08 <- bootOffset(data = edhec08, # EVEN
                     window = 4,
                     slide = 3)

boot07 <- bootOffset(data = edhec07, # ODD
                     window = 4,
                     slide = 3)

＃

> bootOffset.Check <- function(boot){
+ dates <- lapply(boot, year)
+ dates <- lapply(dates, unique)
+ dates <- lapply(dates, `length<-`, max(lengths(dates)))
+ as.data.frame(dates, 
+ col.names = paste0("boot_", 1:length(boot)))
+ 
+ }
> 
> nyears(edhec08)
[1] 12
> bootOffset.Check(boot08) #EVEN number of years
  boot_1 boot_2 boot_3 boot_4
1   1997   2000   2003   2006
2   1998   2001   2004   2007
3   1999   2002   2005   2008
4   2000   2003   2006     NA
> 
> nyears(edhec07)
[1] 11
> bootOffset.Check(boot07) #ODD number of years
  boot_1 boot_2 boot_3 boot_4
1   1997   2000   2003   2006
2   1998   2001   2004   2007
3   1999   2002   2005   2008
4   2000   2003   2006   2009
>

if语句错误/不应用if语句

2 个答案: