统一时间序列的类型、频率和长度

时间:2018-05-16 12:43:36

标签: r time-series cbind

我想把之前创建的几个变量用 cbind 合并在一起。然而,这些时间序列的开始日期和结束日期各不相同。我的做法如下:

# Requested output window, given as two-element start/end vectors.
data.start <- c(1960,1)
data.end <- c(2018,2)
# Column-bind the four series and cut the result to the requested window.
# NOTE(review): the cbind order here (gdp, interest, inflation, inflation
# expectations) differs from the col.names order passed to write.table below,
# so the last three columns would be written under the wrong headers.
data.out <- window(cbind(gdp.log.ger, interest.ger, inflation, inflation.exp.ger),start = data.start, 
                   end = data.end)
# Write the windowed data as comma-separated text with explicit column
# headers, no row names, no quoting, and NA written as '.'.
write.table(data.out,file = 'InputData/rstar.data.ge.csv', sep = ',',
            col.names = c("gdp.log","inflation","inflation.expectations", "interest"),
            quote = FALSE, na = '.', row.names = FALSE)

我收到的错误:

Error in window.default(cbind(gdp.log.ger, interest.ger, inflation, inflation.exp.ger),  : 
  'start' cannot be after 'end'
In addition: Warning messages:
1: In cbind(gdp.log.ger, interest.ger, inflation, inflation.exp.ger) :
  number of rows of result is not a multiple of vector length (arg 1)
2: In window.default(cbind(gdp.log.ger, interest.ger, inflation, inflation.exp.ger),  :
  'end' value not changed

这可能是因为各个时间序列的开始日期和结束日期不相同吗?请注意,data.start 和 data.end 已分别设置为最早和最晚的观测日期。

附:以下是可重现该问题的完整脚本:

#------------------------------------------------------------------------------#
# File:        prepare.rstar.data.ger.R
#
# Description: Prepares the German input data used by the HLW methodology.
#------------------------------------------------------------------------------#

# Work from the project directory; the data paths used later are relative.
setwd("/Users/seanbagcik/Dropbox/Master Thesis (2017 - 2018)/R-Code")

# Start from an empty global workspace.
rm(list = ls())

# Time-series library: install it if it cannot be loaded, then attach it.
if (!require("tis")) {
  install.packages("tis")
  library("tis")
}

# Seasonal-adjustment library, handled the same way; afterwards point
# X13_PATH at the directory holding the X-13 binary.
if (!require("seasonal")) {
  install.packages("seasonal")
  library("seasonal")
}
Sys.setenv(X13_PATH = "/Library/Frameworks/R.framework/Versions/3.3/Resources/library/x13binary/bin")

# library('forecast') # for seasonal adjustment
# install.packages("forecast");

#------------------------------------------------------------------------------#
# Import raw data: GDP
#------------------------------------------------------------------------------#
# Sample bounds as c(year, quarter). gdp.end is not referenced in this section.
gdp.start <- c(1991, 1)
gdp.end   <- c(2017, 4)

# Read the CSV with its first line skipped and no header row, then use the
# second column as the observations of a quarterly ts starting at gdp.start.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
gdp.raw <- "rawData/germany_gdp.csv"
gdp.table <- read.table(gdp.raw, skip = 1, header = FALSE, sep = ",",
                        stringsAsFactors = FALSE)
gdp.ger <- ts(gdp.table[, 2], start = gdp.start, frequency = 4)

#------------------------------------------------------------------------------#
# Import raw data: inflation
#------------------------------------------------------------------------------#
# Sample bounds as c(year, quarter). inflation.end is not referenced in this
# section.
inflation.start <- c(1960,1)
inflation.end <- c(2018,1)

# Read the CSV (first line skipped, no header row) and build a quarterly ts
# from its second column.
inflation.raw <- "rawData/germany_inflation.csv"
inflation.table <- read.table(inflation.raw, skip = 1, header = F, sep = ',', stringsAsFactors = F)
inflation.ger <- ts(inflation.table[,2], start = inflation.start, frequency = 4)

inflation.seasadj.ger <- final(seas(as.ts(naWindow(inflation.ger),freq=4))) # seasonal adjustment
# NOTE(review): the next line overwrites the seasonally adjusted series with a
# tis conversion of `cpi`, a name that is not defined anywhere in this script.
inflation.seasadj.ger <- as.tis(cpi,start=inflation.start,tif='quarterly')

# Measure inflation expectations: 4-quarter moving average of past inflation:
# NOTE(review): `inflation.seasadj` is not defined in this script; the series
# created above is named `inflation.seasadj.ger`.
inflation.exp.ger <- (inflation.seasadj + Lag(inflation.seasadj, k=1) + Lag(inflation.seasadj, k=2) +
                             Lag(inflation.seasadj, k=3))/4 

# The commented-out lines below are an unused alternative (ARIMA fit,
# forecast, decomposition) kept for reference.
#------------------------------------------------------------------------------#
# inflation.fit <- auto.arima(inflation, ic = 'aic') # fit ARIMA model
# plot(forecast(inflation.fit,h=20)) # forecasting
# inflation.seasadj <- seasadj(decompose(inflation.fit, 'multiplicative'))
# inflation.ge <- 400*log(cpi/Lag(cpi, k=1)) # create annual inflation series
#------------------------------------------------------------------------------#

#------------------------------------------------------------------------------#
# Import raw data: short-term nominal interest rate
#------------------------------------------------------------------------------#  
# Sample bounds. interest.end is not referenced in this section.
interest.start <- c(1960,2)
interest.end <- c(2018,2)

# Read the CSV (first line skipped, no header row) and build a monthly ts
# from its second column.
# NOTE(review): with frequency = 12 the start c(1960,2) is the second month of
# 1960, while the same vector is reused below as the start of the quarterly
# series -- confirm which period is intended.
interest.raw <- 'rawData/germany_interest.csv'
interest.table <- read.table(interest.raw, skip = 1, header = F, sep = ',', stringsAsFactors = F)
interest.m <- ts(interest.table[,2], start = interest.start, frequency = 12) # monthly time-series

interest <- convert(interest.m, tif ="quarterly", observed ="averaged") # monthly to quarterly frequency
interest <- final(seas(as.ts(naWindow(interest),freq=4))) # seasonal adjustment
# NOTE(review): this converts the series to class tis, whereas gdp.ger is built
# as a plain ts; mixing the two classes in cbind() may be related to the
# reported error -- confirm.
interest <- as.tis(interest,start=interest.start,tif='quarterly')

interest.ger <- 100*((1+interest/36000)^365 -1) #  365-day annualized basis

#------------------------------------------------------------------------------#
# Prepare Data
#------------------------------------------------------------------------------#

# Take log of real GDP
gdp.log.ger <- log(gdp.ger)

#------------------------------------------------------------------------------#
# Output Data
#------------------------------------------------------------------------------#
# Requested output window as c(year, quarter) pairs.
data.start <- c(1960,1)
data.end <- c(2018,2)
# Column-bind the four series and cut the result to the requested window.
# The cbind order matches the col.names order used in write.table below.
# NOTE(review): gdp.ger is built as a ts starting in 1991, while the inflation
# and interest series are converted with as.tis() and start in 1960; the
# differing classes and spans are presumably what cbind()/window() fail on --
# confirm.
data.out <- window(cbind(gdp.log.ger, inflation.seasadj.ger, inflation.exp.ger, interest.ger),
                   start = data.start, end = data.end)
# Write the windowed data as comma-separated text with explicit column
# headers, no row names, no quoting, and NA written as '.'.
write.table(data.out,file = 'InputData/rstar.data.ge.csv', sep = ',',
            col.names = c("gdp.log","inflation","inflation.expectations", "interest"),
            quote = FALSE, na = '.', row.names = FALSE)

使用数据集:R-Data

2 个答案:

答案 0 :(得分:1)

正如您所发现的,核心问题在于这四个时间序列的类型、频率和长度各不相同。所以解决办法是把它们统一起来,但这比我预期的要费事一些。通常情况下,这类事情可以更自动化地完成。

我选择把季度时间序列转换为月度,而不是反过来。转换后空缺的月份只是用 NA 填充,不过用样条/线性/局部插值来补全也非常简单。

编辑:通过一些额外的修补,我设法简化了一些事情

library(tis)
library(zoo)
library(xts)
library(devtools)

# Source the data script from the gist; it presumably defines the four *.ger
# series used below -- verify against the gist.
source_gist(
  "https://gist.github.com/AkselA/942097c99bfa22ddc2e3d68d8a198ab8",
  filename = "data_ger.r"
)

# Put every series on a zoo "yearmon" index so they all share one index class.
gdp.log.ger.z <- zoo(gdp.log.ger)
index(gdp.log.ger.z) <- as.yearmon(index(gdp.log.ger.z))
inflation.seasadj.ger.z <- as.zooreg(inflation.seasadj.ger, class = "yearmon")
inflation.exp.ger.z <- as.zooreg(inflation.exp.ger, class = "yearmon")
interest.ger.z <- as.zooreg(interest.ger, class = "yearmon")

# Merge the four series on the shared index and fill the resulting gaps with
# na.approx(), then give the columns their output names.
mrg <- na.approx(
  merge(
    gdp.log.ger.z,
    inflation.seasadj.ger.z,
    inflation.exp.ger.z,
    interest.ger.z
  )
)
colnames(mrg) <- c("gdp.log", "inflation", "inflation.expectations", "interest")

# Collapse the merged rows to one mean per quarter and drop the row names.
mrg.q <- aggregate(mrg, by = yearqtr, FUN = mean)
rownames(mrg.q) <- NULL

# Trim leading and trailing rows that contain NA: keep the rows from the
# latest "first non-NA" position to the earliest "last non-NA" position
# across all columns.
be <- max(apply(mrg.q, 2, function(column) min(which(!is.na(column)))))
en <- min(apply(mrg.q, 2, function(column) max(which(!is.na(column)))))
mrg.q <- mrg.q[be:en, ]

# Write the cropped quarterly series (`mrg.q`) to CSV without row names or
# quoting, with NA written as ".".
write.csv(mrg.q, file = "data.csv", quote = FALSE, na = ".", row.names = FALSE)

# plot
# Build a private environment holding (1) an `mtext` wrapper that always
# passes cex = 0.8 to graphics::mtext and (2) a copy of plot.zoo whose
# enclosing environment is that private environment.
# NOTE(review): presumably this makes the mtext calls inside plot.zoo resolve
# to the wrapper so the panel labels are drawn smaller -- confirm.
e <- local({ 
   # Wrapper that forwards all arguments and fixes the text size.
   mtext <- function(...) graphics::mtext(..., cex = 0.8) 
   # The replacement assignment creates a local `plot.zoo`; its value (the
   # local environment itself) is the last expression and so becomes `e`.
   environment(plot.zoo) <-  environment() 
}) 
# Look up the patched plot.zoo inside `e` and call it on the quarterly data.
with(e, plot.zoo)(mrg.q, oma=c(2, 0, 2, 0), cex.axis=0.8) 

[图片:上述绘图代码的输出]

答案 1 :(得分:0)

这似乎有效:

#------------------------------------------------------------------------------#
# File:        prepare.rstar.data.ger.R
#
# Description: Prepares the German input data used by the HLW methodology.
#------------------------------------------------------------------------------#

# Work from the project directory; the data paths used later are relative.
setwd("/Users/seanbagcik/Dropbox/Master Thesis (2017 - 2018)/R-Code")

# Start from an empty global workspace.
rm(list = ls())

# Time-series library: install it if it cannot be loaded, then attach it.
if (!require("tis")) {
  install.packages("tis")
  library("tis")
}

# Point X13_PATH at the directory holding the X-13 binary, then load the
# seasonal-adjustment library (installing it first if necessary).
Sys.setenv(X13_PATH = "/Library/Frameworks/R.framework/Versions/3.3/Resources/library/x13binary/bin")
if (!require("seasonal")) {
  install.packages("seasonal")
  library("seasonal")
}

# library('forecast')
# install.packages("forecast");

#------------------------------------------------------------------------------#
# Import raw data: GDP
#------------------------------------------------------------------------------#
# Sample bounds as c(year, quarter). gdp.end is not referenced in this section.
gdp.start <- c(1991, 1)
gdp.end   <- c(2017, 4)

# Read the CSV with its first line skipped and no header row, then use the
# second column as the observations of a quarterly ts starting at gdp.start.
# TRUE/FALSE are spelled out because T/F are ordinary, reassignable variables.
gdp.raw <- "rawData/germany_gdp.csv"
gdp.table <- read.table(gdp.raw, skip = 1, header = FALSE, sep = ",",
                        stringsAsFactors = FALSE)
gdp.ger <- ts(gdp.table[, 2], start = gdp.start, frequency = 4)

#------------------------------------------------------------------------------#
# Import raw data: inflation
#------------------------------------------------------------------------------#
# Sample bounds as c(year, quarter). inflation.end is not referenced in this
# section.
inflation.start <- c(1960, 1)
inflation.end <- c(2018, 1)

# Read the CSV (first line skipped, no header row) and build a quarterly ts
# from its second column. TRUE/FALSE are spelled out because T/F are
# ordinary, reassignable variables.
inflation.raw <- "rawData/germany_inflation.csv"
inflation.table <- read.table(inflation.raw, skip = 1, header = FALSE, sep = ",",
                              stringsAsFactors = FALSE)
inflation.ger <- ts(inflation.table[, 2], start = inflation.start, frequency = 4)

# Seasonally adjust the series, then rebuild the result as a plain quarterly
# ts so that it has the same class as the GDP series.
# NOTE(review): re-stamping with start = inflation.start assumes naWindow()
# dropped no leading observations -- confirm against the data.
inflation.seasadj.ger <- final(seas(as.ts(naWindow(inflation.ger), freq = 4)))
inflation.seasadj.ger <- ts(inflation.seasadj.ger, start = inflation.start,
                            frequency = 4)

# Inflation expectations: the series plus its Lag() at k = 1, 2, 3, divided
# by four.
inflation.exp.ger <- (inflation.seasadj.ger + Lag(inflation.seasadj.ger, k = 1) +
                        Lag(inflation.seasadj.ger, k = 2) +
                        Lag(inflation.seasadj.ger, k = 3)) / 4

# The commented-out lines below are an unused alternative (ARIMA fit,
# forecast, decomposition) kept for reference.
#------------------------------------------------------------------------------#
# inflation.fit <- auto.arima(inflation, ic = 'aic') # fit ARIMA model
# plot(forecast(inflation.fit,h=20)) # forecasting
# inflation.seasadj <- seasadj(decompose(inflation.fit, 'multiplicative'))
# inflation.ge <- 400*log(cpi/Lag(cpi, k=1)) # create annual inflation series
#------------------------------------------------------------------------------#

#------------------------------------------------------------------------------#
# Import raw data: short-term nominal interest rate
#------------------------------------------------------------------------------#
# Sample bounds. interest.end is not referenced in this section.
interest.start <- c(1960, 2)
interest.end <- c(2018, 2)

# Read the CSV (first line skipped, no header row) and build a monthly ts
# from its second column. TRUE/FALSE are spelled out because T/F are
# ordinary, reassignable variables.
# NOTE(review): with frequency = 12, start = c(1960, 2) is the second month of
# 1960; the same vector is reused below as a quarterly start -- confirm which
# period is intended.
interest.raw <- "rawData/germany_interest.csv"
interest.table <- read.table(interest.raw, skip = 1, header = FALSE, sep = ",",
                             stringsAsFactors = FALSE)
interest.m <- ts(interest.table[, 2], start = interest.start, frequency = 12)

# Convert monthly to quarterly frequency (observed = "averaged"), seasonally
# adjust, and rebuild the result as a plain quarterly ts.
interest <- convert(interest.m, tif = "quarterly", observed = "averaged")
interest <- final(seas(as.ts(naWindow(interest), freq = 4)))
interest <- ts(interest, start = interest.start, frequency = 4)

# Re-express the rate on a 365-day annualized basis.
interest.ger <- 100 * ((1 + interest / 36000)^365 - 1)

#------------------------------------------------------------------------------#
# Prepare Data
#------------------------------------------------------------------------------#

# Real GDP enters in natural logs.
gdp.log.ger <- log(gdp.ger)

#------------------------------------------------------------------------------#
# Output Data
#------------------------------------------------------------------------------#
# save(gdp.log.ger, inflation.seasadj.ger, inflation.exp.ger, interest.ger, file="data_ger.RData")

# Output window as c(year, quarter) pairs.
data.start <- c(1960, 1)
data.end <- c(2018, 2)

# Bind the four series column-wise, then restrict the result to the window.
data.out <- window(
  cbind(gdp.log.ger, inflation.seasadj.ger, inflation.exp.ger, interest.ger),
  start = data.start,
  end = data.end
)

# Export as comma-separated text with explicit headers in the same order as
# the bound columns; no quoting, no row names, NA written as ".".
write.table(
  data.out,
  file = "InputData/rstar.data.ge.csv",
  sep = ",",
  na = ".",
  quote = FALSE,
  row.names = FALSE,
  col.names = c("gdp.log", "inflation", "inflation.expectations", "interest")
)

DATA