我想一次对多个变量进行单位根检验。
我尝试了以下代码:
> library(tseries)
> #Unit Root Test
> data1 <- data.frame(data$Date_Quandl, data$GDP_Quaterly, data$Employment_Rate)
> dput(data1)
structure(list(data.Date_Quandl = structure(c(60L, 30L, 15L,
45L, 59L, 29L, 14L, 44L, 58L, 28L, 13L, 43L, 57L, 27L, 12L, 42L,
56L, 26L, 11L, 41L, 55L, 25L, 10L, 40L, 54L, 24L, 9L, 39L, 53L,
23L, 8L, 38L, 52L, 22L, 7L, 37L, 51L, 21L, 6L, 36L, 50L, 20L,
5L, 35L, 49L, 19L, 4L, 34L, 48L, 18L, 3L, 33L, 47L, 17L, 2L,
32L, 46L, 16L, 1L, 31L), .Label = c("30.06.2000", "30.06.2001",
"30.06.2002", "30.06.2003", "30.06.2004", "30.06.2005", "30.06.2006",
"30.06.2007", "30.06.2008", "30.06.2009", "30.06.2010", "30.06.2011",
"30.06.2012", "30.06.2013", "30.06.2014", "30.09.2000", "30.09.2001",
"30.09.2002", "30.09.2003", "30.09.2004", "30.09.2005", "30.09.2006",
"30.09.2007", "30.09.2008", "30.09.2009", "30.09.2010", "30.09.2011",
"30.09.2012", "30.09.2013", "30.09.2014", "31.03.2000", "31.03.2001",
"31.03.2002", "31.03.2003", "31.03.2004", "31.03.2005", "31.03.2006",
"31.03.2007", "31.03.2008", "31.03.2009", "31.03.2010", "31.03.2011",
"31.03.2012", "31.03.2013", "31.03.2014", "31.12.2000", "31.12.2001",
"31.12.2002", "31.12.2003", "31.12.2004", "31.12.2005", "31.12.2006",
"31.12.2007", "31.12.2008", "31.12.2009", "31.12.2010", "31.12.2011",
"31.12.2012", "31.12.2013", "31.12.2014"), class = "factor"),
data.GDP_Quaterly = c(17703.7, 17599.8, 17328.2, 17044, 17078.3,
16872.3, 16619.2, 16502.4, 16332.5, 16268.9, 16094.7, 15956.5,
15785.3, 15587.1, 15460.9, 15238.4, 15230.2, 15057.7, 14888.6,
14681.1, 14566.5, 14384.1, 14340.4, 14383.9, 14549.9, 14843,
14813, 14668.4, 14685.3, 14569.7, 14422.3, 14233.2, 14066.4,
13908.5, 13799.8, 13648.9, 13381.6, 13205.4, 12974.1, 12813.7,
12562.2, 12367.7, 12181.4, 11988.4, 11816.8, 11625.1, 11370.7,
11230.1, 11103.8, 11037.1, 10934.8, 10834.4, 10701.3, 10639.5,
10638.4, 10508.1, 10472.3, 10357.4, 10278.3, 10031), data.Employment_Rate = c(71.0619,
70.9383, 71.162, 71.138, 71.2286, 71.5095, 71.565, 71.3246,
71.4963, 71.3738, 71.4276, 71.3065, 71.0246, 71.3244, 71.0619,
70.9811, 71.2149, 70.8342, 70.5568, 70.5444, 70.3286, 70.179,
70.2555, 70.5103, 70.8038, 70.6748, 70.9769, 70.6988, 70.2125,
70.1661, 69.6284, 69.5613, 68.9837, 68.8606, 68.4223, 67.963,
67.6293, 67.5905, 67.1857, 67.1248, 66.7075, 66.5857, 66.4303,
66.2826, 68.7514, 68.8897, 69.0824, 68.9718, 68.7927, 68.6387,
68.8053, 68.7286, 68.4141, 68.2357, 68.4785, 68.4171, 68.4782,
68.3978, 68.5344, 68.4772)), .Names = c("data.Date_Quandl",
"data.GDP_Quaterly", "data.Employment_Rate"), row.names = c(NA,
-60L), class = "data.frame")
> apply(data1,data1[1:2],function(x){ adf.test(x,k=0) })
Error in ds[-MARGIN] : invalid subscript type 'list'
In addition: Warning message:
In Ops.factor(left) : ‘-’ not meaningful factors
apply 函数无法正常工作。
请问我哪里做错了?有什么建议吗?
答案 0 :(得分:3)
这是你想要得到的结果吗?
res <- sapply(data1[2:3],function(x){ adf.test(x,k=0) })
#> res
# data.GDP_Quaterly data.Employment_Rate
#statistic -1.198207 -1.601795
#parameter 0 0
#alternative "stationary" "stationary"
#p.value 0.8988656 0.7357338
#method "Augmented Dickey-Fuller Test" "Augmented Dickey-Fuller Test"
#data.name "x" "x"
P.S.:您确定数据的顺序正确吗?您的数据是一个时间序列,但其值是按逆时间顺序存储的(最近的记录排在最前面)。
#> head(data1)
# data.Date_Quandl data.GDP_Quaterly data.Employment_Rate
#1 31.12.2014 17703.7 71.0619
#2 30.09.2014 17599.8 70.9383
#3 30.06.2014 17328.2 71.1620
#4 31.03.2014 17044.0 71.1380
#5 31.12.2013 17078.3 71.2286
#6 30.09.2013 16872.3 71.5095
我认为把这个顺序反转过来可能更合适。可以通过以下方式实现:
dat2<- data1[rev(rownames(data1)),]
rownames(dat2) <- c(1:nrow(data1))
res <- sapply(dat2[2:3],function(x){ adf.test(x,k=0) })
现在数据按时间正序排列:
#> head(dat2)
# data.Date_Quandl data.GDP_Quaterly data.Employment_Rate
#1 31.03.2000 10031.0 68.4772
#2 30.06.2000 10278.3 68.5344
#3 30.09.2000 10357.4 68.3978
#4 31.12.2000 10472.3 68.4782
#5 31.03.2001 10508.1 68.4171
#6 30.06.2001 10638.4 68.4785
此时检验结果也有所不同:
#> res
# data.GDP_Quaterly data.Employment_Rate
#statistic -1.062353 -1.835968
#parameter 0 0
#alternative "stationary" "stationary"
#p.value 0.9207886 0.6410799
#method "Augmented Dickey-Fuller Test" "Augmented Dickey-Fuller Test"
#data.name "x" "x"
希望这有帮助。
答案 1 :(得分:1)
使用 `apply` 并正确指定 `MARGIN` 参数即可。`MARGIN` 应为 1(按行)或 2(按列),而问题中的代码把数据框 `data1[1:2]` 传给了 `MARGIN`,因此报错。假设你想按列循环:
apply(data1[-1], MARGIN = 2, adf.test, k=0)
#$data.GDP_Quaterly
# Augmented Dickey-Fuller Test
#data: newX[, i]
#Dickey-Fuller = -1.1982, Lag order = 0, p-value = 0.8989
#alternative hypothesis: stationary
#$data.Employment_Rate
# Augmented Dickey-Fuller Test
#data: newX[, i]
#Dickey-Fuller = -1.6018, Lag order = 0, p-value = 0.7357
# alternative hypothesis: stationary
如果要对反转顺序后的数据(即上一个答案中的 `dat2`)进行检验:
apply(dat2[-1], 2, adf.test, k=0) #@RHertel's data