以下数据:
require(dplyr)
require(ggplot2)
ds <- read.table(header = TRUE, text ="
obs id year attend
1 47 2000 1
2 47 2001 3
3 47 2002 5
4 47 2003 8
5 47 2004 6
6 47 2005 4
7 47 2006 2
8 47 2007 1
9 47 2008 2
10 47 2009 3
11 47 2010 4
12 47 2011 5
")
print(ds)
我想计算线性模型的预测值
linear<- predict(lm(attend ~ year, ds))
quadratic<- predict(lm(attend ~ year + I(year^2),ds))
cubic<- predict(lm(attend ~ year + I(year^2) + I(year^3),ds))
ds<- ds %>% dplyr::mutate(linear=linear, quadratic=quadratic, cubic=cubic)
print(ds)
obs id year attend linear quadratic cubic
1 1 47 2000 1 3.820513 3.500000 3.500000
2 2 47 2001 3 3.792541 3.646853 3.646853
3 3 47 2002 5 3.764569 3.758741 3.758741
4 4 47 2003 8 3.736597 3.835664 3.835664
5 5 47 2004 6 3.708625 3.877622 3.877622
6 6 47 2005 4 3.680653 3.884615 3.884615
7 7 47 2006 2 3.652681 3.856643 3.856643
8 8 47 2007 1 3.624709 3.793706 3.793706
9 9 47 2008 2 3.596737 3.695804 3.695804
10 10 47 2009 3 3.568765 3.562937 3.562937
11 11 47 2010 4 3.540793 3.395105 3.395105
12 12 47 2011 5 3.512821 3.192308 3.192308
问题:尽管时间序列具有明确的立方体形状,但二次和三次预测是相同的。为什么?这是一个错误吗?
答案 0 :(得分:6)
这是因为2011 ^ 3是一个非常大的数字(更大,这导致系数被返回为NA
。如果你检查了模型,你会注意到这一点。 / p>
coef(lm(attend ~ year + I(year^2) + I(year^3),ds))
# (Intercept) year I(year^2) I(year^3)
# -7.025524e+04 7.009441e+01 -1.748252e-02 NA
使用poly
创建正交多项式更为明智
linear<- predict(lm(attend ~ year, ds))
quadratic<- predict(lm(attend ~ poly(year,2),ds))
cubic<- predict(lm(attend ~ poly(year,3),ds))
ds<- (ds %>% dplyr::mutate(linear=linear, quadratic=quadratic, cubic=cubic))
ds
# obs id year attend linear quadratic cubic
# 1 1 47 2000 1 3.820513 3.500000 0.7435897
# 2 2 47 2001 3 3.792541 3.646853 3.8974359
# 3 3 47 2002 5 3.764569 3.758741 5.5128205
# 4 4 47 2003 8 3.736597 3.835664 5.9238539
# 5 5 47 2004 6 3.708625 3.877622 5.4646465
# 6 6 47 2005 4 3.680653 3.884615 4.4693085
# 7 7 47 2006 2 3.652681 3.856643 3.2719503
# 8 8 47 2007 1 3.624709 3.793706 2.2066822
# 9 9 47 2008 2 3.596737 3.695804 1.6076146
# 10 10 47 2009 3 3.568765 3.562937 1.8088578
# 11 11 47 2010 4 3.540793 3.395105 3.1445221
# 12 12 47 2011 5 3.512821 3.192308 5.9487179