我通过每年进行汇总回归来开发预测模型,因此我有不同的模型系数。我使用lapply(myregressions,coef)将不同的系数存储在列表中,我想使用模型系数对子样本(SS)进行预测。
因此我有一个DF主题(SS)我想做预测,一个“列表”每年有不同的模型系数:
SS
id time inc ta div acc dd neg
9 1 1995 10.3995 234.4126 2.5155 3.6462 2.5155 0
10 1 1996 10.6098 245.7136 2.7936 -3.8121 2.7936 0
11 1 1997 9.9005 271.3418 2.6359 13.7573 2.6359 0
12 1 1998 8.4780 231.6341 2.6580 -19.0560 2.6580 0
13 1 1999 14.2969 293.2106 2.6992 11.7852 2.6992 0
14 1 2000 159.1115 374.4408 2.7745 223.5071 2.7745 0
15 1 2001 -10.2167 315.3258 4.2379 5.8569 4.2379 1
16 1 2002 -10.8737 207.4538 3.2023 -36.4547 3.2023 1
17 1 2003 10.7719 195.9657 3.0622 -12.2817 3.0622 0
18 1 2004 4.6669 269.0332 3.7677 -6.0882 3.7677 0
19 1 2005 5.5542 235.2783 3.9230 -10.7087 3.9230 0
20 1 2006 5.2375 300.8294 3.8623 -18.7482 3.8623 0
21 1 2007 15.1524 305.0057 3.8131 -9.9801 3.8131 0
22 1 2008 20.8307 307.4744 4.3381 -25.4373 4.3381 0
23 1 2009 33.5010 329.3090 5.2710 -16.7450 5.2710 0
24 1 2010 4.5180 344.7620 5.5650 -15.9290 5.5650 0
25 1 2011 5.7310 330.4060 6.1180 -3.2060 6.1180 0
26 1 2012 9.4360 505.8150 2.8510 -14.9710 2.8510 0
73 3 2007 5.4877 240.1564 1.8500 29.3132 1.8500 0
74 3 2008 14.9182 222.5949 0.0000 -76.0916 0.0000 0
以及以下模型系数:
> COEF
Year_1995 Year_1996 Year_1997 Year_1998 Year_1999 Year_2000 Year_2001 Year_2002 Year_2003 Year_2004 Year_2005
(Intercept) -1.416094e+02 -82.29723152 -34.48711235 -6.75285267 39.78186661 51.89597510 23.17918777 6.86072515 5.23465497 -5.08239969 -7.6220984
inc 1.117437e-03 -0.02122991 -0.02947603 -0.05828917 -0.16953822 -0.17572781 0.04419688 0.35492978 0.16529339 0.38122280 0.7805206
ta 9.491843e-02 0.07728032 0.08736790 0.07941649 0.05278938 0.04884008 0.04428136 0.02858724 0.02813507 0.01588882 0.0168884
div -2.110746e+00 -0.41682760 -0.28204986 0.63646277 2.40335454 2.47957711 1.75339550 1.15411677 1.85791345 1.43288590 0.2259217
acc -2.925896e-01 -0.08831186 0.06483823 0.15384923 0.15693459 0.08828990 -0.04028631 -0.04648089 0.03157282 -0.23257706 -0.2823107
dd 8.246035e+01 30.23337379 -23.72154753 -47.38041382 -84.13134965 -95.56086167 -58.06399289 -21.61298500 -12.11352064 -15.77302514 -26.3842782
neg -1.381479e+02 25.15047622 31.05613176 60.76519163 56.06664314 -36.13696534 -27.42185598 -24.58473166 -38.36806501 -25.03186619 17.3447807
Year_2006 Year_2007 Year_2008 Year_2009 Year_2010 Year_2011
(Intercept) -17.46962445 -12.99847554 -32.75957413 -20.99868791 -16.816973260 4.20348639
inc 0.92593004 0.77045742 0.78774688 0.85185356 0.784377210 0.63716695
ta 0.01374169 0.02841329 0.02729613 0.01045620 0.005867683 -0.00203465
div -0.29770950 -0.20114307 -0.18070822 0.04780002 0.310500463 0.69699053
acc -0.23553147 -0.18337198 -0.11535837 -0.12223995 -0.083179505 -0.08493535
dd 12.00464005 -15.31475612 18.57613650 8.13129910 8.855194809 0.86265216
neg 13.84589134 -6.42677325 24.44777629 39.13989889 31.941025536 5.76845025
然而,我对于如何通过代码执行此操作感到很遗憾。
我尝试将时间变量设置为因子(级别),然后以某种方式将其链接到正确的模型(年份),但我没有让它工作。这是我到目前为止所得到的:
as.factor(SS$time)
for (f in levels(SS$time)) {
SS$Forecast<-NA
SS[which(set$time == f),]$Forecast <- c(SS[which(SS$time == f),]$SS[,cbind("inc","ta","div","acc","dd","neg"] %*% COEF$Year_(f)[-1,])
我不知道如何正确处理模型正确年份的列。
进一步说明:这是我正在使用的模型:
## Store each subset regression in myregression(list)
myregression <- list()
count <- 1
## pooled regression in each year t, over the previous 6 years:
for(t in 1995:2011){
myregression[[count]] <- lm(inc.plus1 ~ inc+ta+div+acc+dd+neg,
subset(mydata, time<=t & time>=t-5))
## Name each regression based on the year:
names(myregression)[[count]] = paste0("Year_",t)
count <- count+1
}
编辑:
dput(COEF)
structure(list(Year_1995 = structure(c(-141.609371159809, 0.00111743731163439,
0.0949184274697431, -2.11074600795104, -0.292589586724503, 82.4603460821799,
-138.147911474103), .Names = c("(Intercept)", "inc", "ta", "div",
"acc", "dd", "neg")), Year_1996 = structure(c(-82.2972315187381,
-0.0212299058617464, 0.0772803184315252, -0.416827603793079,
-0.0883118611861919, 30.2333737922913, 25.1504762235072), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_1997 = structure(c(-34.4871123463234,
-0.0294760332129564, 0.0873679009872829, -0.282049864938565,
0.0648382318254867, -23.7215475326697, 31.0561317591284), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_1998 = structure(c(-6.7528526732182,
-0.0582891693780869, 0.0794164909970893, 0.636462774078724, 0.153849232685583,
-47.3804138199355, 60.7651916263095), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_1999 = structure(c(39.7818666097103,
-0.169538215393184, 0.0527893752400966, 2.40335453967921, 0.156934591577236,
-84.1313496511335, 56.0666431355059), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_2000 = structure(c(51.8959751012077,
-0.175727806845858, 0.0488400845863005, 2.4795771094385, 0.0882899033509166,
-95.5608616726431, -36.1369653445477), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_2001 = structure(c(23.1791877732751,
0.0441968784823003, 0.0442813644637717, 1.75339550336409, -0.0402863054679132,
-58.063992891827, -27.4218559824398), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_2002 = structure(c(6.86072514828435,
0.354929778347401, 0.0285872368436264, 1.15411677489571, -0.046480887062018,
-21.6129850032949, -24.5847316572374), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_2003 = structure(c(5.23465497240386,
0.165293389856035, 0.0281350748183987, 1.85791345171875, 0.0315728168554789,
-12.1135206433413, -38.3680650149334), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_2004 = structure(c(-5.08239969344246,
0.38122280421443, 0.0158888214408302, 1.43288589529407, -0.232577057535449,
-15.7730251398318, -25.031866190808), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_2005 = structure(c(-7.62209838055104,
0.780520600545447, 0.0168883991578571, 0.225921710905689, -0.282310746447424,
-26.3842781765152, 17.3447807493898), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_2006 = structure(c(-17.4696244494262,
0.925930038022017, 0.0137416876286758, -0.297709497723217, -0.235531473622334,
12.0046400465099, 13.8458913438642), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_2007 = structure(c(-12.9984755404426,
0.770457416822948, 0.0284132933700557, -0.201143068996775, -0.183371982389497,
-15.3147561235654, -6.42677324935822), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_2008 = structure(c(-32.7595741256977,
0.787746882558884, 0.0272961331794223, -0.180708217282236, -0.115358369479928,
18.5761365038557, 24.4477762931961), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_2009 = structure(c(-20.9986879109346,
0.851853556341307, 0.0104562009202871, 0.0478000168970218, -0.122239947105099,
8.13129909792091, 39.1398988903904), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_2010 = structure(c(-16.8169732598254,
0.784377210327309, 0.00586768328571868, 0.310500462697048, -0.0831795048437002,
8.85519480867171, 31.941025535722), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg")), Year_2011 = structure(c(4.20348638599851,
0.637166947185806, -0.00203465043623203, 0.696990528517877, -0.0849353510171298,
0.862652164443496, 5.76845024650845), .Names = c("(Intercept)",
"inc", "ta", "div", "acc", "dd", "neg"))), .Names = c("Year_1995",
"Year_1996", "Year_1997", "Year_1998", "Year_1999", "Year_2000",
"Year_2001", "Year_2002", "Year_2003", "Year_2004", "Year_2005",
"Year_2006", "Year_2007", "Year_2008", "Year_2009", "Year_2010",
"Year_2011"))