我创建了以下函数来确定两个变量的滞后。
但是,这个函数只需要两个参数,我想在整个数据集上运行它:
datSel <- structure(list(stat.resProp.Dwell.4 = c(0.000887705, 0.007954085,
-0.025859667, 0.024097552, 0.114052787, 0.023329207, 0.042143181,
-0.092587287, -0.004050228, -0.001624696, 0.020121403, -0.100502922,
0.057354185, 0.025463388, 0.037409854, 0.001561281, -0.028482938,
-0.004827041, 0.014411779, -0.029034298, 0.021053409, -0.067963182,
0.032070259, -0.038091783, 0.039751534, 0.027802281, -0.027802281,
-0.013355791, 0.009201236, -0.073403679, 0.021277398, -0.033901552,
0.012624153, -0.065733979, 0.032017801, -0.072042665, 0.041936911,
0.002861232, 0.017933468, -0.01698154, 0.006638242, -0.08375153,
-0.007220248, 0.0255507, 0.019980685, 0.013752673, 0.026000502,
-0.021134312, -0.019608471, 0.0166916, -0.021654389, 0.066402455,
0.024828862, -0.083302632, 0.042518482, -0.052439198, 0.037186281,
-0.056311172, -0.012270093), stat.lohn = c(0, -0.007558004, -0.015289567,
0, 0, -0.009609384, -0.019500305, 0, 0, -0.012458015, -0.025391532,
-0.000983501, 0, -0.00165265, -0.003313516, 0.000204576, 0, -0.004898564,
-0.009869709, 0, 0, -0.010574012, -0.021489482, 0, 0, -0.011534651,
-0.023476287, 0, 0, -0.00814845, -0.016498838, 0, 0, -0.0099856,
-0.020275409, -0.002818337, 0, -0.007212389, -0.014582736, 0,
0, -0.004121565, -0.008294445, 0, 0, -0.010766386, -0.021886884,
0, 0, -0.010179741, -0.02067574, 0, 0, -0.011797067, -0.024020039,
-0.002017983, -0.007343864, -0.007398196, -0.014962644), stat.resProp.Dwell.1 = c(0.012777325,
-0.002991775, -0.057819571, -0.00796817, -0.019386714, 0, 0.009740337,
0.005638356, -0.035148694, 0, 0.027084134, -0.160377856, 0.101169235,
-0.043007944, 0.043007944, -0.002580647, -0.015625318, 0.023347364,
0.007662873, -0.09607383, -0.024575906, 0.056733018, -0.000904568,
-0.058703392, 0.011450507, 0.007561473, 0.037879817, -0.032246,
0.042169401, -0.001796946, -0.024580209, -0.148788737, 0.082097362,
-0.000985707, -0.00098668, 0.003940892, -0.049380309, 0.005151995,
0.027371197, -0.025317808, 0.019299736, -0.047382704, -0.010604553,
0.082827084, -0.04516573, 0.003075348, 0.007139245, 0.022111454,
-0.004982571, -0.038701368, 0.018519048, -0.049096021, 0.061254226,
-0.020346582, 0.023363175, -0.00402415, -0.014213437, 0.023245109,
0.027587957), stat.carReg = c(0.022775414, 0.008073857, 0.002624717,
0.169431097, -0.144595366, 0.066716837, -0.086971929, 0.037928208,
0.071752161, -0.046824102, 0.106085873, 0.049965928, -0.057984255,
-0.091650262, 0.090732857, -0.082282389, 0.053376121, -0.044203971,
-0.022855425, 0.025856271, 0.000136493, 0.05579193, -0.293966656,
0.013645739, 0.059732986, 0.187020956, -0.145234848, 0.11041385,
-0.126539687, -0.000949877, 0.031473389, 0.020267816, -0.02180532,
-0.07175183, 0.147500145, -0.040559138, 0.008394819, 0.049045337,
-0.043050615, 0.094358754, -0.058408438, -0.005018402, -0.061717889,
0.100150837, -0.071100417, -0.084393865, 0.002854733, 0.002141389,
-0.026538398, 0.013480513, -0.046002189, -0.030495611, 0.052899746,
0.012842017, 0.064086498, 0.020757573, -0.043441298, -0.009563043,
0.048033848)), .Names = c("stat.resProp.Dwell.4", "stat.lohn",
"stat.resProp.Dwell.1", "stat.carReg"), row.names = c(NA, -59L
), class = "data.frame")
函数和我的函数调用是:
select.lags<-function(x,y,max.lag=8) {
y<-as.numeric(y)
y.lag<-embed(y,max.lag+1)[,-1,drop=FALSE]
x.lag<-embed(x,max.lag+1)[,-1,drop=FALSE]
t<-tail(seq_along(y),nrow(y.lag))
ms=lapply(1:max.lag,function(i) lm(y[t]~y.lag[,1:i]+x.lag[,1:i]))
pvals<-mapply(function(i) anova(ms[[i]],ms[[i-1]])[2,"Pr(>F)"],max.lag:2)
ind<-which(pvals<0.05)[1]
ftest<-ifelse(is.na(ind),1,max.lag-ind+1)
aic<-as.numeric(lapply(ms,AIC))
bic<-as.numeric(lapply(ms,BIC))
structure(list(ic=cbind(aic=aic,bic=bic),pvals=pvals,
selection=list(aic=which.min(aic),bic=which.min(bic),ftest=ftest)))
}
for (i in length(datSel) ) {
for (y in length(datSel) ) {
d1<-ts(datSel[i])
d2<-ts(datSel[y])
lag <- select.lags(d1,d2,5)
}
}
作为延迟的输出我得到:
> lag
$ic
aic bic
[1,] -115.3623 -109.56679
[2,] -114.3370 -106.60972
[3,] -116.2026 -106.54350
[4,] -114.7030 -103.11210
[5,] -112.7153 -99.19253
[6,] -110.8018 -95.34721
[7,] -110.0812 -92.69477
[8,] -110.1427 -90.82446
$pvals
[1] 0.1952302 0.3017934 0.7858944 0.9176337 0.5040079 0.0604511 0.3406657
$selection
$selection$aic
[1] 3
$selection$bic
[1] 1
$selection$ftest
[1] 1
正如您所看到的,我只得到8个结果,但是,我的data.frame有20个变量。
任何建议我做错了什么?
感谢您的回复!
答案 0 :(得分:1)
如果你想要,例如存储AIC标准的结果:
lag.aic.store = matrix(NA, 4, 4)
for (i in 1:length(datSel) ) {
for (y in 1:length(datSel) ) {
d1<-ts(datSel[,i])
d2<-ts(datSel[,y])
lag <- select.lags(d1,d2,5)
lag.store.aic[i,y] = lag$selection$aic
}
}
$ic
中有8个值,因为max.lag
为8,它与您的变量数量无关。
还请注意,为了清晰起见,我在变量索引时添加了逗号,并且必须循环遍历1:length(datSel)
,否则您将只捕获最后一个变量。