我有一个不平衡的面板数据集。以下数据可用于说明:
# Build an unbalanced panel: Ids 1-4 are observed in 2000-2002, while Id 5 is
# observed only in 2000 and 2002 (14 rows in total).
Id <- rep(c(1, 2, 3, 4, 5), times = c(3, 3, 3, 3, 2))
Year <- c(rep(2000:2002, times = 4), 2000, 2002)

# Four independent standard-normal series, one value per panel row.
z1 <- rnorm(14)
z2 <- rnorm(14)
z3 <- rnorm(14)
z4 <- rnorm(14)

# Target correlation matrix and its upper-triangular Cholesky factor
# (t(CholCORR) %*% CholCORR reproduces CORR).
CORR <- matrix(
  c(
    1.0, 0.6, 0.5, 0.2,
    0.6, 1.0, 0.7, 0.3,
    0.5, 0.7, 1.0, 0.4,
    0.2, 0.3, 0.4, 1.0
  ),
  nrow = 4,
  byrow = TRUE
)
CholCORR <- chol(CORR)

# Post-multiplying the independent draws by the Cholesky factor yields columns
# whose population correlation matrix is CORR.
DataTest <- as.data.frame(cbind(z1, z2, z3, z4) %*% CholCORR)
names(DataTest) <- c("y", "x1", "x2", "x3")
DataTest <- cbind(Id, Year, DataTest)
DataTest
Id Year y x1 x2 x3
1 1 2000 -0.7463355 -1.1920928 -1.2358912 -0.2527170
2 1 2001 -0.3475260 -0.1729497 -0.6252036 0.4366446
3 1 2002 0.6815678 0.1775869 0.2860103 0.8479373
4 2 2000 0.7927199 1.2830142 1.7018747 2.4475020
5 2 2001 0.5157535 0.4365100 1.1512340 1.0882581
6 2 2002 -1.2806121 0.3392759 0.5781835 0.2829615
7 3 2000 1.8741119 0.3904028 0.7984584 -1.7015025
8 3 2001 -0.4799960 0.6397883 0.4719150 0.8601328
9 3 2002 -2.2031991 0.3789198 -0.1143526 0.6771387
10 4 2000 -1.0634857 -0.6171644 -0.5732400 0.7718195
11 4 2001 0.3266059 -0.6882776 -0.2013544 -0.7242561
12 4 2002 0.2921222 -0.5149802 0.6180026 -0.4417939
13 5 2000 -2.2447755 -1.3328675 -1.6370130 0.4537451
14 5 2002 -0.9959034 -0.5958327 -0.3408927 0.2162799
我想对每一年分别进行一次横截面线性回归(共 3 次回归)并保存估计系数,但遇到了困难:一方面面板不平衡(Id 5 缺少 Year 2001 的观测),另一方面我想用 lag 和 diff 来估算以下动态公式:
formula(diff(y) ~ lag(x1) + x2 + x3)
我考虑过使用 plyr、plm 或 reshape 软件包,但当我想在不平衡面板上使用 lag 和 diff 时,找不到有效的方法。
谢谢,
M
答案 0 :(得分:2)
我找到了一个非常有效的解决方案。它使用了上面 statquant 建议的 split 和 lapply,同时还使用 plm 包来计算不平衡面板数据集的差分。
library(plm)
# Panel identifiers: Ids 1-4 appear three times each (2000-2002), Id 5 appears
# twice (2000 and 2002), so the panel is unbalanced.
Id <- sort(c(rep(1:4, times = 3), rep(5, times = 2)))
Year <- c(rep(c(2000, 2001, 2002), times = 4), 2000, 2002)

# Independent standard-normal draws, one series per model variable.
z1 <- rnorm(14)
z2 <- rnorm(14)
z3 <- rnorm(14)
z4 <- rnorm(14)

# Desired correlation structure between y, x1, x2 and x3.
CORR <- rbind(
  c(1.0, 0.6, 0.5, 0.2),
  c(0.6, 1.0, 0.7, 0.3),
  c(0.5, 0.7, 1.0, 0.4),
  c(0.2, 0.3, 0.4, 1.0)
)
# Upper-triangular factor with t(CholCORR) %*% CholCORR equal to CORR.
CholCORR <- chol(CORR)

# Mix the independent draws through the Cholesky factor, label the resulting
# columns, and prepend the panel identifiers.
DataTest <- setNames(
  as.data.frame(cbind(z1, z2, z3, z4) %*% CholCORR),
  c("y", "x1", "x2", "x3")
)
DataTest <- data.frame(Id, Year, DataTest, check.names = FALSE)
DataTest
Id Year y x1 x2 x3
1-2000 1 2000 -0.3837477 0.3065426 1.0646871 0.23757223
1-2001 1 2001 1.2804333 -0.2015468 -0.2769726 -0.63032551
1-2002 1 2002 -0.3242049 -1.3518821 -0.4720256 0.08556161
2-2000 2 2000 -0.3298273 -0.4354473 0.3588493 0.80121465
2-2001 2 2001 -0.5556866 1.1987959 1.6196555 1.28593473
2-2002 2 2002 0.2861269 0.2921481 1.1051309 1.66204274
3-2000 3 2000 0.9224208 0.4255198 0.8947040 1.11784735
3-2001 3 2001 -1.1052755 -1.7078627 -1.9503432 -2.82343057
3-2002 3 2002 -2.3020849 -0.8078460 -0.2692165 0.64940791
4-2000 4 2000 2.5565427 1.7034472 2.2688046 1.71329610
4-2001 4 2001 0.7015629 0.8518173 -0.2490498 0.70293713
4-2002 4 2002 0.9454791 0.8830710 0.7355299 0.51836849
5-2000 5 2000 0.4867604 0.3301825 0.7929939 0.55197991
5-2002 5 2002 -0.5859263 -0.1164737 0.5831942 -0.03307241
# Declare the panel structure: Id is the individual index, Year the time index.
pDataTest <- pdata.frame(DataTest, index = c("Id", "Year"))

# First difference of y, taken on the panel series (pDataTest$y) rather than on
# a plain vector, so plm's panel method for diff() is the one applied.
pDataTest$yDiff <- diff(pDataTest$y, lag = 1L)
pDataTest
Id Year y x1 x2 x3 yDiff
1-2000 1 2000 -0.3837477 0.3065426 1.0646871 0.23757223 NA
1-2001 1 2001 1.2804333 -0.2015468 -0.2769726 -0.63032551 1.6641810
1-2002 1 2002 -0.3242049 -1.3518821 -0.4720256 0.08556161 -1.6046382
2-2000 2 2000 -0.3298273 -0.4354473 0.3588493 0.80121465 NA
2-2001 2 2001 -0.5556866 1.1987959 1.6196555 1.28593473 -0.2258592
2-2002 2 2002 0.2861269 0.2921481 1.1051309 1.66204274 0.8418135
3-2000 3 2000 0.9224208 0.4255198 0.8947040 1.11784735 NA
3-2001 3 2001 -1.1052755 -1.7078627 -1.9503432 -2.82343057 -2.0276963
3-2002 3 2002 -2.3020849 -0.8078460 -0.2692165 0.64940791 -1.1968093
4-2000 4 2000 2.5565427 1.7034472 2.2688046 1.71329610 NA
4-2001 4 2001 0.7015629 0.8518173 -0.2490498 0.70293713 -1.8549798
4-2002 4 2002 0.9454791 0.8830710 0.7355299 0.51836849 0.2439162
5-2000 5 2000 0.4867604 0.3301825 0.7929939 0.55197991 NA
5-2002 5 2002 -0.5859263 -0.1164737 0.5831942 -0.03307241 NA
# Run one cross-sectional regression per year and collect the fits in a list.

# Lag x1 on the full panel BEFORE splitting by year. A single-year subset has
# no previous period to look back to, and a lag(x1) term evaluated by lm() on
# the raw column falls back to stats::lag(), which only shifts the time-series
# attribute and leaves the values unchanged (i.e. it would silently regress on
# the contemporaneous x1).
pDataTest$x1Lag <- lag(pDataTest$x1)

# One data set per year. The grouping must be the Year column itself:
# split(pDataTest, 'Year') groups by the single string "Year", which produces
# one group only and leaves Temp[-1] empty.
Temp <- split(pDataTest, pDataTest$Year)

# Dynamic specification: differenced y on lagged x1 and contemporaneous x2, x3.
MyFormula <- yDiff ~ x1Lag + x2 + x3

# The first year is dropped because neither a difference nor a lag can be
# formed there. Rows with NA in yDiff or x1Lag (e.g. an Id whose previous year
# is missing) are removed by lm()'s default NA handling.
Fit <- lapply(Temp[-1], function(year_data) lm(MyFormula, data = year_data))
Fit
$`2001`
Call:
FUN(formula = ..1, data = X[[1L]])
Coefficients:
(Intercept) lag(x1) x2 x3
15.27 -52.88 -1.53 39.17
$`2002`
Call:
FUN(formula = ..1, data = X[[2L]])
Coefficients:
(Intercept) lag(x1) x2 x3
-0.8738 0.1038 1.2805 0.1626