在R中对不平衡面板数据进行多个横截面回归

时间:2013-04-24 22:32:47

标签: r plyr reshape

我有一个不平衡的面板数据集。以下数据可用于说明:

# Panel identifiers: Ids 1-4 are observed in 2000-2002, Id 5 only in 2000
# and 2002, which is what makes the panel unbalanced.
Id <- sort(c(rep(1:4, times = 3), rep(5, times = 2)))
Year <- c(rep(2000:2002, times = 4), 2000, 2002)

# Four independent standard-normal draws, one value per panel row.
z1 <- rnorm(length(Id))
z2 <- rnorm(length(Id))
z3 <- rnorm(length(Id))
z4 <- rnorm(length(Id))

# Target correlation matrix for (y, x1, x2, x3) and its upper-triangular
# Cholesky factor.
CORR <- matrix(
  c(
    1,   0.6, 0.5, 0.2,
    0.6, 1,   0.7, 0.3,
    0.5, 0.7, 1,   0.4,
    0.2, 0.3, 0.4, 1
  ),
  nrow = 4,
  byrow = TRUE
)
CholCORR <- chol(CORR)

# Post-multiplying the independent draws by the Cholesky factor produces
# the correlated columns; the identifiers are then attached in front.
DataTest <- cbind(
  Id,
  Year,
  setNames(
    as.data.frame(cbind(z1, z2, z3, z4) %*% CholCORR),
    c("y", "x1", "x2", "x3")
  )
)
print(DataTest)

   Id Year          y         x1         x2         x3
1   1 2000 -0.7463355 -1.1920928 -1.2358912 -0.2527170
2   1 2001 -0.3475260 -0.1729497 -0.6252036  0.4366446
3   1 2002  0.6815678  0.1775869  0.2860103  0.8479373
4   2 2000  0.7927199  1.2830142  1.7018747  2.4475020
5   2 2001  0.5157535  0.4365100  1.1512340  1.0882581
6   2 2002 -1.2806121  0.3392759  0.5781835  0.2829615
7   3 2000  1.8741119  0.3904028  0.7984584 -1.7015025
8   3 2001 -0.4799960  0.6397883  0.4719150  0.8601328
9   3 2002 -2.2031991  0.3789198 -0.1143526  0.6771387
10  4 2000 -1.0634857 -0.6171644 -0.5732400  0.7718195
11  4 2001  0.3266059 -0.6882776 -0.2013544 -0.7242561
12  4 2002  0.2921222 -0.5149802  0.6180026 -0.4417939
13  5 2000 -2.2447755 -1.3328675 -1.6370130  0.4537451
14  5 2002 -0.9959034 -0.5958327 -0.3408927  0.2162799

我想对每一年分别做一次横截面线性回归(共3次回归)并保存估计的系数,但面板是不平衡的(Id 5 缺少 `Year` 2001 的观测值),而且我想使用 `lag` 和 `diff` 来估计以下动态公式:

formula(diff(y) ~ lag(x1) + x2 + x3)

我考虑过使用 `plm` 和 `reshape` 软件包,但当我想使用 `plyr` 时,找不到一种有效的方法在我的不平衡面板上使用 `diff` 和 `lag`。

谢谢,

M

1 个答案:

答案 0 :(得分:2)

我找到了一个非常有效的解决方案。它使用了上面 statquant 建议的 `split` 和 `lapply`,同时还使用 plm 包来计算不平衡面板数据集的差分。

library(plm)

# Panel identifiers: Ids 1-4 are observed in 2000-2002, Id 5 only in 2000
# and 2002, which is what makes the panel unbalanced.
Id <- sort(c(rep(1:4, times = 3), rep(5, times = 2)))
Year <- c(rep(2000:2002, times = 4), 2000, 2002)

# Four independent standard-normal draws, one value per panel row.
z1 <- rnorm(length(Id))
z2 <- rnorm(length(Id))
z3 <- rnorm(length(Id))
z4 <- rnorm(length(Id))

# Target correlation matrix for (y, x1, x2, x3) and its upper-triangular
# Cholesky factor.
CORR <- matrix(
  c(
    1,   0.6, 0.5, 0.2,
    0.6, 1,   0.7, 0.3,
    0.5, 0.7, 1,   0.4,
    0.2, 0.3, 0.4, 1
  ),
  nrow = 4,
  byrow = TRUE
)
CholCORR <- chol(CORR)

# Post-multiplying the independent draws by the Cholesky factor produces
# the correlated columns; the identifiers are then attached in front.
DataTest <- cbind(
  Id,
  Year,
  setNames(
    as.data.frame(cbind(z1, z2, z3, z4) %*% CholCORR),
    c("y", "x1", "x2", "x3")
  )
)
print(DataTest)

       Id Year          y         x1         x2          x3   
1-2000  1 2000 -0.3837477  0.3065426  1.0646871  0.23757223   
1-2001  1 2001  1.2804333 -0.2015468 -0.2769726 -0.63032551  
1-2002  1 2002 -0.3242049 -1.3518821 -0.4720256  0.08556161 
2-2000  2 2000 -0.3298273 -0.4354473  0.3588493  0.80121465        
2-2001  2 2001 -0.5556866  1.1987959  1.6196555  1.28593473 
2-2002  2 2002  0.2861269  0.2921481  1.1051309  1.66204274  
3-2000  3 2000  0.9224208  0.4255198  0.8947040  1.11784735         
3-2001  3 2001 -1.1052755 -1.7078627 -1.9503432 -2.82343057 
3-2002  3 2002 -2.3020849 -0.8078460 -0.2692165  0.64940791 
4-2000  4 2000  2.5565427  1.7034472  2.2688046  1.71329610         
4-2001  4 2001  0.7015629  0.8518173 -0.2490498  0.70293713 
4-2002  4 2002  0.9454791  0.8830710  0.7355299  0.51836849  
5-2000  5 2000  0.4867604  0.3301825  0.7929939  0.55197991        
5-2002  5 2002 -0.5859263 -0.1164737  0.5831942 -0.03307241 

# Declare the panel structure: Id is the individual index, Year the time
# index.
pDataTest <- pdata.frame(DataTest, index = c("Id", "Year"))

# First difference of y, taken on the panel series extracted with `$`.
# In the printed result, the first year of every Id is NA, and so is
# Id 5 in 2002 (its 2001 observation is missing).
pDataTest$yDiff <- diff(pDataTest$y)
print(pDataTest)

       Id Year          y         x1         x2          x3      yDiff
1-2000  1 2000 -0.3837477  0.3065426  1.0646871  0.23757223         NA
1-2001  1 2001  1.2804333 -0.2015468 -0.2769726 -0.63032551  1.6641810
1-2002  1 2002 -0.3242049 -1.3518821 -0.4720256  0.08556161 -1.6046382
2-2000  2 2000 -0.3298273 -0.4354473  0.3588493  0.80121465         NA
2-2001  2 2001 -0.5556866  1.1987959  1.6196555  1.28593473 -0.2258592
2-2002  2 2002  0.2861269  0.2921481  1.1051309  1.66204274  0.8418135
3-2000  3 2000  0.9224208  0.4255198  0.8947040  1.11784735         NA
3-2001  3 2001 -1.1052755 -1.7078627 -1.9503432 -2.82343057 -2.0276963
3-2002  3 2002 -2.3020849 -0.8078460 -0.2692165  0.64940791 -1.1968093
4-2000  4 2000  2.5565427  1.7034472  2.2688046  1.71329610         NA
4-2001  4 2001  0.7015629  0.8518173 -0.2490498  0.70293713 -1.8549798
4-2002  4 2002  0.9454791  0.8830710  0.7355299  0.51836849  0.2439162
5-2000  5 2000  0.4867604  0.3301825  0.7929939  0.55197991         NA
5-2002  5 2002 -0.5859263 -0.1164737  0.5831942 -0.03307241         NA

# Build the lagged regressor on the full panel, before splitting by year.
# On a panel series, lag() dispatches to plm's pseries method, which shifts
# within each Id along the Year index (time-based shift by default), so the
# missing 2001 row of Id 5 gives NA instead of a value from the wrong year.
# Writing lag(x1) inside the lm() formula does not work: on a one-year
# slice the column is a plain numeric vector, stats::lag() only attaches a
# time-series attribute, and the regression silently uses the unlagged x1.
pDataTest$x1Lag <- lag(pDataTest$x1)

# One cross-section per year. The grouping factor has to be the Year column
# itself; a bare "Year" string is recycled into a single group.
Temp <- split(pDataTest, pDataTest$Year)

# The first year has no differences or lags, so it is dropped. Each
# remaining year gets its own lm() fit; the lagged term is reported under
# the name x1Lag.
MyFormula <- formula(yDiff ~ x1Lag + x2 + x3)
Fit <- lapply(Temp[-1], lm, formula = MyFormula)
Fit

$`2001`

Call:
FUN(formula = ..1, data = X[[1L]])

Coefficients:
(Intercept)      lag(x1)           x2           x3  
      15.27       -52.88        -1.53        39.17  


$`2002`

Call:
FUN(formula = ..1, data = X[[2L]])

Coefficients:
(Intercept)      lag(x1)           x2           x3  
    -0.8738       0.1038       1.2805       0.1626