Question

我有一个小的N大T面板，我通过plm（面板线性回归模型）估算，具有固定效果。

有没有办法获得新数据集的预测值？（我想要估计我的样本子集的参数，然后使用这些参数计算整个样本的模型隐含值）。

谢谢！

Answer 1

包中有（至少）两种方法可以从plm对象生成估计值：

- fixef.plm：提取固定效果

- pmodel.response：提取model.response

的函数

在我看来，作者对提供“随机效应”的估计不感兴趣。这可能是一个问题“如果你不知道如何自己做，那么我们就不想给你一把锋利的刀来削减你自己太深。”

Answer 2

我写了一个名为predict.out.plm的函数，它可以为原始数据和操作数据集（具有相同的列名）创建预测。

predict.out.plm计算a）转换数据的预测（拟合）结果，b）根据水平结果构建。该函数适用于使用plm的第一差异（FD）估计和固定效应（FE）估计。对于FD，它会随着时间的推移产生不同的结果，对于FE，它会产生时间贬值的结果。

该功能在很大程度上未经测试，可能仅适用于强平衡数据帧。

非常欢迎任何建议和更正。帮助开发一个小R包将非常感激。

功能predict.out.plm

predict.out.plm<-function(
  estimate,
  formula,
  data,
  model="fd",
  pname="y",
  pindex=NULL,
  levelconstr=T
){
  # estimate=e.fe
  # formula=f
  # data=d
  # model="within"
  # pname="y"
  # pindex=NULL
  # levelconstr=T
  #get index of panel data
  if (is.null(pindex) && class(data)[1]=="pdata.frame") {
    pindex<-names(attributes(data)$index)
  } else {
    pindex<-names(data)[1:2]
  }
  if (class(data)[1]!="pdata.frame") { 
    data<-pdata.frame(data)
  }
  #model frame
  mf<-model.frame(formula,data=data)
  #model matrix - transformed data
  mn<-model.matrix(formula,mf,model)

  #define variable names
  y.t.hat<-paste0(pname,".t.hat")
  y.l.hat<-paste0(pname,".l.hat")
  y.l<-names(mf)[1]

  #transformed data of explanatory variables 
  #exclude variables that were droped in estimation
  n<-names(estimate$aliased[estimate$aliased==F])
  i<-match(n,colnames(mn))
  X<-mn[,i]

  #predict transformed outcome with X * beta
  # p<- X %*% coef(estimate)
  p<-crossprod(t(X),coef(estimate))
  colnames(p)<-y.t.hat

  if (levelconstr==T){
    #old dataset with original outcome
    od<-data.frame(
      attributes(mf)$index,
      data.frame(mf)[,1]
    )
    rownames(od)<-rownames(mf) #preserve row names from model.frame
    names(od)[3]<-y.l

    #merge old dataset with prediciton
    nd<-merge(
      od,
      p,
      by="row.names",
      all.x=T,
      sort=F
    )
    nd$Row.names<-as.integer(nd$Row.names)
    nd<-nd[order(nd$Row.names),]

    #construct predicted level outcome for FD estiamtions
    if (model=="fd"){
      #first observation from real data
      i<-which(is.na(nd[,y.t.hat]))
      nd[i,y.l.hat]<-NA
      nd[i,y.l.hat]<-nd[i,y.l]
      #fill values over all years
      ylist<-unique(nd[,pindex[2]])[-1]
      ylist<-as.integer(as.character(ylist))
      for (y in ylist){
        nd[nd[,pindex[2]]==y,y.l.hat]<-
          nd[nd[,pindex[2]]==(y-1),y.l.hat] + 
          nd[nd[,pindex[2]]==y,y.t.hat]
      }
    } 
    if (model=="within"){
      #group means of outcome
      gm<-aggregate(nd[, pname], list(nd[,pindex[1]]), mean)
      gl<-aggregate(nd[, pname], list(nd[,pindex[1]]), length)
      nd<-cbind(nd,groupmeans=rep(gm$x,gl$x))
      #predicted values + group means
      nd[,y.l.hat]<-nd[,y.t.hat] + nd[,"groupmeans"]
    } 
    if (model!="fd" && model!="within") {
      stop('funciton works only for FD and FE estimations')
    }
  }
  #results
  results<-p
  if (levelconstr==T){
    results<-list(results,nd)
    names(results)<-c("p","df")
  }
  return(results)
}

测试功能：

##packages
library(plm)

##test dataframe
#data structure
N<-4
G<-2
M<-5
d<-data.frame(
  id=rep(1:N,each=M),
  year=rep(1:M,N)+2000,
  gid=rep(1:G,each=M*2)
)
#explanatory variable
d[,"x"]=runif(N*M,0,1)
#outcome
d[,"y"] = 2 * d[,"x"] + runif(N*M,0,1)
#panel data frame
d<-pdata.frame(d,index=c("id","year"))

##new data frame for out of sample prediction
dn<-d
dn$x<-rnorm(nrow(dn),0,2)

##estimate
#formula
f<- pFormula(y ~ x + factor(year))
#fixed effects or first difffernce estimation
e<-plm(f,data=d,model="within",index=c("id","year"))
e<-plm(f,data=d,model="fd",index=c("id","year"))
summary(e)

##fitted values of estimation
#transformed outcome prediction 
predict(e)
c(pmodel.response(e)-residuals(e))
predict.out.plm(e,f,d,"fd")$p
# "level" outcome prediciton 
predict.out.plm(e,f,d,"fd")$df$y.l.hat
#both
predict.out.plm(e,f,d,"fd")

##out of sampel prediciton 
predict(e,newdata=d) 
predict(e,newdata=dn) 
# Error in crossprod(beta, t(X)) : non-conformable arguments
# if plm omits variables specified in the formula (e.g. one year in factor(year))
# it tries to multiply two matrices with different length of columns than regressors
# the new funciton avoids this and therefore is able to do out of sample predicitons
predict.out.plm(e,f,dn,"fd")

Answer 3

看起来有一个新的软件包可以为包括plm

在内的各种模型进行样本内预测

https://cran.r-project.org/web/packages/prediction/prediction.pdf

Answer 4

plm现在具有predict.plm()功能，尽管未记录/导出。

还请注意，predict适用于转换后的模型（即在执行了内部/中间/ fd转换之后），而不是原始模型。我推测其原因是在面板数据框架中进行预测更加困难。确实，您需要考虑自己是否在预测：

新时间段，对于现有个人，您是否使用了个人FE？然后，您可以将预测添加到现有的均值中
新时期，对于新个人？然后，您需要弄清楚您要使用哪个个人？
使用随机效应模型会更加复杂，因为效应不容易得出

在下面的代码中，我说明了如何在现有样本上使用拟合值：

library(plm)
#> Loading required package: Formula
library(tidyverse)

data("Produc", package = "plm")
zz <- plm(log(gsp) ~ log(pcap) + log(pc) + log(emp) + unemp,
          data = Produc, index = c("state","year"))


## produce a dataset of prediction, added to the group means
Produc_means <- Produc %>% 
  mutate(y = log(gsp)) %>% 
  group_by(state) %>% 
  transmute(y_mean = mean(y),
            y = y, 
            year = year) %>% 
  ungroup() %>% 
  mutate(y_pred = predict(zz) + y_mean) %>% 
  select(-y_mean)

## plot it
Produc_means %>% 
  gather(type, value, y, y_pred) %>% 
  filter(state %in% toupper(state.name[1:5])) %>% 
  ggplot(aes(x = year, y = value, linetype = type))+
  geom_line() +
  facet_wrap(~state) +
  ggtitle("Visualising in-sample prediction, for 4 states")
#> Warning: attributes are not identical across measure variables;
#> they will be dropped

^{由reprex package（v0.2.1）于2018-11-20创建}

Answer 5

您可以通过residuals(reg_name)计算残差。从这里，您可以从响应变量中减去它们并获得预测值。

R中是否存在PLM的预测函数？

5 个答案: