Question

我想在 R 中编写一段（我认为）非常简单的程序，但始终无法让它正确运行。我有一个包含 50 个国家（编号 1 到 50）的数据集，每个国家有 15 年的数据、大约 20 个变量。目前我只想检验一个变量（SMD）对因变量（OS）的影响。我希望通过对国家逐个循环来完成回归，这样得到的是每个国家各自的输出，而不是整体的输出。

我认为先创建一个子集比较稳妥（先只看国家 1，之后循环再把国家编号加一，去检验国家 2，依此类推）。我原以为页面底部的回归会给出国家 1 的输出，而不是整个数据集的整体结果。但我不断收到以下错误：

> pdata <- plm.data(newdata, index=c("Country","Date"))
  series    are constants and have been removed
> pooling <- plm(Y ~ X, data=pdata, model= "pooling") 
  series Country, xRegion are constants and have been removed
  Error in model.matrix.pFormula(formula, data, rhs = 1, model = model,  : 
  NA in the individual index variable
> summary(pooling)
  Error in summary(pooling) : object 'pooling' not found

也许我的整体思路就是错的，但我认为在这一步还没跑通之前，继续编写循环本身没有意义。无论是关于如何解决这些错误，还是关于用其他方式编写循环的建议，我都非常感谢。

我的代码：

# Script from the question: fit a pooled plm regression on the rows of a
# single country (Country == 1).

# Remove every object from the current workspace.
rm(list = ls())
# Read the table from an interactively chosen file; the first row holds the
# column names and "," is declared as the decimal mark.
mydata <- read.table(file = file.choose(), header = TRUE, dec = ",")
# Print the column names.
names(mydata)
# Put the columns of mydata on the search path so they can be used by bare name.
attach(mydata)

# One-column matrices built from the attached columns, i.e. from ALL rows of
# mydata rather than from the per-country subset created below.
# NOTE(review): the data sample in the question shows SMD and OS stored as
# factors, so these matrices presumably hold level codes rather than the
# measured values - confirm.
Y <- cbind(SMD)
X <- cbind(OS)

# Keep only the rows belonging to country 1.
newdata <- subset(mydata, Country %in% c(1))

# Print the subset for inspection.
newdata

# NOTE(review): plm.data() and plm() come from the plm package; no library(plm)
# call is visible in this snippet - presumably it was loaded beforehand.
# NOTE(review): the formula refers to Y and X, which are not columns of pdata;
# they appear to be taken from the workspace objects above, which cover every
# country while pdata covers only one - verify, as this mismatch is a likely
# source of the reported error.
pdata <- plm.data(newdata, index=c("Country","Date"))
pooling <- plm(Y ~ X, data=pdata, model= "pooling") 
summary(pooling)

编辑：以下是前 2 个国家的数据样本，使用它会出现相同的错误：

dput(mydata)
structure(list(Region = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("NAF", "SAME"), class = "factor"), Country = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), Date = c(1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2011L, 2012L, 2013L, 2014L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 2010L, 2011L, 2012L, 2013L, 2014L), OS = structure(c(19L, 25L, 27L, 15L, 22L, 20L, 23L, 9L, 7L, 5L, 2L, 1L, 4L, 3L, 6L, 10L, 11L, 13L, 11L, 8L, 26L, 25L, 31L, 29L, 28L, 21L, 30L, 24L, 24L, 16L, 11L, 14L, 12L, 17L, 18L, 29L, 32L, 32L, 33L, 34L), .Label = c("51.5", "52.2", "55.6", "56.4", "56.7", "57.7", "57.8", "58.3", "59", "59.2", "59.6", "59.9", "60.2", "60.4", "61.1", "61.2", "62.2", "62.3", "62.8", "63.2", "63.3", "63.8", "63.9", "64.2", "64.3", "64.5", "64.7", "65.3", "65.5", "65.6", "66.4", "68", "69.6", "70.7"), class = "factor"), SMD = structure(c(7L, 12L, 20L, 21L, 17L, 15L, 13L, 10L, 14L, 22L, 23L, 33L, 1L, 32L, 29L, 34L, 28L, 25L, NA, NA, 9L, 6L, 8L, 4L, 2L, 35L, 3L, 36L, 5L, 11L, 16L, 18L, 24L, 19L, 26L, 31L, 27L, 30L, NA, NA), .Label = c("100.3565662", "13.44788845", "13.45858747", "13.56815534", "15.05892471", "17.63789658", "18.04088718", "18.3101351", "19.34226196", "21.25530884", "21.54423145", "23.75898948", "24.08770926", "26.39817342", "29.44079001", "31.40605191", "34.46667996", "34.52913657", "35.66070947", "36.4419931", "39.16875621", "44.0126137", "45.72949566", "49.13062679", "54.83730247", "56.87886311", "59.80971583", "60.5658962", "69.20148901", "70.91362874", "72.64845214", "73.97139238", "75.20140919", "76.18378138", "9.570435019", "9.867635305"), class = "factor")), .Names = c("Region", "Country", "Date", "OS", "SMD"), class = "data.frame", row.names = c(NA, -40L))

Answer 1

您确定需要使用 plm 吗？下面的代码会按国家生成一个回归摘要（summary）列表。

# Convert factors to numeric.
# as.numeric() applied directly to a factor returns the internal level codes
# (1, 2, 3, ...), not the numbers shown as labels, so convert through
# as.character() first.
mydata$SMD <- as.numeric(as.character(mydata$SMD))
mydata$OS  <- as.numeric(as.character(mydata$OS))

# Country identifiers, in order of first appearance; one model is fitted for
# each of them.
countries <- unique(mydata$Country)

# Using lapply(...): returns a flat list with one lm summary per country.
smry <- lapply(countries,
               function(cntry)
                 summary(lm(SMD ~ OS,
                            data = mydata[mydata$Country == cntry, ])))

# Same thing, using a for loop.
# The result list is preallocated and filled by position; writing
# smry <- list(smry, new_summary) would wrap the previous list inside a new
# two-element list on every pass instead of appending to it.
smry <- vector("list", length(countries))
for (i in seq_along(countries)) {
  smry[[i]] <- summary(lm(SMD ~ OS,
                          data = mydata[mydata$Country == countries[i], ]))
}

在您的数据集中，SMD 和 OS 是因子（factor），需要先转换为数值型。

plm中的循环子集

1 个答案: