在R中循环回归

时间:2012-01-23 11:52:33

标签: r loops regression

将此代码重写为循环的最佳方法是什么?

# Dataset 1: read outdata1.csv, merge it with QOG on year and country
# (all = F keeps only rows that match in both tables), fit a plm model with
# model = "random", then summarise the fit and pass it to eststo().
# QOG, plm() and eststo() are not defined in this snippet.
a.data1 <- read.csv('outdata1.csv')
growth.sub.QOG1 <- merge(QOG, a.data1, by = c('year', 'country'), all = F)
growth.re1 <- plm(NY.GDP.PCAP.KD.ZG ~ log(Enrolment.in.all.programmes..Tertiary..Total)    + law + engineering + log(SP.POP.TOTL) + lp.legor
 ,data=growth.sub.QOG1, model="random")
summary(growth.re1)
eststo(growth.re1)


# Dataset 2: the same read / merge / fit / summarise / eststo() steps, applied
# to outdata2.csv. Only the numeric suffix of the names differs.
a.data2 <- read.csv('outdata2.csv')
growth.sub.QOG2 <- merge(QOG, a.data2, by = c('year', 'country'), all = F)
growth.re2 <- plm(NY.GDP.PCAP.KD.ZG ~ log(Enrolment.in.all.programmes..Tertiary..Total) + law + 
                  engineering + log(SP.POP.TOTL) + lp.legor
                    ,data=growth.sub.QOG2, model="random")
summary(growth.re2)
eststo(growth.re2)

# Dataset 3: the same read / merge / fit / summarise / eststo() steps, applied
# to outdata3.csv. Only the numeric suffix of the names differs.
a.data3 <- read.csv('outdata3.csv')
growth.sub.QOG3 <- merge(QOG, a.data3, by = c('year', 'country'), all = F)
growth.re3 <- plm(NY.GDP.PCAP.KD.ZG ~ log(Enrolment.in.all.programmes..Tertiary..Total) + law + 
                  engineering + log(SP.POP.TOTL) + lp.legor
                    ,data=growth.sub.QOG3, model="random")
summary(growth.re3)
eststo(growth.re3)

我试着这样做:

# Attempted loop over datasets 1 to 10 (the attempt the question reports as
# not working).
# NOTE(review): a.data, growth.sub.QOG and growth.re are indexed with single
# brackets but are never created in this snippet, and a single-bracket
# assignment is not the way to store a whole data frame or model object per
# iteration (a list with [[i]] would be).
for (i  in 1:10) {
# NOTE(review): 'outdata[i].csv' is a plain string literal; the value of i is
# never substituted into it, so the same file name is requested every time.
a.data[i] <- read.csv('outdata[i].csv')
growth.sub.QOG[i] <- merge(QOG, a.data[i], by = c('year', 'country'), all = F)
growth.re[i] <- plm(NY.GDP.PCAP.KD.ZG ~ log(Enrolment.in.all.programmes..Tertiary..Total) + law + 
                  engineering + log(SP.POP.TOTL) + lp.legor
                    ,data=growth.sub.QOG[i], model="random")
# NOTE(review): inside a for loop a bare summary() call is not auto-printed;
# print() would be needed to see it.
summary(growth.re[i])
eststo(growth.re[i])
}

但它不起作用,我做错了什么?

3 个答案:

答案 0 :(得分:1)

如果能提供一些示例数据就更好了,不过我一眼就看出一个错误:按你的写法无法读入文件。请尝试:

  # Build the CSV file name and the matching variable name for dataset i.
  file.name <- paste0("outdata", i, ".csv")
  variable <- paste0("a.data", i)
  # Load the file into a data frame.
  data.in <- read.csv(file = file.name)

如果要将其存储在动态创建的变量中,则其工作方式如下:

  # Bind the data frame to the name stored in `variable` (built above as
  # "a.data" followed by i), creating that variable in the current environment.
  assign(variable, data.in)

这应该修复第一部分!

答案 1 :(得分:0)

我认为这有效

# Example data directory; adjust to your machine.
datadir <- "D:/Regression"
# Kept from the original snippet: make datadir the working directory.
# The reads below use full paths, so they do not depend on this call.
setwd(datadir)

# Names of the files ending in ".csv". The dot is escaped so that it matches a
# literal "." instead of any character.
csvfiles <- list.files(datadir, pattern = "\\.csv$")

# Read each file and bind it to a variable named after the file: the ".csv"
# suffix is removed (anchored at the end of the name) and spaces are dropped,
# e.g. "outdata1.csv" becomes the variable outdata1.
# NOTE(review): sep = ";" is kept from the original; confirm that the files are
# semicolon-separated, because the question reads them with read.csv defaults.
for (x in csvfiles) {
  assign(
    gsub(" ", "", sub("\\.csv$", "", x)),
    read.csv(
      file.path(datadir, x),
      header = TRUE,
      stringsAsFactors = FALSE,
      sep = ";"
    )
  )
}

# Names of the data frames to process, one element per data set. Extend the
# range to cover every data set that has been read in.
data <- paste0("outdata", 1:3)

# Pre-allocate one list slot per data set. Data frames and model objects are
# stored with [[i]]; single-bracket assignment cannot hold them.
growth.sub.QOG <- vector("list", length(data))
growth.re <- vector("list", length(data))
Summary <- vector("list", length(data))
Est <- vector("list", length(data))

for (i in seq_along(data)) {
  # Look the data frame up by name instead of evaluating parsed text.
  tmp <- get(data[[i]])
  growth.sub.QOG[[i]] <- merge(
    QOG, tmp,
    by = c("year", "country"), all = FALSE
  )
  # Fit on the merged data (not on the raw file contents), as in the question.
  growth.re[[i]] <- plm(
    NY.GDP.PCAP.KD.ZG ~
      log(Enrolment.in.all.programmes..Tertiary..Total) +
      law + engineering + log(SP.POP.TOTL) + lp.legor,
    data = growth.sub.QOG[[i]],
    model = "random"
  )
  Summary[[i]] <- summary(growth.re[[i]])
  Est[[i]] <- eststo(growth.re[[i]])
}
# Drop the temporary data frame (guarded in case the loop never ran).
if (exists("tmp")) {
  rm(tmp)
}

祝你好运,如果你遇到一些错误,请告诉我......

答案 2 :(得分:0)

构建您的文件名。

# Input file names: "outdata1.csv", "outdata2.csv", "outdata3.csv".
# Alternatively, use list.files()/dir() as suggested by Chris.
files <- paste0("outdata", 1:3, ".csv")

代码其余部分如何组织,取决于您是否需要保留这些中间变量。我假设您需要,因此下面写成了多个独立的循环。如果不需要,可以把这些 lapply 语句合并。

读入数据。

# Read every CSV into a list of data frames, one element per file name.
# The file-name vector is `files`; `file` is a base R function, so passing it
# to lapply() here would fail.
all_data <- lapply(files, read.csv)

合并。

# Join each data set with QOG on year and country, keeping only rows that
# match in both tables.
merged <- lapply(
  all_data,
  function(csv_data) {
    merge(x = QOG, y = csv_data, by = c("year", "country"), all = FALSE)
  }
)

模型。

# Fit one plm model (model = "random") per merged data set.
models <- lapply(
  merged,
  function(data) {
    plm(
      formula = NY.GDP.PCAP.KD.ZG ~
        log(Enrolment.in.all.programmes..Tertiary..Total) +
        law + engineering + log(SP.POP.TOTL) + lp.legor,
      data = data,
      model = "random"
    )
  }
)

显示一些输出。

# Apply summary() and eststo() to every fitted model. Wrapping each assignment
# in parentheses makes R print the result at top level as well as store it.
(summaries <- lapply(models, summary))
(eststos <- lapply(models, eststo))