如何在多个文件分析中附加多个输出在R中使用for循环

时间:2017-10-10 21:08:04

标签: r

我正在伸出手去看看是否有人可以将我引导到我可以微调我的R代码的资源。我正在使用R对许多具有相同变量的文件进行分析。最后,我想附加所有输出表 - 我有12个表 - 并且只有一个输出表。关于如何成功的任何想法。 回顾一下。如果使用for循环有多个输出,则如何附加文件以生成唯一文件。 请参阅以下代码:

setwd("C:/Data/Key/Spring2017/era");

subjects=c("ALGEBRA1","BIOLOGY","LITERATURE");

 modules=c("module1","module2");

  `modes=c("O","P");`

#loopcount<-1;



 for(i in 1:length(subjects)){

    for(k in 1:length(modes)){

       for(m in 1:length(modules)){

    #i=1;
    #k=1;
    #m=1

                    subj <- subjects[i];
                    mode <- modes[k];
                    module <- modules[m];

         cat("Subj:",subj,",mode: ",mode,",module: ",module,"\n");

        ###Reading the analyses output;

        outfile<-
 paste("output/erasure_Mean_",mode,"_",module,"_",subj,".csv",sep="");


    ###Reading in the datafiles;

     infile<-paste("data/erasure_",mode,"_",module,"_",subj,".csv",sep="");


    dat<-read.csv(infile,header=TRUE,as.is=T);


    newid<-paste(dat[,"dis"],dat[,"sch"]);

    dat<-data.frame(dat,newid);

    head(dat);


    count<-function(dat){
                      length(na.omit(dat))
                       }

  ##computing students count per school for Algebra1;

   temp<-aggregate(dat[,"subj"],by=list(dat[,"newid"]),FUN="count")
   colnames(temp)=c("newid","N")

   #removing duplicate id;
#y<-dat[!duplicated(dat[,"newid"]),c("newid","dis","disname","schname","drcid","subj","mode","module")]
y<-dat[!duplicated(dat[,"newid"]),c(1,2,3,4,7:9,15)]

head(y)

MinN<-10;
out<-merge(y,temp,by="newid")

head(out)


#count of the students with FivePlus above;
temp<-aggregate(dat[,"FivePlus"],list(dat[,"newid"]),FUN=sum)
colnames(temp)<-c("newid","FivePlusN");
out<-merge(out,temp,by="newid")
temp<-aggregate(dat[,"FivePlus"],list(dat[,"newid"]),FUN=mean);
colnames(temp)<-c("newid","FivePlusPer");
temp[,2]=temp[,2]*100
out<-merge(out,temp,by="newid");

#state erasure mean
gmean<-mean(dat[,"tot_wr"])
gsd<-sd(dat[,"tot_wr"])
gn<-nrow(dat)
varused<-c("tot_wr");
x<-dat[!duplicated(dat[,"subj"]),c(7:9)]
pre.x<-data.frame(x,varused,gmean,gsd,gn)

##Statistics for wr;
#mean of wr by school

temp<-aggregate(dat[,"tot_wr"],list(dat[,"newid"]),FUN=mean)
colnames(temp)<-c("newid",paste("tot_wr","mean",sep="."))
head(temp)

#mean of wr per test
pertest<-temp[,2]
pertest<-as.matrix(pertest,ncol=1)
colnames(pertest)<-c(paste("tot_wr","pertest",sep="."))
temp<-data.frame(temp,pertest)
out<-merge(out,temp,by="newid")

#variance of WR
temp<-aggregate(dat[,"tot_wr"], list(dat[,"newid"]),sd)

#The standard deviation used is across item types by school
colnames(temp)<-c("newid",paste("tot_wr", "sd",sep="."))
out<-merge(out,temp,by="newid")

#z score of wr
Z<-(out[,"tot_wr.pertest"]-gmean)/(out[,"tot_wr.sd"]/sqrt(out[,"N"]))
out<-data.frame(out,Z)

##ncol(out) determines the column to rename using paste function.
colnames(out)[ncol(out)]<-paste("tot_wr","Z",sep=".")

##p value of wr
tdf<-out[,"N"]-1

##lower.tail logical if True, prob are P[X<=x],otherwise P[X>x]
##log.p if true, probabilities p are given as log(p)
pval<-pt(Z,tdf,lower.tail=F,log.p=FALSE)
out<-data.frame(out,pval)
colnames(out)[ncol(out)]<-paste("tot_wr","pval",sep=".")

#threat for wr
threat<-matrix(0,ncol=1,nrow=nrow(out))
prethreat<-as.matrix(round(abs(1.086*log(pval/(1-pval))),digits=4),ncol=1, nrow=nrow(out))


for(threatloop in 1:nrow(out)){
    if (out[threatloop, paste("tot_wr","pval",sep=".")]< 0.5 &
    is.na(out[threatloop,paste("tot_wr","pval",sep=".")])==F){
    threat[threatloop,]<-prethreat[threatloop,]
    }
     }

 threat <- as.matrix(threat,ncol=1)
 if(length(threat[which(threat[,1]> 49.9),1])>0){
    threat[which(threat[,1]> 49.9),1]<-49.9
   }

colnames(threat)<-paste("tot_wr","threat",sep=".")
out<-cbind(out,threat)

if (length(which(out[,paste("tot_wr","threat",sep=".")] > 9.9
          & out[,paste("tot_wr","threat",sep=".")] < 10))> 0){
          out[which(out[,paste("tot_wr","threat",sep=".")]< 10),paste("tot_wr","threat",sep=".")] < -9.9
    }


 if (length(which(out[,"N"]< MinN))> 0){
   out[which(out[,"N"]< MinN),paste("tot_wr","threat",sep=".")]<- NA

    }

if(length(which(out[,paste("tot_wr","Z",sep=".")]=="-Inf")) > 0){
    out[which(out[,paste("tot_wr","Z",sep=".")]=="-Inf"),paste("tot_wr","Z",sep=".")]<- -999999
}

if (length(which(out[,paste("tot_wr","Z",sep=".")]=="Inf")) > 0){
   out[which(out[,paste("tot_wr","Z",sep=".")]=="Inf"),paste("tot_wr","Z",sep=".")] <- 999999
}

eras<-rbind(pre.x,pre.x1,pre.x2,pre.x3)

write.table(out,outfile,quote=F,append=F,row.names=F,col.name=T,na= "",sep=",")



       }

    }

 }

1 个答案:

答案 0 :(得分:0)

考虑使用函数概括您的流程,并通过所有可能组合的for数据框,通过单次迭代替换嵌套的expand.grid循环。然后使用mapply或其包装器Map将参数传递给广义函数。

# DATAFRAME OF ALL POSSIBLE COMBINATIONS OF NESTED for LOOP
loopdf <- expand.grid(mode = c("O","P"), 
                      module = c("module1","module2"), 
                      subj = c("ALGEBRA1","BIOLOGY","LITERATURE"))

# USER-DEFINED FUNCTION nearly same code but two changes at beginning and end:
#   1. Remove assignment of mode, module, subj since they are passed in as parameters
#   2. Replace write.table with a return() since you will output file outside of function
table_process <- function(mode, module, subj){     

  cat("Subj:",subj,",mode: ",mode,",module: ",module,"\n")

  infile<-paste("data/erasure_",mode,"_",module,"_",subj,".csv",sep="")
  # ... EXACT SAME CODE EXCEPT LAST LINE

  return(out)
}

# LIST OF DATAFRAMES
dfList <- Map(table_process, loopdf$mode, loopdf$module, loopdf$subj)
# EQUIVALENTLY
# dfList <- mapply(table_process, loopdf$mode, loopdf$module, loopdf$subj, SIMPLIFY = FALSE)

# ROW BIND ALL DF ELEMENTS INTO ONE DATAFRAME (ASSUMED SAME COLUMN LENGTH AND NAMES)
finaldf <- do.call(rbind, dfList)

# OUTPUT SINGLE FILE
write.table(finaldf, "final.csv", quote=F, append=F, row.names=F, col.name=T, na= "", sep=",")    
# EQUIVALENTLY WITHOUT sep ARG
# write.csv(finaldf, "final.csv", quote=F, append=F, row.names=F, col.name=T, na= "")