我正在伸出手去看看是否有人可以将我引导到我可以微调我的R代码的资源。我正在使用R对许多具有相同变量的文件进行分析。最后,我想附加所有输出表 - 我有12个表 - 并且只有一个输出表。关于如何成功的任何想法。 回顾一下。如果使用for循环有多个输出,则如何附加文件以生成唯一文件。 请参阅以下代码:
setwd("C:/Data/Key/Spring2017/era");
subjects=c("ALGEBRA1","BIOLOGY","LITERATURE");
modules=c("module1","module2");
`modes=c("O","P");`
#loopcount<-1;
for(i in 1:length(subjects)){
for(k in 1:length(modes)){
for(m in 1:length(modules)){
#i=1;
#k=1;
#m=1
subj <- subjects[i];
mode <- modes[k];
module <- modules[m];
cat("Subj:",subj,",mode: ",mode,",module: ",module,"\n");
###Reading the analyses output;
outfile<-
paste("output/erasure_Mean_",mode,"_",module,"_",subj,".csv",sep="");
###Reading in the datafiles;
infile<-paste("data/erasure_",mode,"_",module,"_",subj,".csv",sep="");
dat<-read.csv(infile,header=TRUE,as.is=T);
newid<-paste(dat[,"dis"],dat[,"sch"]);
dat<-data.frame(dat,newid);
head(dat);
count<-function(dat){
length(na.omit(dat))
}
##computing students count per school for Algebra1;
temp<-aggregate(dat[,"subj"],by=list(dat[,"newid"]),FUN="count")
colnames(temp)=c("newid","N")
#removing duplicate id;
#y<-dat[!duplicated(dat[,"newid"]),c("newid","dis","disname","schname","drcid","subj","mode","module")]
y<-dat[!duplicated(dat[,"newid"]),c(1,2,3,4,7:9,15)]
head(y)
MinN<-10;
out<-merge(y,temp,by="newid")
head(out)
#count of the students with FivePlus above;
temp<-aggregate(dat[,"FivePlus"],list(dat[,"newid"]),FUN=sum)
colnames(temp)<-c("newid","FivePlusN");
out<-merge(out,temp,by="newid")
temp<-aggregate(dat[,"FivePlus"],list(dat[,"newid"]),FUN=mean);
colnames(temp)<-c("newid","FivePlusPer");
temp[,2]=temp[,2]*100
out<-merge(out,temp,by="newid");
#state erasure mean
gmean<-mean(dat[,"tot_wr"])
gsd<-sd(dat[,"tot_wr"])
gn<-nrow(dat)
varused<-c("tot_wr");
x<-dat[!duplicated(dat[,"subj"]),c(7:9)]
pre.x<-data.frame(x,varused,gmean,gsd,gn)
##Statistics for wr;
#mean of wr by school
temp<-aggregate(dat[,"tot_wr"],list(dat[,"newid"]),FUN=mean)
colnames(temp)<-c("newid",paste("tot_wr","mean",sep="."))
head(temp)
#mean of wr per test
pertest<-temp[,2]
pertest<-as.matrix(pertest,ncol=1)
colnames(pertest)<-c(paste("tot_wr","pertest",sep="."))
temp<-data.frame(temp,pertest)
out<-merge(out,temp,by="newid")
#variance of WR
temp<-aggregate(dat[,"tot_wr"], list(dat[,"newid"]),sd)
#The standard deviation used is across item types by school
colnames(temp)<-c("newid",paste("tot_wr", "sd",sep="."))
out<-merge(out,temp,by="newid")
#z score of wr
Z<-(out[,"tot_wr.pertest"]-gmean)/(out[,"tot_wr.sd"]/sqrt(out[,"N"]))
out<-data.frame(out,Z)
##ncol(out) determines the column to rename using paste function.
colnames(out)[ncol(out)]<-paste("tot_wr","Z",sep=".")
##p value of wr
tdf<-out[,"N"]-1
##lower.tail logical if True, prob are P[X<=x],otherwise P[X>x]
##log.p if true, probabilities p are given as log(p)
pval<-pt(Z,tdf,lower.tail=F,log.p=FALSE)
out<-data.frame(out,pval)
colnames(out)[ncol(out)]<-paste("tot_wr","pval",sep=".")
#threat for wr
threat<-matrix(0,ncol=1,nrow=nrow(out))
prethreat<-as.matrix(round(abs(1.086*log(pval/(1-pval))),digits=4),ncol=1, nrow=nrow(out))
for(threatloop in 1:nrow(out)){
if (out[threatloop, paste("tot_wr","pval",sep=".")]< 0.5 &
is.na(out[threatloop,paste("tot_wr","pval",sep=".")])==F){
threat[threatloop,]<-prethreat[threatloop,]
}
}
threat <- as.matrix(threat,ncol=1)
if(length(threat[which(threat[,1]> 49.9),1])>0){
threat[which(threat[,1]> 49.9),1]<-49.9
}
colnames(threat)<-paste("tot_wr","threat",sep=".")
out<-cbind(out,threat)
if (length(which(out[,paste("tot_wr","threat",sep=".")] > 9.9
& out[,paste("tot_wr","threat",sep=".")] < 10))> 0){
out[which(out[,paste("tot_wr","threat",sep=".")]< 10),paste("tot_wr","threat",sep=".")] < -9.9
}
if (length(which(out[,"N"]< MinN))> 0){
out[which(out[,"N"]< MinN),paste("tot_wr","threat",sep=".")]<- NA
}
if(length(which(out[,paste("tot_wr","Z",sep=".")]=="-Inf")) > 0){
out[which(out[,paste("tot_wr","Z",sep=".")]=="-Inf"),paste("tot_wr","Z",sep=".")]<- -999999
}
if (length(which(out[,paste("tot_wr","Z",sep=".")]=="Inf")) > 0){
out[which(out[,paste("tot_wr","Z",sep=".")]=="Inf"),paste("tot_wr","Z",sep=".")] <- 999999
}
eras<-rbind(pre.x,pre.x1,pre.x2,pre.x3)
write.table(out,outfile,quote=F,append=F,row.names=F,col.name=T,na= "",sep=",")
}
}
}
答案 0 :(得分:0)
考虑使用函数概括您的流程,并通过所有可能组合的for
数据框,通过单次迭代替换嵌套的expand.grid
循环。然后使用mapply
或其包装器Map
将参数传递给广义函数。
# DATAFRAME OF ALL POSSIBLE COMBINATIONS OF NESTED for LOOP
loopdf <- expand.grid(mode = c("O","P"),
module = c("module1","module2"),
subj = c("ALGEBRA1","BIOLOGY","LITERATURE"))
# USER-DEFINED FUNCTION nearly same code but two changes at beginning and end:
# 1. Remove assignment of mode, module, subj since they are passed in as parameters
# 2. Replace write.table with a return() since you will output file outside of function
table_process <- function(mode, module, subj){
cat("Subj:",subj,",mode: ",mode,",module: ",module,"\n")
infile<-paste("data/erasure_",mode,"_",module,"_",subj,".csv",sep="")
# ... EXACT SAME CODE EXCEPT LAST LINE
return(out)
}
# LIST OF DATAFRAMES
dfList <- Map(table_process, loopdf$mode, loopdf$module, loopdf$subj)
# EQUIVALENTLY
# dfList <- mapply(table_process, loopdf$mode, loopdf$module, loopdf$subj, SIMPLIFY = FALSE)
# ROW BIND ALL DF ELEMENTS INTO ONE DATAFRAME (ASSUMED SAME COLUMN LENGTH AND NAMES)
finaldf <- do.call(rbind, dfList)
# OUTPUT SINGLE FILE
write.table(finaldf, "final.csv", quote=F, append=F, row.names=F, col.name=T, na= "", sep=",")
# EQUIVALENTLY WITHOUT sep ARG
# write.csv(finaldf, "final.csv", quote=F, append=F, row.names=F, col.name=T, na= "")