通过 Rpy2 执行 R 脚本时,我一直遇到输出文件中缺少记录的问题;该脚本的功能是把分析结果写入 CSV 文件。请参阅下面的源代码。
例如,我的源文件(f2)中有785条记录,经过分析后,输出文件(processed)中也应该有785条记录。但是,我在输出文件中得到的记录更少。当我直接在R中运行该R脚本时没有这样的问题。有人知道可能是哪里出错了吗?谢谢。
def make_standardization(filename, outdata):
    """Re-bin a test CSV onto the training CSV's columns via an R script.

    The training file ``default_predict_train.csv`` and the test file
    ``filename`` are both looked up under ``settings.MEDIA_ROOT``.  In each
    file the first two columns are carried through untouched; every other
    column name has the pattern ``PFGE.XbaI.`` removed and is then converted
    to a number.  For each training column, the test columns whose number
    falls in the band around that training column are collapsed into one
    output column.  The result (the two leading test columns plus one column
    per training column) is written as CSV to ``outdata``.

    Args:
        filename: Name of the test CSV, relative to ``settings.MEDIA_ROOT``.
        outdata: Output path, handed to R's ``write.csv`` unchanged (it is
            NOT joined with ``settings.MEDIA_ROOT``).

    Returns:
        None.  The only result is the CSV file written by R.

    Note:
        The three path variables are published as R globals (``f1``, ``f2``,
        ``processed``), and the script's working variables also live in R's
        global environment.  Any R-side failure surfaces as whatever
        exception rpy2 raises for it; nothing is caught here.
    """
    traindata = os.path.join(settings.MEDIA_ROOT, "default_predict_train.csv")
    testdata = os.path.join(settings.MEDIA_ROOT, filename)

    # The R snippet below reads its inputs from these R-level variables.
    robjects.r.assign("f1", traindata)
    robjects.r.assign("f2", testdata)
    robjects.r.assign("processed", outdata)

    robjects.r("""
        train.dta=read.csv(f1,header=TRUE)
        test.dta=read.csv(f2,header=TRUE)

        # Columns 1-2 are carried through as-is; the rest are the features.
        train.h=train.dta[,c(1:2)]
        train.x=train.dta[,-c(1:2)]
        train.colnames=sub("PFGE.XbaI.","",colnames(train.x))
        train.colnames=as.numeric(train.colnames)

        test.h=test.dta[,c(1:2)]
        test.x=test.dta[,-c(1:2)]
        # Double transpose turns the data frame into a plain matrix, which is
        # then rebuilt as a numeric matrix of the same shape.
        test.x=t(t(test.x))
        temp=matrix(as.numeric(test.x),nrow=nrow(test.x),ncol=ncol(test.x))
        colnames(temp)=colnames(test.x)
        test.x=temp
        # Missing / non-numeric cells are replaced by 0.5.
        test.x[is.na(test.x)]=0.5
        test.colnames=sub("PFGE.XbaI.","",colnames(test.x))
        test.colnames=as.numeric(test.colnames)

        # One output column per training column, one row per test row.
        temp=matrix(0,nrow=nrow(test.x),ncol=ncol(train.x))
        colnames(temp)=colnames(train.x)
        rownames(temp)=rownames(test.x)

        # Band limits are the midpoints to the neighbouring training columns:
        # "> midpoint(i, i+1)" and "<= midpoint(i, i-1)".
        # NOTE(review): this only partitions the test columns sensibly if the
        # training column numbers are in decreasing order -- confirm.
        #
        # seq_len() instead of 1:ncol() so that a training file without
        # feature columns skips the loop rather than iterating over c(1, 0).
        for(i in seq_len(ncol(train.x))){
            if(i == 1) {
                index=which( test.colnames > (train.colnames[i]+train.colnames[i+1])/2 )
                if(length(index)==1){
                    temp[,i]=test.x[,index]
                }else{
                    # drop=FALSE keeps a matrix even for a single test row or
                    # an empty band; otherwise rowSums() fails on a vector.
                    temp[,i]=1*(rowSums(test.x[,index,drop=FALSE])>0)
                }
            }
            if(i == ncol(train.x)) {
                index=which( test.colnames <= (train.colnames[i]+train.colnames[i-1])/2 )
                if(length(index)==1){
                    temp[,i]=test.x[,index]
                }else{
                    temp[,i]=1*(rowSums(test.x[,index,drop=FALSE])>0)
                }
            }
            if(i >1 && i< ncol(train.x)){
                index=which( test.colnames > (train.colnames[i]+train.colnames[i+1])/2 & test.colnames <= (train.colnames[i]+train.colnames[i-1])/2 )
                if(length(index)==1){
                    temp[,i]=test.x[,index]
                }else{
                    temp[,i]=1*(rowSums(test.x[,index,drop=FALSE])>0)
                }
            }
        }

        test.x=cbind(test.h,temp)
        # write.csv defaults are kept, so R's row names are written as the
        # first column of the output file.
        write.csv(test.x,processed)
    """)