我在 R 中运行一个 for 循环，它会生成一个非常大的矩阵，大到无法放入系统内存。我的数据是文本，需要计算每一行与其他所有行之间的距离（例如，对 R1 要分别计算它与 R2、R3 等各行的距离），因此我对所有观测值进行循环。我打算让循环并行运行，并在每次迭代后把结果保存到磁盘，而不是保留在 R 的内存中。我是 R 的初学者，非常感谢您的帮助。
观测数：300726；变量数：4；结果观测数：约 90 Crores（1 crore = 1000 万，即约 9 亿）
library(miscTools);library(plyr);library(reshape);library(gtools);library(matlab);
library(stringdist);library(SimilarityMeasures);library(gplots);library(manipulate);library(matrixcalc);
# Load the customer extract and prepare the inputs used by the distance loop.
#
# quote and comment.char are set explicitly: read.table()'s defaults treat "'"
# as a quote character and "#" as a comment marker, which mis-parses free-text
# fields such as names and street addresses (e.g. "O'Brien", "Flat #4").
Customer <- read.table(
  "Consolidated_Rndm_Added.csv",
  header = TRUE,
  sep = ",",
  quote = "\"",
  comment.char = "",
  stringsAsFactors = FALSE
)

# Fail early, with a readable message, if an expected column is absent.
compare_cols <- c("ADHAR_CARD", "FIRST_NAME", "LAST_NAME", "STREET")
missing_cols <- setdiff(c(compare_cols, "CUSTOMER_ID"), names(Customer))
if (length(missing_cols) > 0) {
  stop(
    "Missing required column(s): ", paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Synthetic row key: "C" followed by the row name of each record.
Customer$Createdid <- paste0("C", row.names(Customer))

# Columns to compare, plus the synthetic key.
base_data <- Customer[, c(compare_cols, "Createdid"), drop = FALSE]
names_temp <- base_data$Createdid

# Lookup from the synthetic key to CUSTOMER_ID (not used by the code below).
Createdid_Actual <- Customer[, c("Createdid", "CUSTOMER_ID"), drop = FALSE]

# Names of the columns to compare, i.e. everything except the key.
names_base_data <- setdiff(names(base_data), "Createdid")

# Accumulator for the long-format distances of all variables.
FC_LCS_stringdist <- data.frame()

# One-column data frame (column "names_temp") holding every row key; it is
# bound next to each distance matrix so the rows can be identified.
ch <- as.data.frame(names_temp)

# The full table is no longer needed; release it before the heavy loop.
rm(Customer)
# Pairwise LCS string distances, one comparison variable at a time.
#
# For every column named in names_base_data, all values of that column are
# compared against the values of one chunk of rows with
# stringdistmatrix(method = "lcs"), so only an nrow(base_data) x chunk_size
# distance matrix exists at any one time. Each chunk is melted to long format
# with the columns
#   Referid  - Createdid of the row taken from the full data set
#   baseid   - Createdid of the row taken from the current chunk
#   value    - LCS distance between the two values
#   var_name - name of the compared column
# and every chunk is finally appended to FC_LCS_stringdist.
#
# NOTE(review): the accumulated long table still has nrow(base_data)^2 rows
# per variable. For large inputs consider writing each chunk to disk instead;
# that is not done here because the pivot step that follows expects
# FC_LCS_stringdist in memory.

chunk_size <- 100

# Row positions grouped into consecutive chunks. This does not depend on the
# variable, so it is computed once. Using positions (rather than numeric row
# names) keeps the chunking valid even if the row names are not 1..n.
row_positions <- seq_len(nrow(base_data))
row_chunks <- split(row_positions, ceiling(row_positions / chunk_size))

# Collect the pieces in a preallocated list and bind them once at the end;
# growing a data frame inside the loop copies everything on every iteration.
pieces <- vector("list", length(names_base_data) * length(row_chunks))
piece_index <- 0L

for (k in seq_along(names_base_data)) {
  selec_var <- names_base_data[k]
  all_values <- as.character(base_data[[selec_var]])

  for (tk in seq_along(row_chunks)) {
    chunk_rows <- row_chunks[[tk]]

    # Rows: every record; columns: the records of the current chunk.
    LCS_stringdist <- stringdistmatrix(
      all_values,
      all_values[chunk_rows],
      method = "lcs"
    )
    LCS_stringdist <- as.data.frame(LCS_stringdist)
    names(LCS_stringdist) <- base_data$Createdid[chunk_rows]

    # Attach the row keys, then reshape wide -> long.
    LCS_stringdist <- cbind(ch, LCS_stringdist)
    LCS_stringdist <- melt(LCS_stringdist, id.vars = "names_temp")
    LCS_stringdist$var_name <- selec_var
    LCS_stringdist <- rename(
      LCS_stringdist,
      replace = c("names_temp" = "Referid", "variable" = "baseid")
    )

    piece_index <- piece_index + 1L
    pieces[[piece_index]] <- LCS_stringdist
  }

  # Progress indicator: one line per finished variable.
  print(selec_var)
}

# Single bind of everything; rbind() drops the empty starting data frame.
FC_LCS_stringdist <- do.call(rbind, c(list(FC_LCS_stringdist), pieces))
rm(pieces, LCS_stringdist)
# Pivot the long distance table so that each (baseid, Referid) pair becomes one
# row with one column per compared variable, aggregating with sum, then save
# the result as CSV in the working directory.
# margins = TRUE additionally requests cast()'s marginal totals; TRUE is
# spelled out because T is an ordinary variable that can be reassigned.
FC_LCS_stringdist <- cast(
  FC_LCS_stringdist,
  baseid + Referid ~ var_name,
  fun.aggregate = sum,
  value = "value",
  margins = TRUE
)
write.csv(FC_LCS_stringdist, "FC_LCS_stringdist.csv")