文件 condition.txt 和 gene.txt 都包含 8000000 行,但每行的列数不同。使用以下代码进行的计算已经运行了两周,仍未完成。如何使用 R 或 Python 中的并行计算重写以下代码?问题的背景介绍请参见“R code runs too slow, how to rewrite this code”。谢谢。
library(compiler)
library(Matrix)
# Turn on the byte-code JIT from the compiler package at level 3.
enableJIT(3)
# NOTE(review): `i` is not read anywhere in the visible script; kept in case
# code outside this view depends on it.
i <- 0
# Read every line of each input file into a character vector, closing each
# connection as soon as its file has been read so no file handle is leaked.
con <- file("condition.txt", "r")
x1 <- readLines(con, n = -1)
close(con)
con2 <- file("gene.txt", "r")
x2 <- readLines(con2, n = -1)
close(con2)
#' Convert a list of token vectors into a sparse row-by-token matrix.
#'
#' Row `r` of the result corresponds to `s[[r]]`; there is one column per
#' distinct token, in order of first appearance across `unlist(s)`. Each
#' occurrence of a token in `s[[r]]` contributes a value of 1 at that
#' (row, column) position.
#'
#' @param s A list of vectors (here: the result of `strsplit()`).
#' @return The sparse matrix built by `spMatrix()` with `length(s)` rows and
#'   one column per distinct token.
str2mat <- function(s) {
  n <- length(s)
  # lengths() is the vectorised, type-stable replacement for sapply(s, length);
  # it always returns an integer vector, even for an empty list.
  ni <- lengths(s)
  tokens <- unlist(s)
  u <- unique(tokens)
  spMatrix(
    nrow = n,
    ncol = length(u),
    # seq_len(n) is empty for n == 0, whereas 1L:n would give c(1, 0).
    i = rep(seq_len(n), ni),
    j = match(tokens, u),
    x = rep(1, length(tokens))
  )
}
# One sparse matrix per input file: rows are lines, columns are distinct
# "|"-separated tokens.
m1 <- str2mat(strsplit(x1, "|", fixed = TRUE))
m2 <- str2mat(strsplit(x2, "|", fixed = TRUE))
# Row totals of each matrix and their per-line product.
n1 <- rowSums(m1)
n2 <- rowSums(m2)
n12 <- n1 * n2
# Numerator of the pairwise score: elementwise product of the two row-by-row
# cross-product matrices.
num <- tcrossprod(m1) * tcrossprod(m2)
# The denominator for a pair (r, c) is min(n12[r], n12[c]). Building it with
# outer() and then dividing creates dense n-by-n matrices, which is infeasible
# for millions of rows. A pair can only exceed the threshold where `num` is
# non-zero, so evaluate the ratio on the stored entries of `num` only.
nz <- summary(num)
# Normalise each stored entry to (smaller index, larger index) so the result
# does not depend on whether `num` stores one triangle or both.
lo <- pmin(nz$i, nz$j)
hi <- pmax(nz$i, nz$j)
score <- nz$x / pmin(n12[lo], n12[hi])
# Keep strictly-upper-triangle pairs scoring above 0.6; which() drops any NA
# that a 0/0 ratio would produce.
keep <- which(lo < hi & score > 0.6)
pairs <- unique(cbind(row = lo[keep], col = hi[keep]))
# Column-major ordering (by col, then row), as which(arr.ind = TRUE) returns.
out <- pairs[order(pairs[, "col"], pairs[, "row"]), , drop = FALSE]
使用较小的输入文件(20行)进行 Rprof 分析的结果如下:
$by.self
self.time self.pct total.time total.pct
"options" 0.10 17.86 0.10 17.86
"findCenvVar" 0.08 14.29 0.14 25.00
"unique" 0.06 10.71 0.12 21.43
"unlist" 0.06 10.71 0.12 21.43
"$" 0.06 10.71 0.06 10.71
"<Anonymous>" 0.02 3.57 0.52 92.86
"tryCatchOne" 0.02 3.57 0.46 82.14
"FUN" 0.02 3.57 0.12 21.43
"findVar" 0.02 3.57 0.04 7.14
"%in%" 0.02 3.57 0.02 3.57
".simpleInheritanceGeneric" 0.02 3.57 0.02 3.57
"as.list" 0.02 3.57 0.02 3.57
"get" 0.02 3.57 0.02 3.57
"getClassDef" 0.02 3.57 0.02 3.57
"parent.env" 0.02 3.57 0.02 3.57
$by.total
total.time total.pct self.time self.pct
"<Anonymous>" 0.52 92.86 0.02 3.57
"test" 0.50 89.29 0.00 0.00
"tryCatchOne" 0.46 82.14 0.02 3.57
"cmpfun" 0.46 82.14 0.00 0.00
"doTryCatch" 0.46 82.14 0.00 0.00
"tryCatch" 0.46 82.14 0.00 0.00
"tryCatchList" 0.46 82.14 0.00 0.00
"standardGeneric" 0.42 75.00 0.00 0.00
"cmp" 0.32 57.14 0.00 0.00
"cmpCall" 0.32 57.14 0.00 0.00
"genCode" 0.32 57.14 0.00 0.00
"h" 0.28 50.00 0.00 0.00
"tryInline" 0.28 50.00 0.00 0.00
"initialize" 0.26 46.43 0.00 0.00
"new" 0.26 46.43 0.00 0.00
"spMatrix" 0.24 42.86 0.00 0.00
"str2mat" 0.24 42.86 0.00 0.00
"cmpCallArgs" 0.16 28.57 0.00 0.00
"cmpCallSymFun" 0.16 28.57 0.00 0.00
"findCenvVar" 0.14 25.00 0.08 14.29
"findLocalsList" 0.14 25.00 0.00 0.00
"funEnv" 0.14 25.00 0.00 0.00
"make.functionContext" 0.14 25.00 0.00 0.00
"unique" 0.12 21.43 0.06 10.71
"unlist" 0.12 21.43 0.06 10.71
"FUN" 0.12 21.43 0.02 3.57
"/" 0.12 21.43 0.00 0.00
"as" 0.12 21.43 0.00 0.00
"callGeneric" 0.12 21.43 0.00 0.00
"checkCall" 0.12 21.43 0.00 0.00
"eval" 0.12 21.43 0.00 0.00
"findLocalsList1" 0.12 21.43 0.00 0.00
"options" 0.10 17.86 0.10 17.86
"cmpForBody" 0.10 17.86 0.00 0.00
"lapply" 0.10 17.86 0.00 0.00
".findInheritedMethods" 0.08 14.29 0.00 0.00
"asMethod" 0.08 14.29 0.00 0.00
"isSymmetric" 0.08 14.29 0.00 0.00
"$" 0.06 10.71 0.06 10.71
"cmpBuiltinArgs" 0.06 10.71 0.00 0.00
"cmpSym" 0.06 10.71 0.00 0.00
"getInlineInfo" 0.06 10.71 0.00 0.00
"findVar" 0.04 7.14 0.02 3.57
".asCoerceMethod" 0.04 7.14 0.00 0.00
".local" 0.04 7.14 0.00 0.00
"all.equal" 0.04 7.14 0.00 0.00
"anyStrings" 0.04 7.14 0.00 0.00
"cmpPrim1" 0.04 7.14 0.00 0.00
"findFunDef" 0.04 7.14 0.00 0.00
"forceSymmetric" 0.04 7.14 0.00 0.00
"isTRUE" 0.04 7.14 0.00 0.00
"validityMethod" 0.04 7.14 0.00 0.00
"validObject" 0.04 7.14 0.00 0.00
"%in%" 0.02 3.57 0.02 3.57
".simpleInheritanceGeneric" 0.02 3.57 0.02 3.57
"as.list" 0.02 3.57 0.02 3.57
"get" 0.02 3.57 0.02 3.57
"getClassDef" 0.02 3.57 0.02 3.57
"parent.env" 0.02 3.57 0.02 3.57
"*" 0.02 3.57 0.00 0.00
".a.e.comb" 0.02 3.57 0.00 0.00
".eligibleSuperClasses" 0.02 3.57 0.00 0.00
">" 0.02 3.57 0.00 0.00
"all.equal_num" 0.02 3.57 0.00 0.00
"as.vector" 0.02 3.57 0.00 0.00
"checkSkipLoopCntxt" 0.02 3.57 0.00 0.00
"checkSkipLoopCntxtList" 0.02 3.57 0.00 0.00
"cmpBuiltin" 0.02 3.57 0.00 0.00
"cmpCallExprFun" 0.02 3.57 0.00 0.00
"cmpComplexAssign" 0.02 3.57 0.00 0.00
"cmpPrim2" 0.02 3.57 0.00 0.00
"cmpSetterCall" 0.02 3.57 0.00 0.00
"cmpSetterDispatch" 0.02 3.57 0.00 0.00
"cmpSymbolAssign" 0.02 3.57 0.00 0.00
"extends" 0.02 3.57 0.00 0.00
"Filter" 0.02 3.57 0.00 0.00
"findLocVar" 0.02 3.57 0.00 0.00
"is" 0.02 3.57 0.00 0.00
"isBaseVar" 0.02 3.57 0.00 0.00
"isLoopTopFun" 0.02 3.57 0.00 0.00
"match.fun" 0.02 3.57 0.00 0.00
"Matrix" 0.02 3.57 0.00 0.00
"outer" 0.02 3.57 0.00 0.00
"rowSums" 0.02 3.57 0.00 0.00
"sapply" 0.02 3.57 0.00 0.00
"try" 0.02 3.57 0.00 0.00
"trySetterInline" 0.02 3.57 0.00 0.00
"which" 0.02 3.57 0.00 0.00
$sample.interval
[1] 0.02
$sampling.time
[1] 0.56