如何使用并行计算或python重写此代码

时间:2014-03-31 07:47:29

标签: python r parallel-processing

文件 condition.txt gene.txt 都包含8000000行,但每行的列大小不同。使用以下代码进行的计算运行了两周,但仍未完成。如何使用r或python中的并行计算重写以下代码。问题的介绍请参见 R code runs too slow,how to rewrite this code谢谢。

library(compiler)
library(Matrix)
enableJIT(3)
i=0;

con <- file("condition.txt", "r")
con2<-file("gene.txt","r")
x1<-readLines(con,n=-1)
x2<-readLines(con2,n=-1)


str2mat <- function(s) {
  n <- length(s)
  ni <- sapply(s, length)
  s <- unlist(s)
  u <- unique(s)
  spMatrix(nrow=n, ncol=length(u), i=rep(1L:n, ni), j=match(s, u), x=rep(1, length(s)))
}


m1 <- str2mat(strsplit(x1, "|", fixed=TRUE))
m2 <- str2mat(strsplit(x2, "|", fixed=TRUE))
n1 <- rowSums(m1)
n2 <- rowSums(m2)
num <- tcrossprod(m1)*tcrossprod(m2)
n12 <- n1*n2
den <- outer(n12, n12, pmin)
use <- num/den > 0.6
diag(use) <- FALSE
use[lower.tri(use)] <- FALSE
out <- which(use, arr.ind=TRUE)

使用较小的输入文件(20行)进行 Rprof 分析的结果如下:

$by.self
                            self.time self.pct total.time total.pct
"options"                        0.10    17.86       0.10     17.86
"findCenvVar"                    0.08    14.29       0.14     25.00
"unique"                         0.06    10.71       0.12     21.43
"unlist"                         0.06    10.71       0.12     21.43
"$"                              0.06    10.71       0.06     10.71
"<Anonymous>"                    0.02     3.57       0.52     92.86
"tryCatchOne"                    0.02     3.57       0.46     82.14
"FUN"                            0.02     3.57       0.12     21.43
"findVar"                        0.02     3.57       0.04      7.14
"%in%"                           0.02     3.57       0.02      3.57
".simpleInheritanceGeneric"      0.02     3.57       0.02      3.57
"as.list"                        0.02     3.57       0.02      3.57
"get"                            0.02     3.57       0.02      3.57
"getClassDef"                    0.02     3.57       0.02      3.57
"parent.env"                     0.02     3.57       0.02      3.57

$by.total
                            total.time total.pct self.time self.pct
"<Anonymous>"                     0.52     92.86      0.02     3.57
"test"                            0.50     89.29      0.00     0.00
"tryCatchOne"                     0.46     82.14      0.02     3.57
"cmpfun"                          0.46     82.14      0.00     0.00
"doTryCatch"                      0.46     82.14      0.00     0.00
"tryCatch"                        0.46     82.14      0.00     0.00
"tryCatchList"                    0.46     82.14      0.00     0.00
"standardGeneric"                 0.42     75.00      0.00     0.00
"cmp"                             0.32     57.14      0.00     0.00
"cmpCall"                         0.32     57.14      0.00     0.00
"genCode"                         0.32     57.14      0.00     0.00
"h"                               0.28     50.00      0.00     0.00
"tryInline"                       0.28     50.00      0.00     0.00
"initialize"                      0.26     46.43      0.00     0.00
"new"                             0.26     46.43      0.00     0.00
"spMatrix"                        0.24     42.86      0.00     0.00
"str2mat"                         0.24     42.86      0.00     0.00
"cmpCallArgs"                     0.16     28.57      0.00     0.00
"cmpCallSymFun"                   0.16     28.57      0.00     0.00
"findCenvVar"                     0.14     25.00      0.08    14.29
"findLocalsList"                  0.14     25.00      0.00     0.00
"funEnv"                          0.14     25.00      0.00     0.00
"make.functionContext"            0.14     25.00      0.00     0.00
"unique"                          0.12     21.43      0.06    10.71
"unlist"                          0.12     21.43      0.06    10.71
"FUN"                             0.12     21.43      0.02     3.57
"/"                               0.12     21.43      0.00     0.00
"as"                              0.12     21.43      0.00     0.00
"callGeneric"                     0.12     21.43      0.00     0.00
"checkCall"                       0.12     21.43      0.00     0.00
"eval"                            0.12     21.43      0.00     0.00
"findLocalsList1"                 0.12     21.43      0.00     0.00
"options"                         0.10     17.86      0.10    17.86
"cmpForBody"                      0.10     17.86      0.00     0.00
"lapply"                          0.10     17.86      0.00     0.00
".findInheritedMethods"           0.08     14.29      0.00     0.00
"asMethod"                        0.08     14.29      0.00     0.00
"isSymmetric"                     0.08     14.29      0.00     0.00
"$"                               0.06     10.71      0.06    10.71
"cmpBuiltinArgs"                  0.06     10.71      0.00     0.00
"cmpSym"                          0.06     10.71      0.00     0.00
"getInlineInfo"                   0.06     10.71      0.00     0.00
"findVar"                         0.04      7.14      0.02     3.57
".asCoerceMethod"                 0.04      7.14      0.00     0.00
".local"                          0.04      7.14      0.00     0.00
"all.equal"                       0.04      7.14      0.00     0.00
"anyStrings"                      0.04      7.14      0.00     0.00
"cmpPrim1"                        0.04      7.14      0.00     0.00
"findFunDef"                      0.04      7.14      0.00     0.00
"forceSymmetric"                  0.04      7.14      0.00     0.00
"isTRUE"                          0.04      7.14      0.00     0.00
"validityMethod"                  0.04      7.14      0.00     0.00
"validObject"                     0.04      7.14      0.00     0.00
"%in%"                            0.02      3.57      0.02     3.57
".simpleInheritanceGeneric"       0.02      3.57      0.02     3.57
"as.list"                         0.02      3.57      0.02     3.57
"get"                             0.02      3.57      0.02     3.57
"getClassDef"                     0.02      3.57      0.02     3.57
"parent.env"                      0.02      3.57      0.02     3.57
"*"                               0.02      3.57      0.00     0.00
".a.e.comb"                       0.02      3.57      0.00     0.00
".eligibleSuperClasses"           0.02      3.57      0.00     0.00
">"                               0.02      3.57      0.00     0.00
"all.equal_num"                   0.02      3.57      0.00     0.00
"as.vector"                       0.02      3.57      0.00     0.00
"checkSkipLoopCntxt"              0.02      3.57      0.00     0.00
"checkSkipLoopCntxtList"          0.02      3.57      0.00     0.00
"cmpBuiltin"                      0.02      3.57      0.00     0.00
"cmpCallExprFun"                  0.02      3.57      0.00     0.00
"cmpComplexAssign"                0.02      3.57      0.00     0.00
"cmpPrim2"                        0.02      3.57      0.00     0.00
"cmpSetterCall"                   0.02      3.57      0.00     0.00
"cmpSetterDispatch"               0.02      3.57      0.00     0.00
"cmpSymbolAssign"                 0.02      3.57      0.00     0.00
"extends"                         0.02      3.57      0.00     0.00
"Filter"                          0.02      3.57      0.00     0.00
"findLocVar"                      0.02      3.57      0.00     0.00
"is"                              0.02      3.57      0.00     0.00
"isBaseVar"                       0.02      3.57      0.00     0.00
"isLoopTopFun"                    0.02      3.57      0.00     0.00
"match.fun"                       0.02      3.57      0.00     0.00
"Matrix"                          0.02      3.57      0.00     0.00
"outer"                           0.02      3.57      0.00     0.00
"rowSums"                         0.02      3.57      0.00     0.00
"sapply"                          0.02      3.57      0.00     0.00
"try"                             0.02      3.57      0.00     0.00
"trySetterInline"                 0.02      3.57      0.00     0.00
"which"                           0.02      3.57      0.00     0.00

$sample.interval
[1] 0.02

$sampling.time
[1] 0.56

0 个答案:

没有答案