使嵌套for循环在R中运行得更快

时间:2014-01-08 21:12:28

标签: r loops for-loop match

我在R中有以下代码(嵌套for循环),这是非常慢的。循环匹配两列的值。然后选取相应的文件并遍历文件以查找匹配项。然后它从文件中获取该行。迭代次数可能超过100,000次。如果有人可以提供有关如何加快流程的见解。

for(i in 1: length(Jaspar_ids_in_Network)) {
  m <- Jaspar_ids_in_Network[i]
  gene_ids <- as.character(GeneTFS$GeneIds[i])
  gene_names <- as.character(GeneTFS$Genes[i])

  print("i")
  print(i)

  for(j in 1: length(Jaspar_ids_in_Exp)) {
    l <- Jaspar_ids_in_Exp[j]
    print("j")
    print(j)

    if (m == l) {
      check <- as.matrix(read.csv(file=paste0(dirpath,listoffiles[j]),sep=",",header=FALSE))
      data_check <- data.frame(check)
      for(k in 1: nrow(data_check)) {
        gene_ids_JF <- as.character(data_check[k,3])
        genenames_JF <- as.character(data_check[k,4])

        if(gene_ids_JF == gene_ids) {
          GeneTFS$Source[i] <- as.character(data_check[k,3])
          data1 <- rbind(data1, cbind(as.character(data_check[k,3]),  
                                      as.character(data_check[k,8]), 
                                      as.character(data_check[k,9]),  
                                      as.character(data_check[k,6]), 
                                      as.character(data_check[k,7]),  
                                      as.character(data_check[k,5])))
        } else if (toupper(genenames_JF) == toupper(gene_names)) { 
          GeneTFS$Source[i] <- as.character(data_check[k,4])
          data1 <- rbind(data1, cbind(as.character(data_check[k,4]),
                                      as.character(data_check[k,5]), 
                                      as.character(data_check[k,6]), 
                                      as.character(data_check[k,7]),
                                      as.character(data_check[k,8]),
                                      as.character(data_check[k,2])))
        } else {
         # GeneTFS[i,4] <- "No Evidence"    
        }
      }
    } else {
      # GeneTFS[i,4] <- "Record Not Found"          
    }
  }  
}

1 个答案:

答案 0 :(得分:0)

如果你将处理一对的逻辑拉出到一个函数f(m,l)中,那么你可以用以下代码替换双循环:

outer(Jaspar_ids_in_Network, Jaspar_ids_in_Exp, Vectorize(f))