C ++和Openmp的关键性太慢了

时间:2017-11-21 00:13:15

标签: openmp rcpp

我尝试使用Rcpp和Openmp来加速我的代码。这是我的cpp代码。我想知道为什么。通过openmp加速此代码的最佳方法是什么。

 // #include <Rcpp.h>
 #include <vector>
 #include <string.h>
 #include <RcppArmadillo.h>
 #include "omp.h"

 using namespace Rcpp;
 using namespace std;

 // Function subset("[.data.frame");
 // [[Rcpp::plugins(openmp) ]]
 // [[Rcpp::depends(RcppArmadillo)]] 
 // [[Rcpp::export]]
 DataFrame reformdata(DataFrame rawfile, DataFrame genefile){
     vector<string> rawchr = rawfile["chr"];
     NumericVector rawpos = rawfile["start"];
     vector<string> genechr = genefile["X.1"];
     NumericVector genestart = genefile["TSS.start"];
     NumericVector geneend = genefile["TSS.end"];

     vector<string> geneID = genefile["X"];
     NumericVector rawnumCs = rawfile["numCs"];
     NumericVector rawnumTs = rawfile["numTs"];
     NumericVector rawmethyl = rawfile["methyl"];


     int n_raw = rawchr.size();
     int n_gene = genechr.size();
     int i = 0,j = 0;
     vector<string> outputgeneID;
     vector<string> outputchr;
     NumericVector outputstart;
     NumericVector outputend;
     NumericVector outputmethyl;
     NumericVector outputnumCs;
     NumericVector outputnumTs;

     #pragma omp parallel for num_threads(8)
     for(i = 0; i < n_gene; i++){
         string loc_gene_name = genechr[i];
         int gene_start = genestart[i];
         int gene_end = geneend[i];

         for(j = 0;j < n_raw; j++){
             string raw_name = rawchr[j];
             int raw_pos = rawpos[j];
             if(raw_name.compare(loc_gene_name)==0&&raw_pos >= gene_start&&raw_pos <= gene_end){

                 #pragma omp critical
                 {
                     outputgeneID.push_back(geneID[i]);
                     outputchr.push_back(rawchr[j]);
                     outputstart.push_back(rawpos[j]);
                     outputend.push_back(rawpos[j]);
                     outputmethyl.push_back(rawmethyl[j]);
                     outputnumCs.push_back(rawnumCs[j]);
                     outputnumTs.push_back(rawnumTs[j]);
                 }
             }
        }
   }
   return   DataFrame::create(Named("geneID")=outputgeneID,Named("chr")=outputchr,
                       Named("start")=outputstart,Named("end")=outputend,
                        Named("methyl")=outputmethyl,
                         Named("numCs")=outputnumCs,Named("numTs")=outputnumTs);

}

我只想在R中输入两个Dataframe,然后在这两个数据帧之间进行匹配。也许push_back就是问题所在。有没有一种简单的方法可以避免它?我正在处理大数据,速度很重要。

0 个答案:

没有答案