在 Node.js 中使用 C++ 插件(addon)时,多线程执行比串行执行花费更多的时间

时间:2016-12-23 00:30:00

标签: c++ node.js multithreading node.js-addon

我有一段 Node.js 代码,它使用了一个用 C++ 编写的插件(addon)。该代码分别以串行和并行两种方式实现了矩阵乘法算法。

当我在 Node.js 中对大小为 2000 的矩阵执行该代码时,并行版本并没有提高速度。但是,当我只用 C++(不经过 Node.js)执行同样的代码时,并行版本确实能提高速度。

两份代码本质上是相同的,这让我认为问题出在 Node.js 或与之相关的某个环节上,例如 Node.js 处理线程的方式。

我正在使用ubuntu 16.04,nodejs v6.9.1,gcc 5.4.0和库pthread。

另一点值得注意的是:当我在智能手机上通过 Termux 运行这段 Node.js 代码时,并行版本是可以提高速度的。

C ++中的串行代码

#define ROW_BUTTON_ACTION 9

C++ 中的串行代码(serial.cc)

//g++ serial.cc -o serial

#include <cstdlib>
#include <iostream>
#include <cmath>
#include <stdio.h>
using namespace std;

// Nanoseconds per second: divides a timespec tv_nsec difference to get seconds.
// Defined without a trailing semicolon so the macro is a plain expression and
// can appear anywhere inside a larger expression, not only at the end of a
// statement.
#define BILLION  1E9

// Operand matrices, stored as arrays of row pointers (allocated and filled by
// createMatrix()).
float **A;
float **B;
// Product matrix; its rows are allocated by main() before the multiplication.
float **result;
// Matrix dimension: every matrix is n x n. Set by main() from argv[1].
int n;

// Allocates A and B and fills them with rand() % 10 values.
void createMatrix();

/**
 * Entry point of the serial benchmark.
 *
 * Usage: serial <n>
 *
 * Builds two random n x n matrices through createMatrix(), multiplies them on
 * the calling thread and prints the elapsed time of the multiplication (in
 * seconds) to stdout.
 *
 * Returns 0 on success and 1 when the matrix size argument is missing.
 */
int main(int argc, char *argv[]){

  // Without an argument argv[1] is a null pointer and atoi() would
  // dereference it, so reject the call up front.
  if (argc < 2) {
    fprintf(stderr, "usage: %s <matrix size>\n", argc > 0 ? argv[0] : "serial");
    return 1;
  }

  n = atoi(argv[1]);
  createMatrix();

  // Signed indices throughout: n is an int, and comparing it against an
  // unsigned counter would turn a negative n into a huge loop bound.
  result = (float**) malloc(n *sizeof(float*));
  for (int i = 0; i < n; i++) {
    result[i] = (float*) malloc(n *sizeof(float));
  }
  std::cout << "calculating product..." << std::endl;
  struct timespec requestStart, requestEnd;
  // start execution time; the monotonic clock is used because it is not
  // affected by adjustments of the system time while the benchmark runs
  clock_gettime(CLOCK_MONOTONIC, &requestStart);

  // Classic triple loop: result[i][j] is the dot product of row i of A and
  // column j of B.
  for (int i = 0; i < n ; ++i){
    for (int j = 0; j < n ; ++j){
      result[i][j] = 0;
      for (int k = 0; k < n ; k++) {
        result[i][j]  += (A[i][k] * B[k][j]);
      }
    }
  }

  //end execution time
  clock_gettime(CLOCK_MONOTONIC, &requestEnd);
  // whole seconds plus the nanosecond remainder converted to seconds
  double accum = ( requestEnd.tv_sec - requestStart.tv_sec )
      + ( requestEnd.tv_nsec - requestStart.tv_nsec )
      / BILLION;
  printf( "Serial Time taken: %lf\n", accum );

  return 0;
}

/*
 * Allocates the two global operand matrices as n arrays of n floats and fills
 * every cell with rand() % 10. Matrix A is allocated and filled completely
 * before matrix B, row by row, so the sequence of rand() calls is fixed.
 */
void createMatrix(){

  std::cout << "creating matrix A ..." << std::endl;
  A = (float**) malloc(n * sizeof(float*));
  int row = 0;
  while (row < n) {
    float *cells = (float*) malloc(n * sizeof(float));
    A[row] = cells;
    for (int col = 0; col < n; ++col) {
      cells[col] = rand() % 10;
    }
    ++row;
  }

  std::cout << "creating matrix B ..." << std::endl;
  B = (float**) malloc(n * sizeof(float*));
  row = 0;
  while (row < n) {
    float *cells = (float*) malloc(n * sizeof(float));
    B[row] = cells;
    for (int col = 0; col < n; ++col) {
      cells[col] = rand() % 10;
    }
    ++row;
  }
}

C++ 中的并行代码(parallel.cc)

//g++ -std=c++11 parallel.cc -o parallel -pthread

#include <cstdlib>
#include <iostream>
#include <cmath>
#include <stdio.h>
#include <pthread.h>
#include <thread>
using namespace std;

// Nanoseconds per second: divides a timespec tv_nsec difference to get seconds.
// Defined without a trailing semicolon so the macro is a plain expression and
// can appear anywhere inside a larger expression, not only at the end of a
// statement.
#define BILLION  1E9

// Operand matrices, stored as arrays of row pointers (allocated and filled by
// createMatrix()).
float **A;
float **B;
// Product matrix; its rows are allocated by main() and written by the worker
// threads.
float **result;
// Matrix dimension: every matrix is n x n. Set by main() from argv[1].
int n;
// Number of rows in one worker slice; main() sets it to n / <thread count>.
int task_per_thread;

// Allocates A and B and fills them with rand() % 10 values.
void createMatrix();

/**
 * Thread entry point: computes one contiguous band of rows of result = A * B.
 *
 * pid carries the zero-based slice index, passed through the void* argument
 * as an integer value (it is not a real pointer). Slice s owns the rows
 * [task_per_thread * s, task_per_thread * (s + 1)). A slice that has no room
 * for another full slice after it runs up to row n instead, so the rows left
 * over by the integer division n / <thread count> are not dropped.
 *
 * The function never returns normally; it ends the thread via pthread_exit().
 *
 * NOTE(review): the thread count is not visible from here, so this assumes
 * the remainder n % <thread count> is smaller than task_per_thread. For very
 * small n (including n < thread count, where task_per_thread is 0) trailing
 * rows are still not assigned to any slice - confirm against the caller.
 */
void *runner(void *pid) {

    const int slice = (long) pid;
    const int begin = task_per_thread * slice;
    // "<=" rather than "<": when n is an exact multiple of the slice size the
    // second-to-last slice must stop at its own boundary. With "<" it was
    // extended to n as well, so the last two threads computed (and raced on)
    // the same rows.
    const int end = (task_per_thread * (slice + 2) <= n)
                        ? task_per_thread * (slice + 1)
                        : n;

    // Signed indices, matching the type of n and of the bounds above.
    for (int i = begin; i < end; ++i) {
        for (int j = 0; j < n; ++j) {
            result[i][j] = 0;
            for (int k = 0; k < n; ++k) {
                result[i][j] += (A[i][k] * B[k][j]);
            }
        }
    }
    pthread_exit(NULL);
}

/**
 * Entry point of the pthread benchmark.
 *
 * Usage: parallel <n>
 *
 * Builds two random n x n matrices through createMatrix(), splits the rows of
 * the product between a fixed number of worker threads running runner(),
 * waits for all of them and prints the elapsed time (in seconds) to stdout.
 *
 * Returns 0 on success and 1 when the matrix size argument is missing; the
 * process exits with status -1 if a worker thread cannot be created.
 */
int main(int argc, char *argv[]){

  // Without an argument argv[1] is a null pointer and atoi() would
  // dereference it, so reject the call up front.
  if (argc < 2) {
    fprintf(stderr, "usage: %s <matrix size>\n", argc > 0 ? argv[0] : "parallel");
    return 1;
  }

  n = atoi(argv[1]);
  createMatrix();

  result = (float**) malloc(n *sizeof(float*));
  for (int i = 0; i < n; i++) {
    result[i] = (float*) malloc(n *sizeof(float));
  }

  // A compile-time constant, so the array below is an ordinary fixed-size
  // array instead of a compiler-specific variable-length array.
  const int cpus = 4;
  //std::cout << "cpus: "<< cpus << std::endl;
  pthread_t threads[cpus];
  task_per_thread = n / cpus;
  int rc;

  std::cout << "calculating product..." << std::endl;
  struct timespec requestStart, requestEnd;
  // start execution time; the monotonic clock is used because it is not
  // affected by adjustments of the system time while the benchmark runs
  clock_gettime(CLOCK_MONOTONIC, &requestStart);
  for (int i = 0; i < cpus; i++) {
    // The slice index travels inside the pointer value; widening it to long
    // first mirrors the (long) cast runner() applies when it reads it back.
    rc = pthread_create(&threads[i], NULL, runner, (void *) (long) i);
    if (rc){
      std::cout << "Error:unable to create thread," << rc << std::endl;
      exit(-1);
    }
  }

  // The timing must include the whole computation, so wait for every worker.
  for (int i = 0; i < cpus; i++) {
    pthread_join(threads[i],NULL);
  }
  //end execution time
  clock_gettime(CLOCK_MONOTONIC, &requestEnd);
  // whole seconds plus the nanosecond remainder converted to seconds
  double accum = ( requestEnd.tv_sec - requestStart.tv_sec )
      + ( requestEnd.tv_nsec - requestStart.tv_nsec )
      / BILLION;
  printf( "Parallel Time taken: %lf\n", accum );

  return 0;
}

/*
 * Allocates the two global operand matrices as n arrays of n floats and fills
 * every cell with rand() % 10. Matrix A is allocated and filled completely
 * before matrix B, row by row, so the sequence of rand() calls is fixed.
 */
void createMatrix(){

  std::cout << "creating matrix A ..." << std::endl;
  A = (float**) malloc(n * sizeof(float*));
  for (int r = 0; r < n; ++r) {
    A[r] = (float*) malloc(n * sizeof(float));
    float *row = A[r];
    // walk the freshly allocated row cell by cell
    for (float *cell = row; cell < row + n; ++cell) {
      *cell = rand() % 10;
    }
  }

  std::cout << "creating matrix B ..." << std::endl;
  B = (float**) malloc(n * sizeof(float*));
  for (int r = 0; r < n; ++r) {
    B[r] = (float*) malloc(n * sizeof(float));
    float *row = B[r];
    for (float *cell = row; cell < row + n; ++cell) {
      *cell = rand() % 10;
    }
  }
}

Node.js 中的插件代码(addon.cc)

#include <cstdlib>
#include <iostream>
#include <stdio.h>
#include <pthread.h>
#include <node.h>
#include <v8.h>

using v8::Exception;
using v8::FunctionCallbackInfo;
using v8::Isolate;
using v8::Local;
using v8::Number;
using v8::Object;
using v8::String;
using v8::Value;
using v8::Array;
using v8::Integer;

using namespace std;

// Nanoseconds per second: divides a timespec tv_nsec difference to get seconds.
// Defined without a trailing semicolon so the macro is a plain expression and
// can appear anywhere inside a larger expression, not only at the end of a
// statement.
#define BILLION  1E9

// Operand matrices, stored as arrays of row pointers (allocated and filled by
// createMatrix()).
float **A;
float **B;
// Product matrix; its rows are allocated by serialProduct()/parallelProduct().
float **result;
// Matrix dimension: every matrix is n x n. Set from the first JavaScript
// argument of serialProduct()/parallelProduct().
int n;
// Number of rows in one worker slice; parallelProduct() sets it to
// n / <thread count>.
int task_per_thread;

// Allocates A and B and fills them with rand() % 10 values.
void createMatrix();

/**
 * Thread entry point: computes one contiguous band of rows of result = A * B.
 *
 * pid carries the zero-based slice index, passed through the void* argument
 * as an integer value (it is not a real pointer). Slice s owns the rows
 * [task_per_thread * s, task_per_thread * (s + 1)). A slice that has no room
 * for another full slice after it runs up to row n instead, so the rows left
 * over by the integer division n / <thread count> are not dropped.
 *
 * The function never returns normally; it ends the thread via pthread_exit().
 *
 * NOTE(review): the thread count is not visible from here, so this assumes
 * the remainder n % <thread count> is smaller than task_per_thread. For very
 * small n (including n < thread count, where task_per_thread is 0) trailing
 * rows are still not assigned to any slice - confirm against the caller.
 */
void *runner(void *pid) {

    const int slice = (long) pid;
    const int begin = task_per_thread * slice;
    // "<=" rather than "<": when n is an exact multiple of the slice size the
    // second-to-last slice must stop at its own boundary. With "<" it was
    // extended to n as well, so the last two threads computed (and raced on)
    // the same rows.
    const int end = (task_per_thread * (slice + 2) <= n)
                        ? task_per_thread * (slice + 1)
                        : n;

    // Signed indices, matching the type of n and of the bounds above.
    for (int i = begin; i < end; ++i) {
        for (int j = 0; j < n; ++j) {
            result[i][j] = 0;
            for (int k = 0; k < n; ++k) {
                result[i][j] += (A[i][k] * B[k][j]);
            }
        }
    }
    pthread_exit(NULL);
}

void parallelProduct(const FunctionCallbackInfo<Value>& args){
  Isolate* isolate = args.GetIsolate();
    if(args.Length() < 1){
    isolate->ThrowException(Exception::TypeError(
      String::NewFromUtf8(isolate, "Wrong numbers of arguments")));
    return;
  }


  n = args[0]->NumberValue();
  std::cout << "n: "<< n << std::endl;
  createMatrix();

  result = (float**) malloc(n *sizeof(float*));
  for (unsigned int i = 0; i < n; i++) {
    result[i] = (float*) malloc(n *sizeof(float));
  }
  unsigned cpus = 4;
    //std::cout << "cpus: "<< cpus << std::endl;
    pthread_t threads[cpus];
    task_per_thread = n / cpus;
    int rc;

    std::cout << "calculating product..." << std::endl;
    struct timespec requestStart, requestEnd;
    //start execution time
    clock_gettime(CLOCK_REALTIME, &requestStart);
    for(int i=0; i < cpus; i++ ){
    rc = pthread_create(&threads[i], NULL, runner, (void *) i);
    if (rc){
       cout << "Error:unable to create thread," << rc << endl;
       exit(-1);
    }
  }

  for(int i=0; i < cpus; i++ ){
    pthread_join(threads[i],NULL);
  }

    //end execution time
  clock_gettime(CLOCK_REALTIME, &requestEnd);
  double accum = ( requestEnd.tv_sec - requestStart.tv_sec )
      + ( requestEnd.tv_nsec - requestStart.tv_nsec )
      / BILLION;
  printf( "Parallel Time taken: %lf\n", accum );

}

/*
 * Allocates the two global operand matrices as n arrays of n floats and fills
 * every cell with rand() % 10. Matrix A is allocated and filled completely
 * before matrix B, row by row, so the sequence of rand() calls is fixed.
 */
void createMatrix(){

  // The same allocate-and-fill routine is applied to A first, then to B.
  float ***targets[2] = { &A, &B };
  const char *banners[2] = { "creating matrix A ...", "creating matrix B ..." };

  for (int m = 0; m < 2; ++m) {
    std::cout << banners[m] << std::endl;
    *targets[m] = (float**) malloc(n * sizeof(float*));
    float **rows = *targets[m];
    for (int r = 0; r < n; ++r) {
      rows[r] = (float*) malloc(n * sizeof(float));
      for (int c = 0; c < n; ++c) {
        rows[r][c] = rand() % 10;
      }
    }
  }
}

/**
 * JavaScript-callable benchmark: serialProduct(n).
 *
 * Builds two random n x n matrices through createMatrix(), multiplies them on
 * the calling thread and prints the elapsed time of the multiplication (in
 * seconds) to stdout. Nothing is returned to JavaScript.
 *
 * Throws a JavaScript TypeError when called without an argument or when the
 * first argument is not a number.
 */
void serialProduct(const FunctionCallbackInfo<Value>& args){
  Isolate* isolate = args.GetIsolate();
  if(args.Length() < 1){
    isolate->ThrowException(Exception::TypeError(
      String::NewFromUtf8(isolate, "Wrong numbers of arguments")));
    return;
  }
  // A non-numeric argument converts to NaN, which has no meaningful int
  // value; reject it instead of using it as a matrix size.
  if(!args[0]->IsNumber()){
    isolate->ThrowException(Exception::TypeError(
      String::NewFromUtf8(isolate, "Wrong arguments")));
    return;
  }

  // Read the argument once; it is echoed as a double, exactly as received.
  const double requested = args[0]->NumberValue();
  std::cout << "n: "<< requested << std::endl;
  n = requested;
  createMatrix();

  // Signed indices throughout: n is an int, and comparing it against an
  // unsigned counter would turn a negative n into a huge loop bound.
  result = (float**) malloc(n *sizeof(float*));
  for (int i = 0; i < n; i++) {
    result[i] = (float*) malloc(n *sizeof(float));
  }
  std::cout << "calculating product..." << std::endl;
  struct timespec requestStart, requestEnd;
  // start execution time; the monotonic clock is used because it is not
  // affected by adjustments of the system time while the benchmark runs
  clock_gettime(CLOCK_MONOTONIC, &requestStart);

  // Classic triple loop: result[i][j] is the dot product of row i of A and
  // column j of B.
  for (int i = 0; i < n ; ++i){
    for (int j = 0; j < n ; ++j){
      result[i][j] = 0;
      for (int k = 0; k < n ; k++) {
        result[i][j]  += (A[i][k] * B[k][j]);
      }
    }
  }
  //end execution time
  clock_gettime(CLOCK_MONOTONIC, &requestEnd);
  // whole seconds plus the nanosecond remainder converted to seconds
  double accum = ( requestEnd.tv_sec - requestStart.tv_sec )
      + ( requestEnd.tv_nsec - requestStart.tv_nsec )
      / BILLION;
  printf( "Serial Time taken: %lf\n", accum );

  // The matrices are re-allocated on every call, so release them here;
  // otherwise each call leaks three n x n float matrices for the lifetime of
  // the Node.js process.
  // NOTE(review): assumes nothing reads A, B or result after this function
  // returns - confirm no other translation unit uses these globals.
  for (int i = 0; i < n; i++) {
    free(A[i]);
    free(B[i]);
    free(result[i]);
  }
  free(A);
  free(B);
  free(result);
  A = NULL;
  B = NULL;
  result = NULL;
}

// Module initializer: attaches the two native benchmark functions to the
// exports object under the names "serialProduct" and "parallelProduct".
void Init(Local<Object> exports) {
  NODE_SET_METHOD(exports, "serialProduct", serialProduct);
  NODE_SET_METHOD(exports, "parallelProduct", parallelProduct);
}

// Registers Init as the initialization function of the module named "addon".
NODE_MODULE(addon, Init)

binding.gyp(注:原帖中 index.js 的代码未包含在此处)

# node-gyp build description for the native module "addon" (source: addon.cc).
{
  "targets": [
    {
      "target_name": "addon",
      "sources": [ "addon.cc" ],
      # Both branches pass the same flag today: -w silences compiler warnings
      # on Linux and on every other OS alike.
      "conditions":[ ['OS=="linux"', {
            'cflags': [
              '-w'
              ]},
             {# OS != "linux"
               'cflags': [
                 '-w'
                 ]},
          ],
         ],
      # link_settings is a target-level key. It used to sit at the top level
      # of this file, next to "targets", where it is not attached to any
      # target, so it is declared inside the target that needs pthread.
      # NOTE(review): confirm with a verbose node-gyp build that -pthread now
      # shows up on the link line.
      "link_settings":
        {
          "libraries": [
            "-pthread"
          ]
        }
    }
  ]
}

0 个答案:

没有答案