Question

我在一个 DLL 中有一个函数，它使用 OpenMP 并行化了几个循环。我先把这些函数导出给一个用 C++ 编写的应用程序调用并做了测试，一切运行良好。随后我又把这些函数导出到另一个平台（MetaTrader Terminal 4，使用 MQL4 编程语言），代码在那里同样可以运行，但速度慢了很多（见下面使用 OpenMP 的代码段及其计时输出）。因此我猜测，从该平台调用时并行化没有生效。我的项目使用的是 Visual Studio 2015。

            double dtime;
            dtime = omp_get_wtime();

            ofstream fopen("C:\\output.txt", 'a');
            ofstream fout("C:\\output.txt", 'a');

            dtime = omp_get_wtime();


#pragma omp parallel for num_threads(num)
            for (int p = 1; p <= r1; p++) {
                int k = omp_get_thread_num();
                int i = I1[p], j = i + l;
                double alpha = 0, beta = 0, gamma = 0;
                double zeta, t, c, s;
                for (int k = 0; k < N; k++) {
                    alpha = alpha + (U_t[i][k] * U_t[i][k]);
                    beta = beta + (U_t[j][k] * U_t[j][k]);
                    gamma = gamma + (U_t[i][k] * U_t[j][k]);
                }
                C[k] = max(C[k], abs(gamma) / sqrt(alpha*beta));
                //converge = max(converge, abs(gamma)/sqrt(alpha*beta));    //compute convergence
                //basicaly is the angle
                //between column i and j


                zeta = (beta - alpha) / (2.0 * gamma);
                t = sgn(zeta) / (abs(zeta) + sqrt(1.0 + (zeta*zeta)));        //compute tan of angle
                c = 1.0 / (sqrt(1.0 + (t*t)));              //extract cos
                s = c*t;                            //extrac sin


                for (int k = 0; k<N; k++) {

                    t = U_t[i][k];

                    U_t[i][k] = c*t - s*U_t[j][k];
                    if (!(U_t[i][k] < 0 || U_t[i][k] > 0)) {
                        U_t[i][k] = 0;
                    }

                    U_t[j][k] = s*t + c*U_t[j][k];
                    if (!(U_t[j][k] < 0 || U_t[j][k] > 0)) {
                        U_t[j][k] = 0;
                    }


                    t = V_t[i][k];

                    V_t[i][k] = c*t - s*V_t[j][k];
                    if (!(V_t[i][k] < 0 || V_t[i][k] > 0)) {
                        V_t[i][k] = 0;
                    }

                    V_t[j][k] = s*t + c*V_t[j][k];
                    if (!(V_t[j][k] < 0 || V_t[j][k] > 0)) {
                        V_t[j][k] = 0;
                    }

                }
            }
            fout << endl;
#pragma omp parallel for num_threads(num)
            for (int p = 1; p <= r2; p++) {
                int k = omp_get_thread_num();
                int i = I2[p], j = i + l;
                double alpha = 0, beta = 0, gamma = 0;
                double zeta, t, c, s;
                for (int k = 0; k < N; k++) {
                    alpha = alpha + (U_t[i][k] * U_t[i][k]);
                    beta = beta + (U_t[j][k] * U_t[j][k]);
                    gamma = gamma + (U_t[i][k] * U_t[j][k]);
                }
                C[k] = max(C[k], abs(gamma) / sqrt(alpha*beta));
                //converge = max(converge, abs(gamma)/sqrt(alpha*beta));    //compute convergence
                //basicaly is the angle
                //between column i and j

                zeta = (beta - alpha) / (2.0 * gamma);
                t = sgn(zeta) / (abs(zeta) + sqrt(1.0 + (zeta*zeta)));        //compute tan of angle
                c = 1.0 / (sqrt(1.0 + (t*t)));              //extract cos
                s = c*t;                            //extrac sin


                for (int k = 0; k<N; k++) {


                    t = U_t[i][k];

                    U_t[i][k] = c*t - s*U_t[j][k];
                    if (!(U_t[i][k] < 0 || U_t[i][k] > 0)) {
                        U_t[i][k] = 0;
                    }

                    U_t[j][k] = s*t + c*U_t[j][k];
                    if (!(U_t[j][k] < 0 || U_t[j][k] > 0)) {
                        U_t[j][k] = 0;
                    }



                    t = V_t[i][k];

                    V_t[i][k] = c*t - s*V_t[j][k];
                    if (!(V_t[i][k] < 0 || V_t[i][k] > 0)) {
                        V_t[i][k] = 0;
                    }

                    V_t[j][k] = s*t + c*V_t[j][k];
                    if (!(V_t[j][k] < 0 || V_t[j][k] > 0)) {
                        V_t[j][k] = 0;
                    }


                }

            }
            fout << endl;
            for (int k = 0; k < num; k++)
                converge = max(converge, C[k]);
            fout << endl;
            if (l == M) {
                fout << converge << '\t';

                fout << endl;
                dtime = omp_get_wtime() - dtime;
                fout << "\n" << "dtime: " << dtime << " ";
                dtime = omp_get_wtime();
                fout << endl;

            }
            fout << endl; 
            fout.close();

这些是收敛和时间的输出：

C ++ app：

0.999996    dtime: 7.91817e-05
0.954982    dtime: 8.01593e-05
0.964351    dtime: 0.000116817
0.934475    dtime: 7.86929e-05
0.781737    dtime: 7.77154e-05
0.812496    dtime: 7.96705e-05
0.500925    dtime: 7.77154e-05
0.174739    dtime: 7.77154e-05
0.0407444   dtime: 7.86929e-05 
0.0137945   dtime: 8.01593e-05 
0.0039458   dtime: 0.000136857 
0.000550945 dtime: 7.86929e-05 
0.000149865 dtime: 7.96705e-05 
3.76775e-05 dtime: 7.96705e-05 
6.86001e-06 dtime: 8.0648e-05 
2.04005e-06 dtime: 7.82042e-05 
5.6817e-07  dtime: 8.84685e-05 
2.70614e-07 dtime: 7.96705e-05
5.78656e-08 dtime: 7.86929e-05
1.90527e-08 dtime: 8.01593e-05
1.00316e-09 dtime: 7.96705e-05

从平台：

0.999997    dtime: 0.222026
0.917038    dtime: 0.219041
0.982879    dtime: 0.215614
0.723091    dtime: 0.219034
0.295653    dtime: 0.215915
0.097825    dtime: 0.21803
0.0350881   dtime: 0.21804
0.00654856  dtime: 0.219009
0.00188476  dtime: 0.217366
0.000435981 dtime: 0.223172
9.50818e-05 dtime: 0.21804
2.27348e-05 dtime: 0.260625
1.39124e-05 dtime: 0.219027
1.72161e-06 dtime: 0.218035
3.18178e-07 dtime: 0.218927
1.77708e-07 dtime: 0.218026
3.81575e-08 dtime: 0.204294
9.53867e-09 dtime: 0.221036

MQL4 函数声明：

#property                                                  copyright "Adrijus"
#property                                                  version   "1.00"
#property strict
// Functions imported from LMBRDLL.dll.
//
// getWeights is declared as returning int, not double: the DLL function
// returns a pointer (double*), which is handed back as an integer-sized
// value, not as a floating-point result. The value is the address of the
// weights buffer and is of no use to MQL4 code; the trained weights are
// delivered through the weights[] array argument.
#import  "LMBRDLL.dll"
                      int     getWeights( double &data[],
                                          int    &topology[],
                                          int     topSize,
                                          double &TV[],
                                          double  validationDifference,
                                          int     vSize,
                                          int     timeSteps,
                                          int     nVabs,
                                          double &weights[]
                                          );

                      // Returns the network output for currentData using the given weights.
                      double testWeights( double &weights[],
                                          double &currentData[],
                                          int    &topology[],
                                          int     topSize,
                                          int     timeSteps,
                                          int     nVabs
                                          );
#import
#include <stdlib.mqh>

MQL4 函数调用：

                 // Train inside the DLL; the resulting weights are written into `weights`.
                 getWeights(data, topology, topSize, TV, validationDifference,
                            vSize, timeSteps, nVabs, weights);

                 // DLL function returns predicted percentage change
                 double output = testWeights(weights, currentData, topology,
                                             topSize, timeSteps, nVabs);

DLL 中的定义：

#include "stdafx.h"

/// Trains the network and writes the resulting weights into a caller buffer.
///
/// @param idata                 Raw input data, forwarded to buildInputs().
/// @param aTopology             Array of topSize layer sizes; copied into a vector<unsigned>.
/// @param topSize               Number of entries read from aTopology.
/// @param aTV                   Array from which exactly two values (aTV[0], aTV[1]) are read.
/// @param validationDifference  Training stops once the validation error improves by less than this.
/// @param vSize                 Forwarded to buildInputs().
/// @param tSteps                Forwarded to buildInputs().
/// @param nVabs                 Forwarded to buildInputs().
/// @param T                     Output buffer; receives one value per element of
///                              buildRWeights(myNet.allSynapses).
///                              NOTE(review): the required capacity is not checked here --
///                              the caller must size T for the full weight vector.
/// @return T, the same pointer that was passed in.
double *getWeights( double const *idata,
                    int    const *aTopology,
                    int           topSize,
                    double const *aTV,
                    double        validationDifference,
                    int           vSize,
                    int           tSteps,
                    int           nVabs,
                    double       *T
                    ) {

    // Loop counters are declared locally. Relying on a counter declared outside
    // the function makes the function non-reentrant and mixes a signed counter
    // with the unsigned result of size().
    vector<unsigned> topology(topSize);
    for (int layer = 0; layer < topSize; layer++) {
        topology[layer] = aTopology[layer];
    }

    vector<double> TV(2);
    TV[0] = aTV[0];
    TV[1] = aTV[1];

    Matrix inputVals = buildInputs(idata, vSize, tSteps, nVabs);
    inputVals = sortInputs(inputVals, TV);
    sortTargets(TV);
    Matrix targetVals = getTargets();
    Net myNet(topology, inputVals);

    // Large sentinel values chosen so that the first loop test
    // (current < previous) succeeds and the first improvement is huge.
    double currentValidationError = 1000000000777;
    double previousValidationError = 1000000000000000;

    // Iterate while the validation error keeps decreasing.
    while (currentValidationError < previousValidationError) {

        // Improvement achieved by the previous iteration; stop when it is
        // smaller than the caller's threshold.
        const double difference = previousValidationError - currentValidationError;

        if (difference < validationDifference)
            break;

        FeedForward(&topology, &myNet);

        Backpropagation(&topology, &myNet, &targetVals);

        BuildJacobian(&topology, &myNet);

        LevenberMarquardtBeyesianRegularization( &topology,
                                                 &myNet,
                                                 &targetVals
                                                  );
        previousValidationError = currentValidationError;

        currentValidationError = Validation( &myNet.allLMweights,
                                              topology
                                              );

        //UpdateSynapses(topology, myNet);
    }

    // Flatten the trained weights and copy them into the caller's buffer.
    vector<double> rowWeights = buildRWeights(myNet.allSynapses);

    for (size_t w = 0; w < rowWeights.size(); w++) {
        T[w] = rowWeights[w];
    }

    return T;
}

double testWeights( double const *rowWeights,
                    double const *testData,
                    int    const *aTopology,
                    int           topSize,
                    int           timeSteps,
                    int           nVariables
                    ) {

    vector<unsigned> topology(topSize);
    for (i = 0; i < topSize; i++) {
        topology[i] = aTopology[i];
    }

    vector<Matrix> testWeights = buildWeightMatrices( rowWeights,
                                                      topology
                                                      );
    vector<double> input = buildTestInputs( testData,
                                            timeSteps,
                                            nVariables
                                            );
    double output = getOutput(testWeights, input);

    return output;
}

用于导出到 MetaTrader 终端平台的 LMBRDLL.def：

; Module-definition file for the LMBRDLL library.
; The EXPORTS section lists the two functions under their plain names.
LIBRARY "LMBRDLL"
EXPORTS
getWeights
testWeights

HeaderDLL.h，用于导出到 C++ 应用：

#include "stdafx.h"
using namespace std;

    /// Trains the network and stores the resulting weights in T.
    /// Reads topSize entries from aTopology and two entries from aTV;
    /// returns the pointer T that was passed in.
    __declspec(dllexport) double *getWeights(
        double const *idata,
        int const    *aTopology,
        int           topSize,
        double const *aTV,
        double        validationDifference,
        int           vSize,
        int           tSteps,
        int           nVabs,
        double       *T);
    /// Evaluates the network on testData using the flat weight array rowWeights.
    /// Reads topSize entries from aTopology; returns the network output.
    __declspec(dllexport) double testWeights(
        double const *rowWeights,
        double const *testData,
        int const    *aTopology,
        int           topSize,
        int           timeSteps,
        int           nVariables);

OpenMP并在DLL中导出并行化函数

0 个答案: