Question

我有一个 C++（CPP）文件，它占用大量 CPU，并且包含很多循环/迭代。它被编译成共享对象（shared object），并封装供 Python 调用。一位博士问我能否把它并行化，先在便宜的 GPU 上做出一个原型，然后再购买昂贵的设备（我是学生）。我已经阅读并做过一些基本的 CUDA 练习，而且能够运行，但现在不知道该如何入手，也不确定这段代码是否可以并行化。

这是我们正在讨论的循环：

// Calculate: evaluates getEnergy() for the input coordinates and for a grid of
// rotated copies of them, keeping the element-wise minimum of every result in
// `sphore`.
//
// Parameters (as used in the visible part of the function):
//   Oricoor          - input coordinates; handed to getBox(), getEnergy() and
//                      dotMatrix(). Presumably one {x, y, z} row per point -
//                      TODO confirm against dotMatrix().
//   size_coor        - forwarded unchanged to every helper; presumably the
//                      number of rows in Oricoor - TODO confirm.
//   Property         - forwarded unchanged to getEnergy().
//   normalization    - not referenced anywhere in the visible part of the body.
//   rotationStepList - each entry is used as the increment, in degrees, of the
//                      three angle loops below.
//   resolution,
//   radius           - forwarded unchanged to getBox().
//   beginProbe,
//   endProbe         - forwarded to getEnergy(); their difference also fixes
//                      the length of `sphore`
//                      (N_PROPERTIES * (endProbe - beginProbe)).
//
// NOTE(review): the end of this function (its return statement and closing
// brace) is not part of this excerpt. Presumably it returns `sphore` - confirm
// against the full source.
// NOTE(review): every container parameter is taken by value, so each call
// copies them.
vector<double> Calculate (vector< vector<double> >  Oricoor, int size_coor, vector< vector<double> > Property, string normalization, 
        vector<int> rotationStepList, float resolution, int beginProbe, int endProbe, vector<double> radius) {
    // Number of values stored per probe in the energy / sphore vectors.
    const int N_PROPERTIES = 4; 
    int size = rotationStepList.size(); 
    const int numberOfProbes = endProbe - beginProbe;

    // Baseline: box and energies for the unrotated input coordinates.
    //self.__getBox__(oricoor)
    vector< vector<double> > boxPoint = getBox(Oricoor, size_coor, resolution, radius);

    //self.__getEnergies__(oricoor, prop)
    vector<double> e = getEnergy(boxPoint, Oricoor, size_coor, Property, beginProbe, endProbe);

    // Seed the running minimum with the baseline energies.
    // `e` is indexed without a bounds check, so getEnergy() must return at
    // least N_PROPERTIES * numberOfProbes values.
    //self.sphore = self.energy
    vector<double> sphore(N_PROPERTIES * numberOfProbes);
    for (int i = 0; i < N_PROPERTIES * numberOfProbes; ++i) {
        sphore[i] = e[i];
    }

    // One full angular sweep per entry of rotationStepList.
    // NOTE(review): a step <= 0 never terminates the loops below (the angle
    // counters would not advance towards their upper bounds); nothing in the
    // visible code validates the list.
    // NOTE(review): for parallelisation - each (iTheta, iPsi, iPhi) combination
    // reads only the inputs plus matrices derived from them, and the only state
    // shared between iterations is the element-wise minimum kept in `sphore`.
    // This assumes getBox(), getEnergy() and dotMatrix() have no side effects -
    // confirm before distributing iterations across threads.
    for (int i = 0; i < size; i++) {
        int rotationStep = rotationStepList[i];
        //cout << "Rotation Sterp: " << rotationStep << endl;
        // theta covers [0, 180) degrees in increments of rotationStep.
        for (int iTheta = 0; iTheta < 180; iTheta += rotationStep){
            // 0.017453292519943 is pi / 180: converts degrees to radians.
            double theta = 0.017453292519943 * iTheta;
            double cos_theta = cos(theta);
            double sin_theta = sin(theta);

            // Rotation matrix about the Y axis for angle theta.
            double rotateY[3][3] = {{cos_theta, 0, -sin_theta},
                {        0, 1,          0},
                {sin_theta, 0, cos_theta}};

            // Presumably applies rotateY to every coordinate row - confirm in
            // dotMatrix(). Computed once per theta and reused by the inner loops.
            vector< vector<double> > Matrix_Y = dotMatrix(Oricoor, size_coor, rotateY);

            // psi covers [0, 360) degrees in increments of rotationStep.
            for (int iPsi = 0; iPsi < 360; iPsi += rotationStep) {
                double psi = 0.017453292519943 * iPsi;
                double cos_psi = cos(psi);
                double sin_psi = sin(psi);

                // Rotation matrix about the Z axis for angle psi.
                double rotateZ[3][3] = {{cos_psi, -sin_psi, 0},
                    {sin_psi,  cos_psi, 0},
                    {      0,        0, 1}};

                // Second rotation, applied to the Y-rotated coordinates.
                vector< vector<double> > Matrix_Z = dotMatrix(Matrix_Y, size_coor, rotateZ);

                // phi covers [0, 360) degrees in increments of rotationStep.
                for (int iPhi = 0; iPhi < 360; iPhi += rotationStep){
                    double phi = 0.017453292519943 * iPhi;
                    double cos_phi = cos(phi);
                    double sin_phi = sin(phi); 

                    // Rotation matrix about the X axis for angle phi.
                    double rotateX[3][3] = {{1,       0,        0},
                        {0, cos_phi, -sin_phi},
                        {0, sin_phi,  cos_phi}};

                    // Third rotation gives the final orientation; a new box is
                    // then built around it.
                    vector< vector<double> > Matrix_X = dotMatrix(Matrix_Z, size_coor, rotateX);
                    vector< vector<double> > boxP  = getBox(Matrix_X, size_coor, resolution, radius);

                    // Energies for this orientation.
                    //self.__getEnergies__(oricoor, prop)
                    vector<double> energy = getEnergy(boxP, Matrix_X, size_coor, Property, beginProbe, endProbe); 

                    // Keep the smaller value per slot (element-wise minimum
                    // over all orientations visited so far).
                    //self.sphore = self.energy
                    for (int a = 0; a < N_PROPERTIES * numberOfProbes; ++a) {
                        if (energy[a] < sphore[a]) {
                            sphore[a] = energy[a];
                        }
                    }
                }
            }
        }
    }

是否可以使用 CUDA 并行化 C++ 中的嵌套 for 循环（可行性探讨）

0 个答案: