我有一个 C++(CPP)文件,它占用大量 CPU,并且包含很多循环/迭代。它被编译成共享对象(shared object)并封装给 Python 调用。一位医生问我能否把它改成并发(并行)执行,先在便宜的 GPU 上做出一个初步版本,然后再考虑购买昂贵的设备(我是学生)。我已经阅读了相关资料并写过一些基本的 CUDA 程序,它们可以正常运行,但现在不知道该如何着手,也不确定这段代码是否可以并行化。
这是我们正在讨论的循环:
vector<double> Calculate (vector< vector<double> > Oricoor, int size_coor, vector< vector<double> > Property, string normalization,
vector<int> rotationStepList, float resolution, int beginProbe, int endProbe, vector<double> radius) {
const int N_PROPERTIES = 4;
int size = rotationStepList.size();
const int numberOfProbes = endProbe - beginProbe;
//self.__getBox__(oricoor)
vector< vector<double> > boxPoint = getBox(Oricoor, size_coor, resolution, radius);
//self.__getEnergies__(oricoor, prop)
vector<double> e = getEnergy(boxPoint, Oricoor, size_coor, Property, beginProbe, endProbe);
//self.sphore = self.energy
vector<double> sphore(N_PROPERTIES * numberOfProbes);
for (int i = 0; i < N_PROPERTIES * numberOfProbes; ++i) {
sphore[i] = e[i];
}
for (int i = 0; i < size; i++) {
int rotationStep = rotationStepList[i];
//cout << "Rotation Sterp: " << rotationStep << endl;
for (int iTheta = 0; iTheta < 180; iTheta += rotationStep){
double theta = 0.017453292519943 * iTheta;
double cos_theta = cos(theta);
double sin_theta = sin(theta);
double rotateY[3][3] = {{cos_theta, 0, -sin_theta},
{ 0, 1, 0},
{sin_theta, 0, cos_theta}};
vector< vector<double> > Matrix_Y = dotMatrix(Oricoor, size_coor, rotateY);
for (int iPsi = 0; iPsi < 360; iPsi += rotationStep) {
double psi = 0.017453292519943 * iPsi;
double cos_psi = cos(psi);
double sin_psi = sin(psi);
double rotateZ[3][3] = {{cos_psi, -sin_psi, 0},
{sin_psi, cos_psi, 0},
{ 0, 0, 1}};
vector< vector<double> > Matrix_Z = dotMatrix(Matrix_Y, size_coor, rotateZ);
for (int iPhi = 0; iPhi < 360; iPhi += rotationStep){
double phi = 0.017453292519943 * iPhi;
double cos_phi = cos(phi);
double sin_phi = sin(phi);
double rotateX[3][3] = {{1, 0, 0},
{0, cos_phi, -sin_phi},
{0, sin_phi, cos_phi}};
vector< vector<double> > Matrix_X = dotMatrix(Matrix_Z, size_coor, rotateX);
vector< vector<double> > boxP = getBox(Matrix_X, size_coor, resolution, radius);
//self.__getEnergies__(oricoor, prop)
vector<double> energy = getEnergy(boxP, Matrix_X, size_coor, Property, beginProbe, endProbe);
//self.sphore = self.energy
for (int a = 0; a < N_PROPERTIES * numberOfProbes; ++a) {
if (energy[a] < sphore[a]) {
sphore[a] = energy[a];
}
}
}
}
}
}