Question

我不知道该如何并行化这些循环，因为其中有很多相互依赖的变量，这让我很困惑。你能帮助并指导我吗？第一个循环是：

// Tries every a in [0, sigmaLen) as the value stored at lmer[pos].
for (int a = 0; a < sigmaLen; ++a) {
    const int f = freq[a];
    // The three checks run left to right and stop at the first one that fails,
    // so a later check is never evaluated unless the earlier ones passed.
    if (f >= sumFreqLB && updateRemainingDistances(s, a, pos) && prunePassed(pos + 1)) {
        lmer[pos] = a;
        // Continue with the next position and the bound reduced by f.
        enumerateStrings(pos + 1, sumFreqLB - f);
    }
}

第二个是：

// Fills the dist tables for the entry at index stackSz - 1.
//
// For every k from L down to 1 and every j < i it stores
//     dist[k - 1][pairOffset + j] =
//         dist[k][pairOffset + j] + (colS[k - 1][i] != colS[k - 1][j])
// so each row k - 1 is built from row k, which was written one step earlier.
void preprocessLowerBounds() {
    int i = stackSz - 1;
    // i * (i - 1) / 2; presumably the first slot reserved for the pairs (j, i)
    // in a packed pair table -- TODO confirm against the layout of dist.
    int pairOffset = (i * (i - 1)) >> 1;
    for (int k = L; k; --k) {
        int *dsn = dist[k] + pairOffset;      // source row (k)
        int *ds = dist[k - 1] + pairOffset;   // destination row (k - 1)
        int *s = colS[k - 1];
        char ci = s[i];
        for (int j = 0; j < i; ++j) {
            char cj = s[j];
            // Adds 1 when the two entries differ in column k - 1.
            *ds++ = (*dsn++) + (ci != cj);
        }
    }
}

还有另一个是：

    // Recursive enumeration over the items of row rowNumber.
    //
    // Reads the item count from rowSize[rowNumber][stackSz]. When
    // shouldGenerateNeighborhood() accepts the row, the work is handed to
    // bruteForceIt(). Otherwise every item of rowItem[rowNumber] is pushed with
    // addString(), tested, optionally recursed into with rowNumber + 1, and
    // popped again with removeLastString().
    //
    // NOTE(review): the recursive call passes explicit template arguments
    // <hasPreprocessedPairs, useQ>, so this is presumably a function template
    // whose template<...> header is not part of this excerpt -- confirm.
    void enumerateSubStrings(int rowNumber, int remainQTolerance) {
    int nItems = rowSize[rowNumber][stackSz];
    if (shouldGenerateNeighborhood(rowNumber, nItems)) {
        bruteForceIt(rowNumber, nItems);
    } else {
        indexType *row = rowItem[rowNumber];
        for (int j = 0; j < nItems; ++j) {
            indexType ind = row[j];
            // Push the candidate; this changes stackSz and the per-column state.
            addString(lmers + ind);
            preprocessLowerBounds();
            // maxLB is indexed with the stack size after the push.
            uint threshold = maxLB[stackSz] - addMaxFreq();
            if (hasSolution(0, threshold)) {
                if (getValid<hasPreprocessedPairs, useQ>(rowNumber + 1,
                        (stackSz <= 2 ? n : smallN), threshold + LminusD,
                        ind, remainQTolerance)) {
                    enumerateSubStrings<hasPreprocessedPairs, useQ>(
                            rowNumber + 1, remainQTolerance);
                }
            }
            // Always undo the push before trying the next item.
            removeLastString();
        }
    }
    }

void addString(const char *t) {
    int *mf = colMf[stackSz + 1];
    for (int j = 0; j < L; ++j) {
        int c = t[j];
        colS[j][stackSz] = c;
        mf[j] = colMaxFreq[j] + (colMaxFreq[j] == colFreq[j][c]++);
    }
    colMaxFreq = mf;
    ++stackSz;
}


// Fills the dist tables for the entry at index stackSz - 1: for k = L .. 1 and
// every j below that index, row k - 1 receives the value of row k plus 1 when
// the two entries differ in column k - 1.
void preprocessLowerBounds() {
    const int last = stackSz - 1;
    const int pairOffset = (last * (last - 1)) >> 1;
    for (int k = L; k != 0; --k) {
        const int *from = dist[k] + pairOffset;
        int *to = dist[k - 1] + pairOffset;
        const int *column = colS[k - 1];
        // Both values are narrowed to char before being compared.
        const char newest = column[last];
        for (int j = 0; j < last; ++j) {
            const char other = column[j];
            to[j] = from[j] + (newest != other);
        }
    }
}

void removeLastString() {
    --stackSz;
    for (int j = 0; j < L; ++j)
        --colFreq[j][colS[j][stackSz]];
    colMaxFreq = colMf[stackSz];
}

Answer 1

好的。用 OpenMP 并行化循环时，基本上要遵循两条规则：第一，不同线程绝不能写入同一内存位置；第二，绝不能依赖读取可能被另一个线程修改的内存区域。在第一个循环中，你只修改了 lmer 变量，其余操作都只是读取变量（我假设这些变量不会同时被代码的其他部分修改），因此第一个循环可以写成这样：

// "parallel for" both creates the thread team and splits the iterations; a bare
// "omp for" outside a parallel region is executed by one thread only.
// s and pos are given to every thread as copies initialized with their current
// values (firstprivate); with private() the copies would start uninitialized.
// The loop variable a is private automatically and must not be listed.
// freq and sumFreqLB are only read in this loop, so they stay shared.
#pragma omp parallel for firstprivate(s, pos)
for (int a = 0; a < sigmaLen; ++a) {
    const int f = freq[a];
    if (f >= sumFreqLB)
        if (updateRemainingDistances(s, a, pos))
            if (prunePassed(pos + 1)) {
                // Only one thread at a time executes this store.
                // NOTE(review): lmer itself is still shared. If enumerateStrings
                // (or anything it calls) reads or writes lmer, every thread
                // needs its own copy of the buffer -- confirm before relying
                // on this.
                #pragma omp critical
                {
                    lmer[pos] = a;
                }
                enumerateStrings(pos + 1, sumFreqLB - f);
            }
}

对于第二个循环，我没能看懂你的 for 是怎么用的，不过这没有问题，因为你只进行读取，并且只修改线程局部的变量。

你必须确保函数 updateRemainingDistances、prunePassed 和 enumerateStrings 的内部没有使用静态变量或全局变量。

在下面的函数中，你主要进行的是读操作，读操作可以由多个线程同时完成（前提是没有任何线程修改这些变量），而写入的都是各自独立的内存位置，所以只需改变 for 的写法，让 OpenMP 能够识别这个 for 即可。

// OpenMP version of preprocessLowerBounds().
//
// The k loop cannot be split between threads: dist[k - 1] is computed from
// dist[k], so every step needs the result of the previous one. The j index is
// different: slot pairOffset + j of each row depends only on the same slot of
// the row above it. The loops are therefore interchanged, and every thread
// runs the complete k chain (L down to 1) for its own values of j.
void preprocessLowerBounds() {
    const int i = stackSz - 1;
    const int pairOffset = (i * (i - 1)) >> 1;

    // "parallel for" creates the threads and distributes the j iterations.
    #pragma omp parallel for
    for (int j = 0; j < i; ++j) {
        for (int k = L; k > 0; --k) {
            const int *s = colS[k - 1];
            // Narrowed to char before comparing, as in the sequential code.
            const char ci = s[i];
            const char cj = s[j];
            dist[k - 1][pairOffset + j] = dist[k][pairOffset + j] + (ci != cj);
        }
    }
}

在最后一个函数中，你调用了许多我看不到源代码的函数，因此无法判断它们是否可以并行化。下面的示例说明了哪些写法是错误的、哪些是可以并行化的：

// Global vector shared by every caller of notParalelizable_1.
std::vector<int> myVector;

// Appends i to the global myVector.
// Not safe to call from several threads at once: concurrent push_back calls on
// the same std::vector are a data race unless the caller synchronizes them.
void notParalelizable_1(int i){
    myVector.push_back(i);
}

// Adds i to a function-local static accumulator. The static has a single
// instance for the whole program, so concurrent calls update the same object.
void notParalelizable_2(int i){
    static int A = 0;
    A += i;
}

// Global accumulator updated by notParalelizable_3.
int varGlobal = 0;

// Adds i to the global varGlobal; concurrent calls update the same object.
void notParalelizable_3(int i){
    varGlobal += i;
}

// Touches only its own parameter and a local variable, so calls from different
// threads do not share any object.
void oneFunctionParalelizable(int i)
{
    const int B = i;
    static_cast<void>(B);  // the copy is intentionally unused
}

// Demonstrates which loop bodies may and may not be run by several threads.
// Every loop uses the combined "parallel for" directive: a bare
// "#pragma omp for" outside a parallel region is executed by one thread only,
// so none of the problems described below would ever show up.
// The loops marked WRONG are kept wrong on purpose.
int main()
{
    // WRONG: every thread calls push_back on the same global myVector without
    // synchronization. That is a data race: elements may be lost, stored in
    // any order, or the program may crash.
    #pragma omp parallel for
    for (int i = 0; i < 10; i++)
    {
        notParalelizable_1(i);
    }

    // WRONG: the function-local static A is updated by all threads at once.
    #pragma omp parallel for
    for (int i = 0; i < 10; i++)
    {
        notParalelizable_2(i);
    }

    // WRONG: the global varGlobal is updated by all threads at once.
    #pragma omp parallel for
    for (int i = 0; i < 10; i++)
    {
        notParalelizable_3(i);
    }

    // CORRECT: the function only uses its own parameter and a local variable.
    #pragma omp parallel for
    for (int i = 0; i < 10; i++)
    {
        oneFunctionParalelizable(i);
    }

    // CORRECT: each iteration writes to a different element.
    // A local array replaces the original new[] that was never released.
    int vector[10];

    #pragma omp parallel for
    for (int i = 0; i < 10; i++)
    {
        vector[i] = i;
    }

    // WRONG: k is shared and updated by all threads, so its final value is
    // unpredictable. Adding the clause reduction(+:k) to the directive is the
    // standard way to make this sum correct.
    int k = 2;
    #pragma omp parallel for
    for (int i = 0; i < 10; i++)
    {
        k = k + i;
    }

    return 0;
}

如何用 OpenMP 并行化这个循环？

1 个答案: