Question

在访问for循环中的数组时，我遇到了分段错误。我想要做的是生成DNA串的所有子序列。

当我在for中创建数组时发生了这种情况。阅读一段时间后，我发现openmp限制了堆栈大小，因此使用堆更安全。所以我更改代码以使用malloc，但问题仍然存在。

这是完整的代码：

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <omp.h>

#define DNA_SIZE 26 
#define DNA "AGTC"

static char** powerset(int argc, char* argv)
{
    unsigned int i, j, bits, i_max = 1U << argc;

    if (argc >= sizeof(i) * CHAR_BIT) {
        fprintf(stderr, "Error: set too large\n");
        exit(1);
    }
    omp_set_num_threads(2);
    char** subsequences = malloc(i_max*sizeof(char*));

    #pragma omp parallel for shared(subsequences, argv) 
    for (i = 0; i < i_max ; ++i) {
        //printf("{");
        int characters = 0;
        for (bits=i; bits ; bits>>=1)
            if (bits & 1)
                ++characters;

        //This is the line where the error is happening. 
        char *ss = malloc(characters+1 * sizeof(char)*16);//the *16 is just to save the cache lin       

        int ssindex = 0;

        for (bits = i, j=0; bits; bits >>= 1, ++j) {
            if (bits & 1) {
                //char a = argv[j];
                ss[ssindex++] = argv[j] ;
            } 
        }
        ss[ssindex] = '\0';
        subsequences[i] = ss;       
    }
    return subsequences;
}

char* getdna()
{
    int i;

    char *dna = (char *)malloc((DNA_SIZE+1) * sizeof(char));

    for(i = 0; i < DNA_SIZE; i++)
    {
        int randomDNA = rand() % 4;
        dna[i] = DNA[randomDNA];
    }

    dna[DNA_SIZE] = '\0';

    return dna;
}

void printResult(char** ss, int size)
{
    //PRINTING THE SUBSEQUENCES
    printf("SUBSEQUENCES FOUND:\r\n");
    int i;
    for(i = 0; i < size; i++)
    {
        printf("%i.\t{ %s } \r\n",i+1 , ss[i]);
        free(ss[i]);
    }
    free(ss);
}

int main(int argc, char* argv[])
{
    srand(time(NULL));
    double starttime, stoptime;
    starttime = omp_get_wtime();
    char* a = getdna();
    printf("%s\r\n", a);
    int size = pow(2, DNA_SIZE);
    printf("number of subsequences: %i\r\n", size);

    char** subsequences = powerset(DNA_SIZE, a);    
    //todo: make it optional printing to the stdout or saving to a file
    //printResult(subsequences, size);
    stoptime = omp_get_wtime();

    printf("Tempo de execucao: %3.2f segundos\n\n", stoptime-starttime);
    printf("Numero de sequencias geradas: %i\n\n", size);
    free(a);
    return 0;
}

我还尝试使malloc行对#pragma omp critical起作用并没有帮助。此外，我尝试使用-mstackrealign进行编译，但这也无效。

感谢所有帮助。

Answer 1

使用高效的线程安全内存管理。

应用程序可以显式地使用malloc()和free()，也可以隐式地在编译器生成的代码中使用动态/可分配数组，向量化内在函数等。

某些libc实现中的线程安全malloc()和free()带有内部锁定导致的高同步开销。存在针对线程应用程序的更快的分配器。例如，在Solaris上，多线程应用程序应与“MT-hot”分配器mtmalloc链接，即与-lmtmalloc链接以使用mtmalloc而不是默认的libc分配器。 glibc用于Linux和一些带有GNU用户区的OpenSolaris和FreeBSD发行版，使用修改后的ptmalloc2分配器，它基于Doug Lea的dlmalloc。它使用多个内存竞技场来实现近乎无锁的行为。它也可以配置为使用每线程竞技场和一些分配，特别是RHEL 6和派生，启用该功能。

static char** powerset(int argc, char* argv)
{
    int i, j, bits, i_max = 1U << argc;

    if (argc >= sizeof(i) * CHAR_BIT) {
        fprintf(stderr, "Error: set too large\n");
        exit(1);
    }
    omp_set_num_threads(2);


    char** subsequences = malloc(i_max*sizeof(char*));

    int characters = 0;
    for (i = 0; i < i_max ; ++i)
    {
         for (bits=i; bits ; bits>>=1)
            if (bits & 1)
                ++characters;

        subsequences[i] = malloc(characters+1 * sizeof(char)*16);
        characters = 0;
    }


    #pragma omp parallel for shared(subsequences, argv) private(j,bits)
    for (i = 0; i < i_max; ++i)
    {     

        int ssindex = 0;

        for (bits = i, j=0; bits; bits >>= 1, ++j) {
            if (bits & 1) {
                subsequences[i][ssindex++] = argv[j] ;
            } 
        }
       subsequences[i][ssindex] = '\0';
    }

    return subsequences;
}

我之前创建数据类型，然后进行扩孔计算。

以上版本在群集中有12个线程采用“Tempo de execucao：9.44 segundos”

当我尝试并行以下代码并行时：

   #pragma omp parallel for shared(subsequences) private(bits,characters)
    for (i = 0; i < i_max ; ++i)
            {
                 for (bits=i; bits ; bits>>=1)
                    if (bits & 1)
                        ++characters;

                subsequences[i] = malloc(characters+1 * sizeof(char)*16);
                characters = 0;
            }

它需要“Tempo de execucao：10.19 segundos”

正如您所看到的，并行执行malloc可能会导致您的开销。

你最终也会遇到这样的问题：每个子malloc都试图分配(characters+1*DNA_SIZE*sizeof(char))而不是((characters+1)*DNA_SIZE*sizeof(char))，并且在内部不需要乘以缓存行大小的因子如果我理解你要避免的东西，那就是并行部分。

这段代码似乎也存在一些问题：

for (bits = i, j=0; bits; bits >>= 1, ++j) {
    if (bits & 1) {
        //char a = argv[j];
        ss[ssindex++] = argv[j] ;
    }
}

使用此代码，j有时会点击DNA_SIZE或DNA_SIZE+1，从而导致读取数据末尾的argv[j]。（另外，使用argc和argv作为此函数中参数的名称有点令人困惑。）

Answer 2

问题在于dna[DNA_SIZE] = '\0';。到目前为止，您已经为26个字符分配了内存（比方说），并且您正在尝试访问第27个字符。始终记住数组索引从0开始。

分段创建阵列时使用OpenMP时出错

2 个答案: