pthread并行不按预期工作

时间:2015-03-05 04:31:55

标签: c multithreading pthreads

我的计算机类涵盖了POSIX中的线程和并行性。对于我在课堂上的实验室任务,我们被要求在Linux中创建一个名为 search 的C程序,其功能与bash命令相似" grep -R"对于作为参数传入的一组文件/目录。但是,我们必须使用线程并在处理文本搜索时实现并行性。

当我们遇到一个文件(搜索它)时,我们被要求分叉一个新线程,当我们到达一个子目录时,我们会在主线程中递归调用search。

我试图以这种方式解决问题

#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <dirent.h>
#include <sys/stat.h>
#include <string.h>

pthread_t array[1000000]; // high number just to be safe
int current = 0; // current amount of threads.. should only be accessed by initial thread (main)
// so i dont think mutex is necessary..? im not sure though

struct arguments
{
    char* pattern;
    int file_index;
    char* file_name;
};


char* remove_newline(char* line)
{
    int i = strlen(line)-1;
    if ((i > 0) && (line[i] == '\n'))
    {
        line[i] = '\0';
    }
    return line;
}
void print_match(char* file_name, int file_index, int line_number, char* line)
{
    printf("%d %s %d %s\n", file_index, file_name, line_number, line); // will this work in parallel threads?
}

void add_all_matching_lines(char* pattern, int file_index, char* file_name)
{
    FILE* file = fopen(file_name, "r");
    if (file == NULL)
    {
        return; // error checking, gotta deal with this later
    }
    int line_number = 1;
    char line[10256];
    while (fgets(line, sizeof(line), file))
    {
        if (strstr(line, pattern) != NULL)
        {
            print_match(file_name, file_index, line_number, line);
        }
        line_number++;
    }
    fclose(file);
}

void* thread_work (void* ptr)
{
    struct arguments* parameters;
    parameters = (struct arguments *) ptr;
    add_all_matching_lines(parameters->pattern, parameters->file_index, parameters->file_name);
    free(parameters);
    pthread_exit(NULL);
}

void search(char* pattern, int file_index, char* file_name)
{
    struct stat buffer;
    if (stat(file_name, &buffer) == -1) // also occurs if file is unreadable
    {
        return;
    }
    if (buffer.st_mode & S_IFREG )
    {        
        struct arguments* struct_test = malloc(sizeof(struct arguments));
        struct_test->pattern = pattern;
        struct_test->file_index = file_index;
        struct_test->file_name = file_name;
        pthread_create(&array[current], NULL, thread_work, (void *) struct_test);
        current++;
    }
    else if (buffer.st_mode & S_IFDIR )
    {
       DIR* directory = opendir(file_name);
       struct dirent* file;
       while ((file = readdir(directory)) != NULL)
       {
            if ((strcmp(".", file->d_name) < 0) && (strcmp("..", file->d_name) < 0))
            {
                char test[2] = "/";
                char buffer[1024];
                memset(buffer, 0, 1024);
                strcpy(buffer, file_name);
                strcat(buffer, test);
                strcat(buffer, file->d_name);
                search(pattern, file_index, buffer); // recursive call if it's a subdirectory
            }
       }
    }
}

int main(int argc, char* argv[])
{
    int file_index = 1;
    if (argc < 3)
    {
        printf("Usage: search pattern file\n");
        exit(-1);
    }
    for (int i = 2; i != argc; i++)
    {
        search(argv[1], file_index++, argv[i]);
    }
    for (int z = 0; z != current; z++)
    {
        int test = (pthread_join(array[z], NULL));
        if (test != 0)
        {
            perror("pthread");
        }
    }
    return 0;
}

命令的预期输出是

SAMPLE INPUT (note this example only creates between 3 and 6 threads)
$ search encyclopedia /usr/share/dict /usr/share/dict /usr/share/dict
SAMPLE OUTPUT (assuming the word encyclopedia is on line 345 of /usr/share/dict/linux.words)
1 /usr/share/dict/linux.words 126576 encyclopedia
3 /usr/share/dict/linux.words 126577 encyclopediac
2 /usr/share/dict/linux.words 126578 encyclopediacal
1 /usr/share/dict/linux.words 126579 encyclopedial
2 /usr/share/dict/linux.words 126580 encyclopedian
3 /usr/share/dict/linux.words 126581 encyclopedias
1 /usr/share/dict/linux.words 126582 encyclopediast
2 /usr/share/dict/linux.words 126576 encyclopedia
2 /usr/share/dict/linux.words 126577 encyclopediac
3 /usr/share/dict/linux.words 126578 encyclopediacal
3 /usr/share/dict/linux.words 126579 encyclopedial
1 /usr/share/dict/linux.words 126580 encyclopedian
2 /usr/share/dict/linux.words 126581 encyclopedias
2 /usr/share/dict/linux.words 126582 encyclopediast
3 /usr/share/dict/linux.words 126576 encyclopedia
1 /usr/share/dict/linux.words 126577 encyclopediac
2 /usr/share/dict/linux.words 126578 encyclopediacal
2 /usr/share/dict/linux.words 126579 encyclopedial
3 /usr/share/dict/linux.words 126580 encyclopedian
1 /usr/share/dict/linux.words 126581 encyclopedias
3 /usr/share/dict/linux.words 126582 encyclopediast
(Note they can come out in any order, so I just chose the order 2 1 3 for this example.)

然而,当我运行我的程序时,我的输出通常类似于

$ search encyclopedia /usr/share/dict /usr/share/dict /usr/share/dict
1 /usr/share/dict/linux.words 126576 encyclopedia
1 /usr/share/dict/linux.words 126577 encyclopediac
1 /usr/share/dict/linux.words 126579 encyclopediacal
1 /usr/share/dict/linux.words 126580 encyclopedian
1 /usr/share/dict/linux.words 126582 encyclopediast
1 /usr/share/dict/linux.words 126581 encyclopedias
2 /usr/share/dict/linux.words 126576 encyclopedia
2 /usr/share/dict/linux.words 126577 encyclopediac
2 /usr/share/dict/linux.words 126578 encyclopediacal
2 /usr/share/dict/linux.words 126580 encyclopedian
2 /usr/share/dict/linux.words 126578 encyclopediacal
2 /usr/share/dict/linux.words 126579 encyclopedial
2 /usr/share/dict/linux.words 126581 encyclopedias
2 /usr/share/dict/linux.words 126582 encyclopediast
3 /usr/share/dict/linux.words 126577 encyclopediac
3 /usr/share/dict/linux.words 126581 encyclopedias
3 /usr/share/dict/linux.words 126578 encyclopediacal
3 /usr/share/dict/linux.words 126579 encyclopedial
3 /usr/share/dict/linux.words 126576 encyclopedia
3 /usr/share/dict/linux.words 126580 encyclopedian
3 /usr/share/dict/linux.words 126582 encyclopediast

或者有时候,它会在这些线之间的某处停止并意外结束。

或有时,它按预期工作,数字变得杂乱无章。

我很难调试我的程序并弄清楚如何在100%的时间内实现预期的输出。我每次遇到一个文件时都会创建一个线程,这样当主线程继续处理剩余的目录/文件时,线程会对文件进行实际搜索,这样搜索就可以相互平行。

然后为了在主要退出之前等待所有线程完成,我加入它们以便主要赢得终止直到所有线程都完成。

任何人都可以帮我弄清楚我做错了什么吗?我不确定它是否是一个程序错误(我花了几个小时试图调试我的程序而没有运气)或者是一个概念错误的线程。

非常感谢您的帮助!

0 个答案:

没有答案