我的计算机类涵盖了POSIX中的线程和并行性。对于我在课堂上的实验室任务,我们被要求在Linux中创建一个名为 search 的C程序,其功能与bash命令相似" grep -R"对于作为参数传入的一组文件/目录。但是,我们必须使用线程并在处理文本搜索时实现并行性。
当我们遇到一个文件(搜索它)时,我们被要求分叉一个新线程,当我们到达一个子目录时,我们会在主线程中递归调用search。
我试图以这种方式解决问题
#include <pthread.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <dirent.h>
#include <sys/stat.h>
#include <string.h>
pthread_t array[1000000]; // high number just to be safe
int current = 0; // current amount of threads.. should only be accessed by initial thread (main)
// so i dont think mutex is necessary..? im not sure though
struct arguments
{
char* pattern;
int file_index;
char* file_name;
};
char* remove_newline(char* line)
{
int i = strlen(line)-1;
if ((i > 0) && (line[i] == '\n'))
{
line[i] = '\0';
}
return line;
}
void print_match(char* file_name, int file_index, int line_number, char* line)
{
printf("%d %s %d %s\n", file_index, file_name, line_number, line); // will this work in parallel threads?
}
void add_all_matching_lines(char* pattern, int file_index, char* file_name)
{
FILE* file = fopen(file_name, "r");
if (file == NULL)
{
return; // error checking, gotta deal with this later
}
int line_number = 1;
char line[10256];
while (fgets(line, sizeof(line), file))
{
if (strstr(line, pattern) != NULL)
{
print_match(file_name, file_index, line_number, line);
}
line_number++;
}
fclose(file);
}
void* thread_work (void* ptr)
{
struct arguments* parameters;
parameters = (struct arguments *) ptr;
add_all_matching_lines(parameters->pattern, parameters->file_index, parameters->file_name);
free(parameters);
pthread_exit(NULL);
}
void search(char* pattern, int file_index, char* file_name)
{
struct stat buffer;
if (stat(file_name, &buffer) == -1) // also occurs if file is unreadable
{
return;
}
if (buffer.st_mode & S_IFREG )
{
struct arguments* struct_test = malloc(sizeof(struct arguments));
struct_test->pattern = pattern;
struct_test->file_index = file_index;
struct_test->file_name = file_name;
pthread_create(&array[current], NULL, thread_work, (void *) struct_test);
current++;
}
else if (buffer.st_mode & S_IFDIR )
{
DIR* directory = opendir(file_name);
struct dirent* file;
while ((file = readdir(directory)) != NULL)
{
if ((strcmp(".", file->d_name) < 0) && (strcmp("..", file->d_name) < 0))
{
char test[2] = "/";
char buffer[1024];
memset(buffer, 0, 1024);
strcpy(buffer, file_name);
strcat(buffer, test);
strcat(buffer, file->d_name);
search(pattern, file_index, buffer); // recursive call if it's a subdirectory
}
}
}
}
int main(int argc, char* argv[])
{
int file_index = 1;
if (argc < 3)
{
printf("Usage: search pattern file\n");
exit(-1);
}
for (int i = 2; i != argc; i++)
{
search(argv[1], file_index++, argv[i]);
}
for (int z = 0; z != current; z++)
{
int test = (pthread_join(array[z], NULL));
if (test != 0)
{
perror("pthread");
}
}
return 0;
}
命令的预期输出是
SAMPLE INPUT (note this example only creates between 3 and 6 threads)
$ search encyclopedia /usr/share/dict /usr/share/dict /usr/share/dict
SAMPLE OUTPUT (assuming the word encyclopedia is on line 345 of /usr/share/dict/linux.words)
1 /usr/share/dict/linux.words 126576 encyclopedia
3 /usr/share/dict/linux.words 126577 encyclopediac
2 /usr/share/dict/linux.words 126578 encyclopediacal
1 /usr/share/dict/linux.words 126579 encyclopedial
2 /usr/share/dict/linux.words 126580 encyclopedian
3 /usr/share/dict/linux.words 126581 encyclopedias
1 /usr/share/dict/linux.words 126582 encyclopediast
2 /usr/share/dict/linux.words 126576 encyclopedia
2 /usr/share/dict/linux.words 126577 encyclopediac
3 /usr/share/dict/linux.words 126578 encyclopediacal
3 /usr/share/dict/linux.words 126579 encyclopedial
1 /usr/share/dict/linux.words 126580 encyclopedian
2 /usr/share/dict/linux.words 126581 encyclopedias
2 /usr/share/dict/linux.words 126582 encyclopediast
3 /usr/share/dict/linux.words 126576 encyclopedia
1 /usr/share/dict/linux.words 126577 encyclopediac
2 /usr/share/dict/linux.words 126578 encyclopediacal
2 /usr/share/dict/linux.words 126579 encyclopedial
3 /usr/share/dict/linux.words 126580 encyclopedian
1 /usr/share/dict/linux.words 126581 encyclopedias
3 /usr/share/dict/linux.words 126582 encyclopediast
(Note they can come out in any order, so I just chose the order 2 1 3 for this example.)
然而,当我运行我的程序时,我的输出通常类似于
$ search encyclopedia /usr/share/dict /usr/share/dict /usr/share/dict
1 /usr/share/dict/linux.words 126576 encyclopedia
1 /usr/share/dict/linux.words 126577 encyclopediac
1 /usr/share/dict/linux.words 126579 encyclopediacal
1 /usr/share/dict/linux.words 126580 encyclopedian
1 /usr/share/dict/linux.words 126582 encyclopediast
1 /usr/share/dict/linux.words 126581 encyclopedias
2 /usr/share/dict/linux.words 126576 encyclopedia
2 /usr/share/dict/linux.words 126577 encyclopediac
2 /usr/share/dict/linux.words 126578 encyclopediacal
2 /usr/share/dict/linux.words 126580 encyclopedian
2 /usr/share/dict/linux.words 126578 encyclopediacal
2 /usr/share/dict/linux.words 126579 encyclopedial
2 /usr/share/dict/linux.words 126581 encyclopedias
2 /usr/share/dict/linux.words 126582 encyclopediast
3 /usr/share/dict/linux.words 126577 encyclopediac
3 /usr/share/dict/linux.words 126581 encyclopedias
3 /usr/share/dict/linux.words 126578 encyclopediacal
3 /usr/share/dict/linux.words 126579 encyclopedial
3 /usr/share/dict/linux.words 126576 encyclopedia
3 /usr/share/dict/linux.words 126580 encyclopedian
3 /usr/share/dict/linux.words 126582 encyclopediast
或者有时候,它会在这些线之间的某处停止并意外结束。
或有时,它按预期工作,数字变得杂乱无章。
我很难调试我的程序并弄清楚如何在100%的时间内实现预期的输出。我每次遇到一个文件时都会创建一个线程,这样当主线程继续处理剩余的目录/文件时,线程会对文件进行实际搜索,这样搜索就可以相互平行。
然后为了在主要退出之前等待所有线程完成,我加入它们以便主要赢得终止直到所有线程都完成。
任何人都可以帮我弄清楚我做错了什么吗?我不确定它是否是一个程序错误(我花了几个小时试图调试我的程序而没有运气)或者是一个概念错误的线程。
非常感谢您的帮助!