Question

我正在写一个需要一些输入的程序：目录、文件名和一些标志。该程序的目的是在给定目录中搜索给定文件。在搜索时，如果找到另一个目录，它将打开该目录并继续搜索。其中一个标志允许用户选择程序用于搜索文件的线程数。

目录存储在堆栈中，我遇到的问题是线程之间的同步。我目前正在使用互斥锁和定时等待条件。这意味着如果线程已经等待了一定的时间，并且存储目录的堆栈为空，该线程就会结束。问题是，当只运行2个线程时，1个线程可能最终完成所有工作，即打开400个目录而另一个打开0个。

所以我的问题是......如何以更好的方式同步我的线程？也许没有使用定时等待条件？线程什么时候终止？

#include <sys/stat.h>
#include <sys/types.h>
#include <sys/time.h>
#include <unistd.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <dirent.h>
#include <getopt.h>
#include <string.h>
#include <limits.h>
#include <errno.h>
#include <pthread.h>

/* Forward declarations of the functions defined further down in this file. */
void search_func(char *path, char *name, int d, int f, int l);
void *thread_func(void *arg);
void push(char *data);
char* pop();
/* Size in bytes of one stack slot, i.e. the longest storable path plus NUL. */
#define MAXLENGTH 1000
/* Number of slots in the directory stack. */
#define MAXSIZE 10000
/* Added to the tv_sec field of the wait deadline in thread_func(). */
#define WAIT_TIME_SECONDS 0.1
/* Mutex locked by thread_func() around its whole work loop. */
pthread_mutex_t lock;
/* Condition variable that thread_func() waits on and signals. */
pthread_cond_t count_threshold_cv;
/* Fixed-capacity stack of directory paths waiting to be searched. */
struct stack
{
    char stk[MAXSIZE][MAXLENGTH]; /* one NUL-terminated path per slot */
    int top;                      /* index of the most recently pushed slot */
};
typedef struct stack STACK;
/* The single shared stack. As a file-scope object it starts zeroed, so top
   begins at 0; push() increments top before storing, so slot 0 is not
   written while top stays non-negative. */
STACK s;

struct arg_keeper {
    char **argv;
    int argc;
    int d;
    int f;
    int l;
};

int main(int argc, char **argv) {
    if(argc < 3) {
        fprintf(stderr, "Not enough arguments\n");
        return 1;
    }
    char *xValue = NULL;
    int x;
    int d = 0;
    int f = 0;
    int l = 0;
    int nrthr = 0;
    opterr = 0;
    int thread_count = 0;
    int directory_exist = 0;
    pthread_t tid[1024];

    while ((x = getopt(argc, argv, "t:p:")) != -1) {
        switch (x) {
        case 't':
            xValue = optarg;
            if (*xValue == 'd') {
                d = 1;
            } else if (*xValue == 'f') {
                f = 1;
            } else if (*xValue == 'l') {
                l = 1;
            }
            break;
        case 'p':
            nrthr = atoi(optarg);
            if(nrthr == 0) {
                fprintf(stderr, "Invalid thread count\n");
                return 1;
            }
            break;
        case '?':
            if (isprint (optopt))
                fprintf(stderr, "Unknown option '-%c'.\n",
                                optopt);
            return 1;
        default:
            abort();
        }
    }

    if (argc >= 3) {
        int i;
        for (i = optind; i < argc - 1; i++) {
            directory_exist = 1;
            push(argv[i]);
        }
    }
    if(directory_exist == 0) {
        fprintf(stderr, "No directories entered\n");
        return 1;
    }

    struct arg_keeper * arg_struct = malloc(sizeof(*arg_struct));
    arg_struct->argv = argv;
    arg_struct->argc = argc;
    arg_struct->d = d;
    arg_struct->f = f;
    arg_struct->l = l;

    if(pthread_mutex_init(&lock, NULL) != 0) {
        fprintf(stderr, "Mutex initialisation failed\n");
        return 1;
    }
    if(pthread_cond_init(&count_threshold_cv, NULL) != 0) {
        fprintf(stderr, "Condition variable initialisation failed\n");
        return 1;
    }

    while(thread_count < nrthr - 1) {
        if(pthread_create(&(tid[thread_count++]), NULL, thread_func,
                            arg_struct) != 0)
            fprintf(stderr, "Can't create thread\n");
    }

    if(nrthr!=0)
        thread_func(arg_struct);
    else
        thread_func(arg_struct);

    int c;
    for(c = 0; c < nrthr; c++) {
        pthread_join(tid[c], NULL);
    }
    pthread_mutex_destroy(&lock);
    free(arg_struct);
    return 0;
}

/*
 * Worker loop: repeatedly takes a directory off the global stack `s` and
 * scans it with search_func() until the stack is found empty.
 *
 * arg points to a struct arg_keeper, which is copied by value on entry; its
 * last argv entry is passed to search_func() as the name to look for.
 * Prints how many directories this thread opened and returns NULL.
 *
 * The mutex is locked before the loop and unlocked only after it, so
 * search_func() (and the push() calls it makes) run with `lock` held; the
 * wait inside pthread_cond_timedwait() is the only point where the mutex
 * is released.
 */
void *thread_func(void *arg) {
    int dirOpened = 0;
    struct arg_keeper arg_struct = *(struct arg_keeper *)arg;
    char *data;

    pthread_mutex_lock(&lock);
    struct timespec ts;
    struct timeval tp;
    while(1) {
        /* Build an absolute deadline from the current wall-clock time. */
        gettimeofday(&tp, NULL);
        ts.tv_sec  = tp.tv_sec;
        ts.tv_nsec = tp.tv_usec * 1000;
        /* NOTE(review): WAIT_TIME_SECONDS is 0.1 and tv_sec is normally an
           integer type, so this addition presumably truncates and leaves
           the deadline at "now" - confirm. */
        ts.tv_sec += WAIT_TIME_SECONDS;

        /* Work is only attempted after a timeout; any other return value
           (for example being signalled) just starts another wait. */
        if (pthread_cond_timedwait(&count_threshold_cv, &lock, &ts) == ETIMEDOUT) {
            if (s.top) {
                data = pop();
                pthread_cond_signal(&count_threshold_cv);
                dirOpened++;
                /* Runs with the mutex still held. */
                search_func(data, arg_struct.argv[arg_struct.argc - 1], arg_struct.d,
                        arg_struct.f, arg_struct.l);
            }
            else
                break; /* timed out and nothing on the stack: finish */
        }
    }
    pthread_mutex_unlock(&lock);
    fprintf(stdout, "Thread with id %lu opened %d directories\n",
                pthread_self(), dirOpened);
    return NULL;
}

/*
 * Scan one directory (non-recursively) for entries named testName.
 *
 * inPath   directory to open
 * testName entry name to look for
 * d, f, l  when all are 0 every entry with a matching name is reported;
 *          otherwise a match is reported only if it is a directory (d),
 *          a regular file (f) or a symbolic link (l)
 *
 * Each match is printed to stdout as "<dir>/<name>". Every sub-directory
 * other than "." and ".." is handed to push() so that it gets searched
 * later. Errors are reported on stderr and the affected entry is skipped.
 */
void search_func(char *inPath, char *testName, int d, int f, int l) {
    /* Private copy: inPath may point into the directory stack, whose slots
       are overwritten by the push() calls below. */
    char dir[PATH_MAX];
    int dirLen = snprintf(dir, sizeof dir, "%s", inPath);
    if (dirLen < 0 || (size_t)dirLen >= sizeof dir) {
        fprintf(stderr, "Error:'%s': %s\n", inPath, strerror(ENAMETOOLONG));
        return;
    }

    DIR *pDir = opendir(dir);
    if (pDir == NULL) {
        fprintf(stderr, "Error:'%s': %s\n", dir, strerror(errno));
        return;
    }

    /* Avoid a doubled slash when the directory already ends in one. */
    const char *sep = (dirLen > 0 && dir[dirLen - 1] == '/') ? "" : "/";
    const int anyType = (d == 0 && f == 0 && l == 0);
    struct dirent *pDirent;

    while ((pDirent = readdir(pDir)) != NULL) {
        char entry[PATH_MAX];
        int n = snprintf(entry, sizeof entry, "%s%s%s",
                            dir, sep, pDirent->d_name);
        if (n < 0 || (size_t)n >= sizeof entry) {
            fprintf(stderr, "Path too long: %s%s%s\n",
                        dir, sep, pDirent->d_name);
            continue;
        }

        const int nameMatches = (strcmp(pDirent->d_name, testName) == 0);

        /* Without a type filter the name alone decides. */
        if (nameMatches && anyType)
            fprintf(stdout, "%s\n", entry);

        /* lstat() so that symbolic links are seen as links, not followed. */
        struct stat file_info;
        if (lstat(entry, &file_info) != 0) {
            fprintf(stderr, "lstat error2: %s\n",
                        strerror(errno));
            continue;   /* file_info is not valid, do not inspect it */
        }

        if (nameMatches &&
                ((d == 1 && S_ISDIR(file_info.st_mode)) ||
                 (f == 1 && S_ISREG(file_info.st_mode)) ||
                 (l == 1 && S_ISLNK(file_info.st_mode)))) {
            fprintf(stdout, "%s\n", entry);
        }

        /* Queue real sub-directories for a later search. */
        if (S_ISDIR(file_info.st_mode) &&
                strcmp(pDirent->d_name, ".") != 0 &&
                strcmp(pDirent->d_name, "..") != 0) {
            push(entry);
        }
    }
    closedir(pDir);
}

/*
 * Push a copy of `data` onto the global directory stack `s`.
 *
 * The string is copied into the next free slot. If the stack is full, or
 * if `data` plus its terminating NUL does not fit into one slot, a message
 * is printed to stderr and the stack is left unchanged.
 *
 * No locking is done here; callers have to serialise access to `s`.
 */
void push(char *data)
{
    if (s.top == (MAXSIZE - 1)) {
        fprintf(stderr, "Stack is full\n");
        return;
    }
    /* Refuse strings that would overflow the fixed-size slot. */
    if (strlen(data) >= sizeof s.stk[0]) {
        fprintf(stderr, "Path too long for stack: %s\n", data);
        return;
    }
    s.top = s.top + 1;
    strcpy(s.stk[s.top], data);
}

/*
 * Remove the most recently pushed path from the global stack `s`.
 *
 * Returns a pointer to the path inside the stack's own storage, or NULL
 * (after printing a message to stderr) when the stack is empty. The
 * returned slot is reused by the next push(), so the caller must copy the
 * string before pushing anything.
 *
 * `s.top` starts at 0 and push() increments it before storing, so slot 0
 * is never filled and top == 0 is the empty state.
 */
char* pop()
{
    if (s.top <= 0) {
        fprintf(stderr, "Stack is empty\n");
        return NULL;
    }
    return s.stk[s.top--];
}

Answer 1

虽然使用POSIX nftw()或BSD fts（在Linux的glibc中都可用）可以更好地实现OP的需求，但是这个实现中的基本问题实际上非常有趣：每个工作线程最初消耗一个数据项，然后处理一段时间，并可能产生额外的数据项。

问题出现在所有现有数据项都已被取走、但仍有一个或多个工作线程在运行并且可能产生新的待处理数据项的时候。因此，“暂时没有待处理的数据项”不足以成为工作线程退出的理由。工作线程只应在既没有待处理的数据项、也没有可能产生新数据项的工作线程时退出。

显而易见的解决方案是使用互斥锁、条件变量（用于等待新的数据项），以及一个记录当前正在运行的工作线程数的计数器。

假设数据项存储在单链表中：

/* One pending unit of work, kept in a singly linked list. */
struct work_item {
    struct work_item *next;   /* next pending item, or NULL at the end */
    char              path[]; /* flexible array member holding the path */
};

上面的path成员是C99灵活数组成员。我们可以用来描述要完成的工作的结构可以是

/* Shared state describing all outstanding work; one instance is passed to
   every worker thread. */
struct work {
    pthread_mutex_t   mutex;  /* held while `active` or `item` is accessed */
    pthread_cond_t    cond;   /* waited on by workers when `item` is empty */
    long              active; /* workers currently processing an item */
    struct work_item *item;   /* head of the list of pending items */
};
/* Static initialiser: default mutex and condition variable, no active
   workers, empty item list. */
#define WORK_INITIALIZER {     \
    PTHREAD_MUTEX_INITIALIZER, \
    PTHREAD_COND_INITIALIZER,  \
    0L, NULL }

在将初始项目推送到item列表后，在给定指向共享struct work结构的指针的情况下，创建一个或多个线程。

逻辑类似于以下内容：

/*
 * Worker thread body for the shared work list.
 *
 * work_ptr points to the struct work shared by all workers. The thread
 * keeps taking items off work->item and processing them. It returns NULL
 * once the list is empty and no worker is active, because only an active
 * worker can still add new items.
 */
void *worker_thread(void *work_ptr)
{
    struct work *const work = work_ptr;
    struct work_item  *item;

    pthread_mutex_lock(&(work->mutex));

    while (1) {

        /* If there are no active workers,
           nor any work items, we're done. */
        if (!work->item && !work->active) {
            /* Ensure threads waiting on the condition
               variable are woken up, so they quit too. */
            pthread_cond_broadcast(&(work->cond));
            pthread_mutex_unlock(&(work->mutex));
            return NULL;
        }

        /* No work items left? */
        if (!work->item) {
            /* Wait for a new one to be produced,
               or a worker to notice we're done.
               Looping back re-checks both conditions,
               which also covers spurious wakeups. */
            pthread_cond_wait(&(work->cond), &(work->mutex));
            continue;
        }

        /* Increment active worker count, grab an item,
           and work on it. */
        work->active++;
        item = work->item;
        work->item = work->item->next;
        item->next = NULL;

        /* Unlock mutex while working. */
        pthread_mutex_unlock(&(work->mutex));

        /*
         * TODO: Work on item
        */

        /* Re-take the mutex before touching the shared counters. */
        pthread_mutex_lock(&(work->mutex));
        work->active--;
    }
}

当然，在处理项目时，必须在将项目推送到堆栈时重新获取互斥锁，并在条件变量上发出信号以唤醒工作线程（如果正在等待新工作）：

        /* New item to be added to the shared list. */
        struct work_item *temp;

        /* TODO: Allocate and initialize temp */

        /* Link temp in at the head of the list while holding the mutex. */
        pthread_mutex_lock(&(work->mutex));
        temp->next = work->item;
        work->item = temp;
        /* Wake one thread that may be waiting for new work. */
        pthread_cond_signal(&(work->cond));
        pthread_mutex_unlock(&(work->mutex));

请注意活动计数器反映的是当前正在处理某个项目的线程数（基本上是当前正在运行的 producer 的数量），而不是现有工作线程的数量！

如果一个线程注意到没有更多项可以使用，也没有任何生产者在运行，则广播条件变量，这样如果有任何线程等待新工作，将被唤醒（并且同样注意到没有更多工作）。每当项目被添加到工作清单时，条件变量也会发出信号（仅唤醒一个等待的线程）。

Answer 2

如何更好地同步线程？也许不使用定时的等待条件？

是的——我会丢弃条件变量，改用一组两个信号量（semaphore）：第一个信号量统计堆栈中待处理的目录数，第二个信号量统计正在忙碌的工作线程数。为了使工作线程保持简单，我会让所有搜索工作都在创建的线程中进行，而不再从main()调用thread_func()。可以保留pthread_mutex_t lock，以保护STACK s免受并发访问。

线程应何时终止？

正如Nominal Animal所写：工作线程只应在既没有待处理的数据项、也没有可能产生新数据项的工作线程时退出。上述信号量提供了所需的信息，并且使main()线程可以很容易地等待该条件。

对程序的更改将是

在文件范围内：
```
#include <sys/sem.h>
int semid;  /* id of the semaphore set: [0] dirs on stack, [1] threads at work */
```

在push()中：

/*
 * Push a copy of `data` onto the global stack `s` and count it in
 * semaphore 0 ("dirs on stack") of the set `semid`.
 *
 * The stack is modified with `lock` held. If the stack is full, the string
 * does not fit into a slot, or the semaphore cannot be incremented, a
 * message is printed to stderr and the stack is left unchanged.
 */
void push(char *data)
{
    pthread_mutex_lock(&lock);
    if (s.top == MAXSIZE-1) {
        fprintf(stderr, "Stack is full\n");
    } else if (strlen(data) >= sizeof s.stk[0]) {
        /* Would overflow the fixed-size slot. */
        fprintf(stderr, "Path too long for stack: %s\n", data);
    } else {
        /* Name the members: POSIX does not fix their order in the struct. */
        struct sembuf add_dir = { .sem_num = 0, .sem_op = 1, .sem_flg = 0 };

        strcpy(s.stk[++s.top], data);
        if (semop(semid, &add_dir, 1) == -1) {  // add 1 to "dirs on stack"
            perror("semop");
            --s.top;    /* an uncounted entry would never be popped */
        }
    }
    pthread_mutex_unlock(&lock);
}

请注意，我们在这里递增第一个信号量。

在pop()中：

/*
 * Remove the most recently pushed path from the global stack `s`, with
 * `lock` held while the stack is accessed.
 *
 * Returns a heap-allocated copy of the path that the caller must free(),
 * or NULL (after printing a message to stderr) when the stack is empty or
 * the copy cannot be allocated. A copy is returned because the released
 * slot can be overwritten by another thread's push() at any time.
 *
 * `s.top` starts at 0 and push() increments it before storing, so slot 0
 * is never filled and top == 0 is the empty state.
 */
char *pop()
{
    char *data = NULL;

    pthread_mutex_lock(&lock);
    if (s.top <= 0) {
        fprintf(stderr, "Stack is empty\n");
    } else {
        data = strdup(s.stk[s.top--]);  // Don't return freed stack slot!
        if (data == NULL)
            fprintf(stderr, "Out of memory\n");
    }
    pthread_mutex_unlock(&lock);
    return data;
}

请注意，我们返回数据的副本，而不仅仅是指向数据的指针，因为释放的栈顶可以随时被另一个线程重用和覆盖。

在main()中：

    // create semaphore set of 2 sems: [0] dirs on stack, [1] threads at work
    // NOTE(review): the results of semget() and semctl() are not checked here.
    semid = semget(IPC_PRIVATE, 2, S_IRWXU);
    // NOTE(review): the empty initializer `{}` is standard C only since C23
    // (earlier compilers accept it as an extension) - confirm the target.
    semctl(semid, 0, SETALL, (unsigned short [2]){});   // zero the sem values

请注意，这段代码必须放在第一次调用push()之前，这样push()才能递增信号量。

同样在main()中，替换原来创建线程并调用thread_func()的代码：

    // start nrthr workers; main() itself does no searching in this version
    // NOTE(review): thread_count is incremented even when pthread_create()
    // fails, so tid[] may then hold an entry that is not a valid thread.
    while (thread_count < nrthr)
        if (pthread_create(&tid[thread_count++], NULL, thread_func, arg_struct))
            fprintf(stderr, "Can't create thread\n");

    // wait until no more dirs on stack and no more threads at work
    // (a sem_op of 0 blocks until that semaphore's value is zero)
    semop(semid, (struct sembuf []){{0, 0}, {1, 0}}, 2);
    semctl(semid, 0, IPC_RMID); // remove the semaphores, make threads finish

请注意，我们创建nrthr个而不是nrthr - 1个线程，因为main()线程不参与工作，它只是等待所有工作完成。然后，它销毁信号量集，从而使工作线程退出循环并结束（请参见下文）。

在thread_func()中：

/*
 * Worker thread: takes directories off the stack and searches them until
 * the semaphore set `semid` is removed.
 *
 * arg points to a struct arg_keeper, which is copied by value on entry; its
 * last argv entry is the name to search for. Semaphore 0 counts the
 * directories on the stack and semaphore 1 the threads currently at work.
 *
 * Prints how many directories this thread opened and returns that count
 * converted to a pointer.
 */
void *thread_func(void *arg)
{
    int dirOpened = 0;
    struct arg_keeper arg_struct = *(struct arg_keeper *)arg;
    /* Name the members: POSIX does not fix their order in the struct. */
    struct sembuf take_work[2] = {
        { .sem_num = 0, .sem_op = -1, .sem_flg = 0 },  /* dirs on stack -= 1 */
        { .sem_num = 1, .sem_op =  1, .sem_flg = 0 },  /* threads at work += 1 */
    };
    struct sembuf work_done = { .sem_num = 1, .sem_op = -1, .sem_flg = 0 };

    for (;;)
    {
        // wait for work, subtract 1 from dirs on stack and add 1 to threads at work
        if (semop(semid, take_work, 2) == -1) {
            if (errno == EINTR)
                continue;   /* interrupted by a signal: just retry */
            break;          // this loop ends when semid is removed
        }

        char *data = pop();
        if (data != NULL) {     /* pop() returns NULL if it has no copy to give */
            dirOpened++;
            search_func(data, arg_struct.argv[arg_struct.argc-1],
                        arg_struct.d, arg_struct.f, arg_struct.l);
            free(data);
        }
        semop(semid, &work_done, 1);    // "threads at work" -= 1
    }
    fprintf(stdout, "Thread with id %lu opened %d directories\n",
            (unsigned long)pthread_self(), dirOpened);
    /* Widen through long so the int-to-pointer conversion is explicit. */
    return (void *)(long)dirOpened;
}

请注意，main()销毁信号量集后，semop()返回 -1，因此循环结束。另外请注意，我们释放了在pop()中分配的数据副本。

线程同步 - 何时终止线程？

2 个答案: