Question

我正在编写一个简单的程序，它从文本文件中读取URL，并使用多线程编程检查它们的有效性。我使用互斥锁和条件变量来同步我的线程，但我的应用程序仍然崩溃，经过一些调试会话后我决定得到一些帮助：）

文件的输入是一个包含以下网址的文本文件： http://www.youtube.com/ http://www.facebook.com/

输出应该是每个url上curl请求的聚合结果（无论是返回OK，UNKOWN还是ERROR）

这是我的代码：

/*
 * ex3.c
 */

#define _GNU_SOURCE

#include <curl/curl.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>


#define REQUEST_TIMEOUT_SECONDS 2L

#define URL_OK 0
#define URL_ERROR 1
#define URL_UNKNOWN 2

#define QUEUE_SIZE 32

#define handle_error_en(en, msg) \
        do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0)

typedef struct {
    int ok, error, unknown;
} UrlStatus;


typedef struct {
    void **array;
    int size;
    int capacity;
    int head;
    int tail;
    pthread_mutex_t mutex;
    pthread_cond_t cv_empty; /* get notified when the queue is not full */
    pthread_cond_t cv_full; /* get notified when the queue is not empty */
} Queue;

void queue_init(Queue *queue, int capacity) {
    /*
     * Initializes the queue with the specified capacity.
     * This function should allocate the internal array, initialize its properties
     * and also initialize its mutex and condition variables.
     */
    queue->array = (void**)malloc(sizeof(void*) * capacity);
    if (queue->array == NULL) {
        perror("unable to allocate memory");
        exit(EXIT_FAILURE);
    }
    queue->capacity = capacity;
    queue->size = 0;
    queue->head = 0;
    queue->tail = 0;
    pthread_mutex_init(&(queue->mutex), NULL);
    pthread_cond_init(&(queue->cv_empty), NULL);
    pthread_cond_init(&(queue->cv_full), NULL);
}

void enqueue(Queue *queue, void *data) {
    /*
     * Enqueue an object to the queue.
     *
     * TODO:
     * 1. This function should be synchronized on the queue's mutex
     * 2. If the queue is full, it should wait until it is not full
     *      (i.e. cv_empty)
     * 3. Add an element to the tail of the queue, and update the tail & size
     *      parameters
     * 4. Signal that the queue is not empty (i.e. cv_full)
     */

    pthread_mutex_lock(&(queue->mutex));
    while (queue->size >= QUEUE_SIZE) {
        pthread_cond_wait(&(queue->cv_empty), &(queue->mutex));
    }

    if(queue->size == 0) {
        queue->head = 0;
    }

    queue->array[queue->tail] = data;
    queue->size++;

    queue->tail++;
    pthread_cond_signal(&(queue->cv_full));
    pthread_mutex_unlock(&(queue->mutex));

}

void *dequeue(Queue *queue) {
    /*
     * Dequeue an object from the queue.
     *
     * TODO:
     * 1. This function should be synchronized on the queue's mutex
     * 2. If the queue is empty, it should wait until it is not empty (i.e. cv_full)
     * 3. Read the head element, and update the head & size parameters
     * 4. Signal that the queue is not full (i.e. cv_empty)
     * 5. Return the dequeued item
     */
    void *data;

    pthread_mutex_lock(&(queue->mutex));
    while (queue->size <= 0) {
        pthread_cond_wait(&(queue->cv_full), &(queue->mutex));
    }

    queue->head++;
    data = queue->array[queue->head];
    queue->size--;


    pthread_cond_signal(&(queue->cv_empty));
    pthread_mutex_unlock(&(queue->mutex));


    return data;
}

void queue_destroy(Queue *queue) {
    /*
     * Free the queue memory and destroy the mutex and the condition variables.
     */
    int ret;

    free(queue->array);

    ret = pthread_mutex_destroy(&(queue->mutex));
    if (ret != 0) {
        handle_error_en(ret, "unable to destroy mutex");
    }
    ret = pthread_cond_destroy(&(queue->cv_empty));
    if (ret != 0) {
        handle_error_en(ret, "unable to destroy cv_empty condition variable");
    }
    ret = pthread_cond_destroy(&(queue->cv_full));
    if (ret != 0) {
        handle_error_en(ret, "unable to destroy cv_full condition variable");
    }
}

void usage() {
    fprintf(stderr, "usage:\n\t./ex3 FILENAME NUMBER_OF_THREADS\n");
    exit(EXIT_FAILURE);
}

int count = 0;
int check_url(const char *url) {
    CURL *curl;
    CURLcode res;
    long response_code = 0L;
    int http_status = URL_UNKNOWN;

    curl = curl_easy_init();

    if(curl) {
        curl_easy_setopt(curl, CURLOPT_URL, url);
        curl_easy_setopt(curl, CURLOPT_TIMEOUT, REQUEST_TIMEOUT_SECONDS);
        curl_easy_setopt(curl, CURLOPT_NOBODY, 1L); /* do a HEAD request */

        res = curl_easy_perform(curl);
        if(res == CURLE_OK) {
            res = curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &response_code);

            if (res == CURLE_OK &&
                    response_code >= 200 &&
                    response_code < 400) {
                http_status = URL_OK;
            } else {
                http_status = URL_ERROR;
            }
        }
        curl_easy_cleanup(curl);
    }


    return http_status;
}

typedef struct {
    Queue *url_queue;
    Queue *result_queue;
} WorkerArguments;

void *worker(void *args) {
    /*
     * TODO:
     * 1. Initialize a UrlStatus (allocate memory using malloc, and zero it
     *      using memset)
     * 2. Dequeue URL from url_queue, run check_url on it, and update results
     *    (don't forget to free() the url)
     * 3. After dequeuing a NULL value:
     *      Enqueue results to result_queue and return
     */

    WorkerArguments *worker_args = (WorkerArguments *)args;
    UrlStatus *results = NULL;
    char *url;

    results = malloc(sizeof(UrlStatus));
    if(results == NULL) {
        perror("Could not allocate memory");
        exit(-1);
    }

    memset(results, 0, sizeof(UrlStatus));

    while((url = (char *)dequeue(worker_args->url_queue)) != NULL) {    
        switch(check_url(url)) {
            case URL_OK:
                results->ok++;
            break;
            case URL_UNKNOWN:
                results->unknown++;
            break;
            case URL_ERROR:
                results->error++;
            break;
        }
        /*free(url);*/
    }
    enqueue(worker_args->result_queue, results);
    return NULL;
}

typedef struct {
    const char *filename;
    Queue *url_queue;
} FileReaderArguments;

void *file_reader(void *args) {
    /*
     * TODO:
     * 1. Open filename (use fopen, check for errors)
     * 2. Use getline() to read lines (i.e. URLs) from the file (use errno to check for errors)
     * 3. Copy each url to the heap (use malloc and strncpy)
     * 4. Enqueue the URL to url_queue
     * 5. Don't forget to free the line variable, and close the file (and check for errors!)
     */

    FileReaderArguments *file_reader_args = (FileReaderArguments *)args;
    FILE *toplist_file;
    char *line = NULL;
    char *url = NULL;
    size_t len = 0;
    ssize_t read = 0;

    toplist_file = fopen(file_reader_args->filename, "r");


        if (toplist_file == NULL) {
            exit(EXIT_FAILURE);
        }


    while ((read = getline(&line, &len, toplist_file)) != -1) {
        if (read == -1) {
            perror("error reading file");
        }
        if(read == 1) continue; /*empty line*/
        url = malloc(read);
        if(url == NULL) {
            perror("Could not allocate memory");
        }
        line[read-1] = '\0'; /* null-terminate the URL */
        strncpy(url, line, read);
        enqueue(file_reader_args->url_queue, url);
    }
    fclose(toplist_file);
    return NULL;
}

typedef struct {
    int number_of_threads;
    Queue *url_queue;
    Queue *result_queue;
} CollectorArguments;

void *collector(void *args) {
    /*
     * TODO:
     * 1. Enqueue number_of_threads NULLs to the url_queue
     * 2. Dequeue and aggregate number_of_threads thread_results
     *      from result_queue into results (don't forget to free() thread_results)
     * 3. Print aggregated results to the screen
     */

    CollectorArguments *collector_args = (CollectorArguments *)args;
    UrlStatus results = {0};
    UrlStatus *thread_results;
    int i;

    for(i= 0; i < collector_args->number_of_threads; i++) {
    }   

    for(i= 0; i < collector_args->number_of_threads; i++) {
        thread_results = dequeue(collector_args->result_queue);
        results.ok += thread_results->ok;
        results.error += thread_results->error;
        results.unknown += thread_results->unknown;
        /* free(thread_results);*/
    }

    printf("%d OK, %d Error, %d Unknown\n",
            results.ok,
            results.error,
            results.unknown);
    return NULL;
}

void parallel_checker(const char *filename, int number_of_threads) {
    /*
     * TODO:
     * 1. Initialize a Queue for URLs, a Queue for results (use QUEUE_SIZE)
     * 2. Start number_of_threads threads running worker()
     * 3. Start a thread running file_reader(), and join it
     * 4. Start a thread running collector(), and join it
     * 5. Join all worker threads
     * 6. Destroy both queues
     */
    Queue url_queue, result_queue;
    WorkerArguments worker_arguments = {0};
    FileReaderArguments file_reader_arguments = {0};
    CollectorArguments collector_arguments = {0};
    pthread_t *worker_threads;
    pthread_t file_reader_thread, collector_thread;
    int i;
    int err;


    queue_init(&url_queue, QUEUE_SIZE);
    queue_init(&result_queue, QUEUE_SIZE);

    worker_arguments.url_queue = &url_queue;
    worker_arguments.result_queue = &result_queue;

    file_reader_arguments.filename = filename;
    file_reader_arguments.url_queue = &url_queue;


    collector_arguments.url_queue = &url_queue;
    collector_arguments.result_queue = &result_queue;
    collector_arguments.number_of_threads = number_of_threads;

    worker_threads = (pthread_t *) malloc(sizeof(pthread_t) * number_of_threads);
    if (worker_threads == NULL) {
        perror("unable to allocate memory");
        return;
    }
    curl_global_init(CURL_GLOBAL_ALL); /* init libcurl before starting threads */


    for(i=0; i<number_of_threads; i++) {
        err = pthread_create(&(worker_threads[i]), NULL, &worker, &worker_arguments);
        if (err != 0) {
            fprintf(stderr, "can't create thread :[%s]\n", strerror(err));
        }
    }

    err = pthread_create(&file_reader_thread, NULL, &file_reader, &file_reader_arguments);
    if (err != 0) {
        fprintf(stderr, "can't create thread :[%s]\n", strerror(err));
    }

    err = pthread_join(file_reader_thread, NULL);
    if (err != 0) {
        fprintf(stderr, "can't join thread :[%s]\n", strerror(err));
    }


    err = pthread_create(&collector_thread, NULL, &collector, &collector_arguments);    
    if (err != 0) {
        fprintf(stderr, "can't create thread :[%s]\n", strerror(err));
    }


    err = pthread_join(collector_thread, NULL);
    if (err != 0) {
        fprintf(stderr, "can't join thread :[%s]\n", strerror(err));
    }

    for(i=0; i<number_of_threads; i++) {
        err = pthread_join(worker_threads[i], NULL);

        if (err != 0) {
            fprintf(stderr, "can't join thread :[%s]\n", strerror(err));
        }
    }




    queue_destroy(&url_queue);
    queue_destroy(&result_queue);
    free(worker_threads);
}

int main(int argc, char **argv) {
    if (argc != 3) {
        usage();
    } else {
        parallel_checker(argv[1], atoi(argv[2]));
    }

    return EXIT_SUCCESS;
}

我认为我错过了同步机制，有人能找到我错的地方吗？

非常感谢!!!

Answer 1

一个多线程逻辑缺陷肯定是：

“工人输入队列暂时空”和“文件读取器线程已完成”之间没有区别 - 当没有任何内容可读时，您的工作人员只会退出（甚至暂时）在他们的输入队列上。因此，如果file_reader线程产生的队列条目比工作人员因任何原因消耗它们的速度慢，那么消费者就会饿死并死掉，让生产者没有消费者，最后是一个挂起。

Linux C中的多线程队列

1 个答案: