为什么pthread_cond_timedwait在指定的时间限制后不会触发?

时间:2017-10-21 00:16:25

标签: linux pthreads

这应该在一个循环(服务器)中工作,并将工作/查询委托给故障库(这里由longrun()函数调用表示)到超时tmax = 3s的线程。我放置了同步变量,我试图等待不超过这个限制,但是当longrun()挂起(运行4)时,它仍然等待全时(7s)而不是请求的限制。谁能解释一下?

#include <unistd.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <pthread.h>
#include <sys/time.h>

#include <iostream>
using namespace std;


string int2str(int i){
    char buf[10]; // no larger int passed we hope
    int end = sprintf(buf, "%d", i);
    buf[end] = '\0';
    return string(buf);
}
string longrun(int qi){
    if(qi % 4 == 0) {
        sleep(7);
        return string("'---- to: ") + int2str(qi) + string("' (hang case)");
    }
    else {
        sleep(1);
        return string("'okay to: ") + int2str(qi) + string("'");
    }
}


struct tpack_t {            // thread pack
    pthread_t thread;
    pthread_mutex_t mutex;
    pthread_cond_t go;      // have a new value to run
    pthread_cond_t ready;   // tell main thread we're done processing
    int newq;               // predicate on go+ready condition for wait
    int qi;                 // place question as int to thread: question-int
    string res;             // where i place the response
    tpack_t();
};
tpack_t::tpack_t() {
    pthread_mutex_init (&mutex, NULL);
    pthread_cond_init (&go, NULL);
    pthread_cond_init (&ready, NULL);
    newq = 0;
}
void set_cond_time(timespec* ctp, int tmax){
    timeval now;
    gettimeofday(&now, NULL);
    ctp->tv_nsec = now.tv_usec * 1000UL;
    ctp->tv_sec = now.tv_sec + tmax; // now + max time!
    printf("[m] ... set to sleep for %d sec, i hope...\n", tmax);
}

void take_faulty_evasive_action(tpack_t* tpx){
    // basically kill thread, clean faulty library copy (that file) and restart it
    cout << "will work on it (restarting thread) soon!\n";
    tpx->newq = 0; // minimal action for now...
}

void* faulty_proc(void* arg){
    tpack_t* tpx = (tpack_t*) arg;
    while(true){
        pthread_mutex_lock(&tpx->mutex);
        while(tpx->newq == 0){
            pthread_cond_wait(&tpx->go, &tpx->mutex);
        }
        printf("[t] to process : %d\n", tpx->qi); fflush(stdout);
        // now i have a new value in qi, process it and place the answer in... res
        tpx->res = longrun(tpx->qi);
        tpx->newq = 0;
        pthread_mutex_unlock(&tpx->mutex);
        pthread_cond_signal(&tpx->ready);
    }
}


int main(int argc, char* argv[]){

    cout << "\n    this presents the problem: idx = 4k -> hang case ...\n    ( challenge is to eliminate them by killing thread and restarting it )\n\n";
    printf("    ETIMEDOUT = %d   EINVAL = %d  EPERM = %d\n\n", ETIMEDOUT, EINVAL, EPERM);

    tpack_t* tpx = new tpack_t();
    pthread_create(&tpx->thread, NULL, &faulty_proc, (void*) tpx);

    // max wait time; more than that is a hanging indication!
    int numproc = 5;
    ++numproc;
    int tmax = 3;
    timespec cond_time;
    cond_time.tv_nsec = 0;
    int status, expired; // for timed wait on done condition!


    time_t t0 = time(NULL);
    for(int i=1; i<numproc; ++i){
        expired = 0;

        pthread_mutex_lock(&tpx->mutex);
        tpx->qi = i; // init the question
        tpx->newq = 1; // ... predicate
        //pthread_mutex_unlock(&tpx->mutex);
        pthread_cond_signal(&tpx->go); // let it know that...

        while(tpx->newq == 1){

            ///  ---------------------- most amazing region, timedwait waits all the way! ----------------------
            set_cond_time(&cond_time, tmax); // time must be FROM NOW! (abs time, not interval)
            time_t wt0 = time(NULL);
            status = pthread_cond_timedwait(&tpx->ready, &tpx->mutex, &cond_time);
            printf("[m] ---- \t exited with status = %d  (after %.2fs)\n", status, difftime(time(NULL), wt0));
            ///  -----------------------------------------------------------------------------------------------

            if (status == ETIMEDOUT){
                printf("\t ['t was and newq == %d]\n", tpx->newq);
                if(tpx->newq == 1){ // check one more time, to elim race possibility
                    expired = 1;
                    break;
                }
            }
            else if(status != 0){
                fprintf(stderr, "cond timewait for faulty to reply errored out\n");
                return 1;
            }
        }

        if(expired){
            take_faulty_evasive_action(tpx); // kill thread, start new one, report failure below
            cout << "[m]  :: interruption: default bad answer goes here for " << i  << "\n\n";
        }
        else {
            cout << "[m]  ::  end with ans: " << tpx->res << endl << endl;
        }
        pthread_mutex_unlock(&tpx->mutex);
    }
    time_t t1 = time(NULL);
    printf("took %.2f sec to run\n", difftime(t1, t0));
}

使用&#39; g ++ -pthread code.cc&#39;在linux下编译(ubuntu 16.04)。输出是:

    this presents the problem: idx = 4k -> hang case ...
    ( challenge is to eliminate them by killing thread and restarting it )

    ETIMEDOUT = 110   EINVAL = 22  EPERM = 1

[m] ... set to sleep for 3 sec, i hope...
[t] to process : 1
[m] ----     exited with status = 0  (after 1.00s)
[m]  ::  end with ans: 'okay to: 1'

[m] ... set to sleep for 3 sec, i hope...
[t] to process : 2
[m] ----     exited with status = 0  (after 1.00s)
[m]  ::  end with ans: 'okay to: 2'

[m] ... set to sleep for 3 sec, i hope...
[t] to process : 3
[m] ----     exited with status = 0  (after 1.00s)
[m]  ::  end with ans: 'okay to: 3'

[m] ... set to sleep for 3 sec, i hope...
[t] to process : 4
[m] ----     exited with status = 110  (after 7.00s)
     ['t was and newq == 0]
[m]  ::  end with ans: '---- to: 4' (hang case)

[m] ... set to sleep for 3 sec, i hope...
[t] to process : 5
[m] ----     exited with status = 0  (after 1.00s)
[m]  ::  end with ans: 'okay to: 5'

took 11.00 sec to run

1 个答案:

答案 0 :(得分:0)

问题是faulty_proc()在调用tpx->mutex时保持longrun()被锁定,pthread_cond_timedwait()中的main()调用无法返回,直到它重新开始 - 即使超时到期,也要获取互斥锁。

如果longrun()不需要锁定互斥锁 - 而且似乎就是这种情况 - 您可以在设置完成标志并发出条件变量信号之前解锁该调用周围的互斥锁并重新锁定它