在我的main函数中,我创建了一个执行pingOMS函数的线程。以下是我的主要功能的代码摘录。
if (status = pthread_create(&agentPingThread,NULL,(void *) &pingOMS, NULL) != 0 ){
LogError("%s: Failed to create the OM agent ping thread -- %s\n", prog,
strerror(status));
}
此pingOMS函数通过在pingOMS函数中调用另一个函数ThreadWait1来强制当前线程等待,每隔10秒使用套接字与服务器联系。 ThreadWait1函数使用pthread_cond_timedwait来实现这一点。以下是pingOMS的代码:
int pingOMS(){
DEBUG("Inside %s %s() \n",__FILE__,__func__);
if(This.stopped || failedPings > 1){
DEBUG(" Ping manager stopping ...\n");
return TRUE;
}
int socketPING,returnHB;
returnHB = FALSE;
struct sockaddr_in serverADDRESS;
struct hostent *hostINFO;
char remoteFILE[4096],recvBUFF[4096];
if ((hostINFO = gethostbyname(This.servername)) == NULL){
failedPings++;
LogError("Ping manager unable to reach OM Server.\n");
if(failedPings < 2) goto SKIP_POINT1;
if(failedPings > 1){
This.stopped = TRUE;
return FALSE;
}
}
if ( (socketPING = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
failedPings++;
LogError("Ping manager unable to create socket.\n");
if(failedPings < 2) goto SKIP_POINT1;
if(failedPings > 1){
This.stopped = TRUE;
return FALSE;
}
}
serverADDRESS.sin_family = hostINFO->h_addrtype;
memcpy((char *) &serverADDRESS.sin_addr.s_addr, hostINFO->h_addr_list[0], hostINFO-
>h_length);
serverADDRESS.sin_port = htons((int) This.serverport);
if (connect(socketPING, (struct sockaddr *) &serverADDRESS, sizeof(serverADDRESS)) < 0) {
failedPings++;
LogError("Ping manager unable to connect OM Server on port %d\n",
(int)This.serverport);
if(failedPings < 2) goto SKIP_POINT2;
if(failedPings > 1){
This.stopped = TRUE;
close(socketPING);
return FALSE;
}
}else{
DEBUG(" Ping manager successfully connected to OM Server on port
%d\n",This.serverport);
}
sprintf(remoteFILE,"STARTPING|%d|%s|ENDPING",This.agentID,getOracleDate());
if (send(socketPING, remoteFILE, sizeof(remoteFILE), 0) >= 0){
DEBUG(" Server ping initiated.\n");
}else{
LogError("Server ping failed. OM agent has lost connection to OM server.\n");
failedPings++;
if(failedPings < 2) goto SKIP_POINT2;
if(failedPings > 1){
This.stopped = TRUE;
close(socketPING);
return FALSE;
}
}
// Start - Following code interpret server ping status sent by OM server
int fr_block_sz = 0;
recvBUFF[0] = 0;
while((fr_block_sz = recv(socketPING, recvBUFF, sizeof(recvBUFF), 0)) > 0){
returnHB = TRUE;
DEBUG(" Receiving ping data from OM server.\n");
}
DEBUG(" Received buffer %s\n",recvBUFF);
if(returnHB == TRUE){
if(!strncmp(recvBUFF,"SUCCESS",7)) {
DEBUG(" Server ping succeeded.\n");
}
else{
LogError("OM agent has lost connection with OM server and will shutdown.\n");
failedPings++;
if(failedPings < 2) goto SKIP_POINT2;
if(failedPings > 1){
This.stopped = TRUE;
close(socketPING);
return FALSE;
}
}
}else{
failedPings++;
LogError("OM agent has not received heartbeat from server.\n");
if(failedPings < 2) goto SKIP_POINT2;
if(failedPings > 1){
This.stopped = TRUE;
close(socketPING);
return FALSE;
}
}
//End
SKIP_POINT2:
close(socketPING);
DEBUG(" Agent ping thread going into sleep mode ... \n");
SKIP_POINT1:
ThreadWait1(10000);
pingOMS();
}
这是ThreadWait1的代码:
void ThreadWait1(int timeInMSec)
{
DEBUG("Inside %s %s() \n",__FILE__,__func__);
int rt;
pthread_mutexattr_t mtx_attr;
pthread_mutex_t mtx;
pthread_condattr_t cond_attr;
pthread_cond_t cond;
int milliseconds;
pthread_mutexattr_init ( &mtx_attr );
//pthread_mutexattr_settype ( &mtx_attr, PTHREAD_MUTEX_NORMAL );
pthread_mutexattr_setpshared ( &mtx_attr, PTHREAD_PROCESS_PRIVATE );
pthread_mutex_init ( &mtx, &mtx_attr );
pthread_mutexattr_destroy ( &mtx_attr );
#ifdef USE_CONDATTR
pthread_condattr_init ( &cond_attr );
if ( pthread_condattr_setclock ( &cond_attr, CLOCK_REALTIME ) != 0 )
{
fputs ( "pthread_condattr_setclock failed", stderr );
exit ( EXIT_FAILURE );
}
pthread_cond_init ( &cond, &cond_attr );
pthread_condattr_destroy ( &cond_attr );
#else
pthread_cond_init ( &cond, NULL );
#endif
struct timespec now, ts;
clock_gettime ( CLOCK_REALTIME, &now );
ts.tv_sec = now.tv_sec + timeInMSec / 1000;
ts.tv_nsec = now.tv_nsec + (timeInMSec % 1000) * 1000000;
if (ts.tv_nsec > 1000000000)
{
ts.tv_nsec -= 1000000000;
++ts.tv_sec;
}
DEBUG ( " %ld.%09ld %ld.%09ld \n", now.tv_sec, now.tv_nsec,
ts.tv_sec, ts.tv_nsec );
pthread_mutex_lock ( &mtx );
rt = pthread_cond_timedwait ( &cond, &mtx, &ts );
ASSERT(rt);
pthread_mutex_unlock ( &mtx );
}
问题:
线程仅等待4~5秒而不是10秒,但非常有趣的是,当独立的c程序中的ThreadWait1函数具有相同的逻辑时,它运行良好。它可能与CPU上的线程调度或其他内容有关。
答案 0 :(得分:0)
这不是问题的答案,但在我看来是有疑问的。你正在使用什么平台?
其次,为什么你创建了一个递归调用,我认为ThreadWait1应该在每X次(在你的情况下为10秒)后调用pingOMS函数。
请尝试以下代码:
#include <unistd.h>
if (status = pthread_create(&agentPingThread,NULL,(void *) &ThreadWait1, NULL) != 0 ){
LogError("%s: Failed to create the OM agent ping thread -- %s\n", prog,
strerror(status));
}
void ThreadWait1()
{
int timeInMSec = 10;
while(1)
{
pingOMS();
// Add your locks here if required
sleep(timeInMSec);
// release the locks if locked
}
}
删除pingOMS();从pingOMS()函数调用,以便它不调用自身(无递归)