我正在尝试编写检查点多线程应用程序的代码。由于fork
函数不适用于此类应用程序,因此我正在研究具有forkall
功能的solaris来实现这一功能。
以下是包含函数checkpoint
和restart_from_checkpoint
的代码及其示例用法。我将这些功能仅在两个障碍之间称为安全。
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <signal.h>
#include <semaphore.h>
#include <stdint.h>
#include <pthread.h>
#define NOFTHREADS 4
pid_t checkpoint();
void restart_from_checkpoint( pid_t pid );
sem_t sem;
pthread_barrier_t barrier;
void sig_handler(int signum)
{
printf( ">> sem_post!\n" );
sem_post( &sem );
}
pid_t child_pid;
pid_t par_pid;
void *threadFunc( void *pParam )
{
unsigned int tid = *((unsigned int*)(pParam));
int i;
for( i = 0; i < 20; i++ )
{
if ( !(i % 2) )
{
pthread_barrier_wait(&barrier);
if ( tid == 0 && i == 6 )
{
child_pid = checkpoint();
}
if ( tid == 0 )
printf( "p%d: >> i = %d\n", getpid(), i );
if ( tid == 0 && i == 12 && ( getpid() == par_pid ) )
{
restart_from_checkpoint( child_pid );
}
pthread_barrier_wait(&barrier);
}
printf( "p%d: t%d: i%d\n", getpid(), tid, i );
}
}
int main( int argc, char *argv[] )
{
int i;
pthread_t hThread[NOFTHREADS];
int index[NOFTHREADS];
signal(SIGUSR1, sig_handler);
pthread_barrier_init (&barrier, NULL, NOFTHREADS);
par_pid = getpid();
for( i = 0; i < NOFTHREADS; i++ )
{
index[i] = i;
pthread_create( &hThread[i], NULL, threadFunc, &index[i] );
}
for( i = 0; i < NOFTHREADS; i++ )
pthread_join( hThread[i], NULL );
return 0;
}
pid_t checkpoint()
{
pid_t pid;
int wait_val;
sem_init( &sem, 0, 0 );
switch (pid=forkall())
{
case -1:
perror("fork");
break;
case 0: // child process starts
sem_wait( &sem );
printf( ">> passed sem_wait!\n" );
break; // child process ends
default: // parent process starts
return pid;
}
}
void restart_from_checkpoint( pid_t pid )
{
printf( ">> restart_from_checkpoint!\n" );
kill( pid, SIGUSR1 );
printf( ">> exiting!\n" );
exit( 0 );
printf( ">> should not had been printed!\n" );
}
以下是屏幕上打印的输出......
p1159: >> i = 0
p1159: t0: i0
p1159: t0: i1
p1159: t1: i0
p1159: t1: i1
p1159: t2: i0
p1159: t2: i1
p1159: t3: i0
p1159: t3: i1
p1159: >> i = 2
p1159: t2: i2
p1159: t2: i3
p1159: t0: i2
p1159: t0: i3
p1159: t3: i2
p1159: t3: i3
p1159: t1: i2
p1159: t1: i3
p1159: >> i = 4
p1159: t0: i4
p1159: t0: i5
p1159: t2: i4
p1159: t2: i5
p1159: t1: i4
p1159: t1: i5
p1159: t3: i4
p1159: t3: i5
p1159: >> i = 6
p1159: t2: i6
p1159: t2: i7
p1159: t0: i6
p1159: t0: i7
p1159: t3: i6
p1159: t3: i7
p1159: t1: i6
p1159: t1: i7
p1159: >> i = 8
p1159: t1: i8
p1159: t1: i9
p1159: t2: i8
p1159: t2: i9
p1159: t3: i8
p1159: t3: i9
p1159: t0: i8
p1159: t0: i9
p1159: >> i = 10
p1159: t1: i10
p1159: t1: i11
p1159: t2: i10
p1159: t2: i11
p1159: t3: i10
p1159: t3: i11
p1159: t0: i10
p1159: t0: i11
p1159: >> i = 12
>> restart_from_checkpoint!
>> exiting!
p1159: >> i = 0
p1159: t0: i0
p1159: t0: i1
p1159: t1: i0
p1159: t1: i1
p1159: t2: i0
p1159: t2: i1
p1159: t3: i0
p1159: t3: i1
p1159: >> i = 2
p1159: t2: i2
p1159: t2: i3
p1159: t0: i2
p1159: t0: i3
p1159: t3: i2
p1159: t3: i3
p1159: t1: i2
p1159: t1: i3
p1159: >> i = 4
p1159: t0: i4
p1159: t0: i5
p1159: t2: i4
p1159: t2: i5
p1159: t1: i4
p1159: t1: i5
p1159: t3: i4
p1159: t3: i5
>> sem_post!
>> passed sem_wait!
p1160: >> i = 6
p1160: t0: i6
p1160: t0: i7
p1160: t2: i6
p1160: t2: i7
p1160: t3: i6
p1160: t3: i7
p1160: t1: i6
p1160: t1: i7
p1160: >> i = 8
p1160: t3: i8
p1160: t3: i9
p1160: t2: i8
p1160: t2: i9
p1160: t1: i8
p1160: t1: i9
p1160: t0: i8
p1160: t0: i9
p1160: >> i = 10
p1160: t3: i10
p1160: t3: i11
p1160: t1: i10
p1160: t1: i11
p1160: t0: i10
p1160: t0: i11
p1160: t2: i10
p1160: t2: i11
p1160: >> i = 12
p1160: t3: i12
p1160: t3: i13
p1160: t0: i12
p1160: t0: i13
p1160: t1: i12
p1160: t1: i13
p1160: t2: i12
p1160: t2: i13
p1160: >> i = 14
p1160: t1: i14
p1160: t1: i15
p1160: t2: i14
p1160: t2: i15
p1160: t0: i14
p1160: t0: i15
p1160: t3: i14
p1160: t3: i15
p1160: >> i = 16
p1160: t0: i16
p1160: t0: i17
p1160: t3: i16
p1160: t3: i17
p1160: t1: i16
p1160: t1: i17
p1160: t2: i16
p1160: t2: i17
p1160: >> i = 18
p1160: t1: i18
p1160: t1: i19
p1160: t2: i18
p1160: t2: i19
p1160: t0: i18
p1160: t0: i19
p1160: t3: i18
p1160: t3: i19
请注意,父进程ID是1159而子进程的ID是1160.现在我的问题是,为什么退出父进程后重新执行uptill i == 6(调用检查点的点),请参阅{{1之间的输出}和>> exiting!
。它不应该立即退出吗?我在这里做错了什么?
答案 0 :(得分:0)
printf
函数不会立即(以同步方式)将文本输出到屏幕或文件。它确实存储要打印到libc缓冲区的文本,并且有时会刷新缓冲区(写入屏幕或文件)(在'\ n'char或者如果有大量数据的话)。
fork之后,所有缓冲区都从父级复制到子级。缓冲区中有一些文本,两个进程都将刷新缓冲区。
您应该考虑在fork()之前添加fflush()
或通过setvbuf
设置不同的缓冲区规则,例如禁用缓冲。