我需要同时运行n个进程,每个进程只在指定的时间内处于活动状态;每当有进程终止时,我想创建一个新进程来替补。我想出了下面这种可行的方法,但想知道这是否是正确的做法。
/* Maximum number of child processes kept running at the same time.
 * NOTE(review): as a macro, this also replaces any later identifier
 * token spelled `n`. */
#define n 5
/*
 * Fork one child process.  The child does its work and then exits with
 * status 0; the caller (the parent) gets control back immediately.
 * A failed fork() is not reported to the caller.
 */
void newProcess(){
    if (fork() != 0)
        return;     /* parent, or fork() failed: nothing more to do here */
    //work
    exit(0);
}
/*
 * Keep n children running: start children until n have been counted,
 * then block in wait() and free one slot each time wait() returns.
 * Loops forever.
 */
int main(){
    int running = 0;
    for (;;) {
        if (running >= n) {
            /* All slots used: wait for a child before starting another. */
            wait(NULL);
            running--;
            continue;
        }
        newProcess();
        running++;
    }
    return 0;
}
答案 0 :(得分:0)
下面是一段可以运行的代码,大致相当于你的代码的可工作版本,并加入了日志记录。有了日志记录,更容易看出程序运行正常。
#include "stderr.h"
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
/* Maximum number of tracked child processes; also the size of the PID table. */
#define N 5
/*
 * Fork one worker child.
 *
 * Child: logs how long it is about to "work", sleeps for that long
 * (0-2 seconds plus a random number of nanoseconds) and exits.  About 9%
 * of the time it exits with a random status in 0..254; otherwise with 0.
 *
 * Parent: normally returns the child's PID.  As fault injection for
 * testing, about 9% of the time the parent sends a random signal (1..8)
 * to the child it just created and reports failure instead, so the
 * caller never learns that child's PID.
 *
 * Returns the child's PID (> 0), or -1 with errno set, either because
 * fork() failed or because the simulated failure (errno = EAGAIN) fired.
 */
static int newProcess(void)
{
    int pid = fork();
    if (pid == 0)
    {
        // work - this process goes to sleep on the job!
        struct timespec nap = { .tv_sec = rand() % 3, .tv_nsec = rand() % 1000000000 };
        // Announce the work before doing it, and zero-pad the nanoseconds
        // to nine digits so 82518051 ns prints as ".082518051" rather
        // than ". 82518051".
        err_remark("About to do %ld.%.9ld seconds work\n", (long)nap.tv_sec, (long)nap.tv_nsec);
        nanosleep(&nap, NULL);
        int rc = 0;
        if (rand() % 100 > 90)
            rc = rand() % 255;
        err_remark("Work completed - exit status %d\n", rc);
        exit(rc);
    }
    if (pid > 0 && rand() % 100 > 90)
    {
        // Test-only fault injection: kill the new child and pretend that
        // the fork failed with a transient error.
        kill(pid, rand() % 8 + 1);
        errno = EAGAIN;
        pid = -1;
    }
    return pid;
}
/*
 * Probe a child by calling kill() with signal number 0 and log the
 * outcome as "OK" or "Dead".
 *
 * Returns the result of kill(): 0 when the probe succeeded, non-zero
 * otherwise.
 */
static inline int check_child(int pid)
{
#undef SIGNONE
    enum { SIGNONE = 0 };
    const int probe = kill(pid, SIGNONE);
    const char *verdict = "Dead";
    if (probe == 0)
        verdict = "OK";
    err_remark("PID %d - %s\n", pid, verdict);
    return probe;
}
/*
 * Prune from the PID table every entry for which check_child() reports
 * failure.
 *
 * npids  in/out: number of live entries in pids[]; reduced by one for
 *        each entry removed.
 * pids   table of child PIDs.  Order is not preserved: a removed entry
 *        is overwritten with the last live entry.
 */
static void process_check(int *npids, int pids[])
{
    err_remark("Checking PID list\n");
    int i = 0;
    while (i < *npids)
    {
        if (check_child(pids[i]) == 0)
        {
            i++;
            continue;
        }
        // Child is presumably dead: drop it by moving the last live entry
        // into its slot.  i is not advanced, so the moved-in entry is
        // examined next.  If slot i was itself the last entry, the count
        // now equals i and the loop stops, instead of probing slots that
        // are no longer live and discarding healthy children.
        pids[i] = pids[--*npids];
    }
}
/*
 * Supervisor: keep up to N worker children running, replacing each one
 * as it terminates.
 *
 * While fewer than N children are tracked, another one is started.  Once
 * the table is full, the program blocks in wait() until some child
 * terminates and then removes it from the table.  The loop never ends on
 * its own.
 */
int main(int argc, char **argv)
{
    err_setarg0(argv[0]);
    if (argc != 1)
        err_usage(" # No arguments allowed");
    int child_pid[N] = { 0 };
    srand(time(0));
    err_setlogopts(ERR_PID | ERR_MICRO);
    int processCount = 0;
    while (1)
    {
        if (processCount < N)
        {
            int pid = newProcess();
            if (pid > 0)
            {
                child_pid[processCount++] = pid;
                err_remark("PID %d started\n", pid);
            }
            else
            {
                assert(pid == -1);
                int errnum = errno;     // save before later calls can change errno
                // The trailing ": " separates the message from the errno
                // text that err_sysrem() appends directly after it.
                err_sysrem("Failed to fork: ");
                process_check(&processCount, child_pid);
                if (errnum == EAGAIN)
                {
                    struct timespec nap = { .tv_sec = 0, .tv_nsec = (rand() % 10 + 1) * 1000000 };
                    nanosleep(&nap, 0); // sleep 1-10 milliseconds (could be too big).
                }
            }
        }
        else
        {
            int status;
            int corpse = wait(&status);
            if (corpse > 0)
            {
                int known_pid = 0;
                for (int i = 0; i < processCount; i++)
                {
                    if (child_pid[i] == corpse)
                    {
                        err_remark("PID %d exit status 0x%.4X\n", corpse, status);
                        known_pid = 1;
                        // Unordered removal: the last entry fills the gap.
                        child_pid[i] = child_pid[--processCount];
                        break;
                    }
                }
                if (!known_pid)
                    err_remark("Unknown PID %d exit status 0x%.4X - ignored\n", corpse, status);
            }
            else if (errno == ECHILD)
            {
                // There is nothing left to wait for, so every table entry
                // is stale.  Without this reset the loop would call
                // wait() forever and never start a replacement.
                err_remark("No children left to wait for - resetting process count\n");
                processCount = 0;
            }
            else if (errno != EINTR)
            {
                // EINTR is simply retried on the next iteration; anything
                // else is unexpected and worth recording.
                err_sysrem("wait() failed: ");
            }
        }
    }
    return 0;
}
头文件stderr.h
及其配套的源文件stderr.c
可以在GitHub上SOQ存储库的libsoq
文件夹中找到。它们提供了方便且可配置的日志记录服务。
请注意,测试代码会故意伪造一些失败,并杀死部分子进程等。在生产代码中应当删除这些内容。您可能会保留大部分日志记录,尤其是当子进程通常一次工作很多秒、而不是像本例中那样只有几秒时。
一些示例输出:
$ ./mon61
mon61: 2017-12-01 09:48:03.636756 - pid=74353: PID 74354 started
mon61: 2017-12-01 09:48:03.637568 - pid=74353: PID 74355 started
mon61: 2017-12-01 09:48:03.637724 - pid=74353: PID 74356 started
mon61: 2017-12-01 09:48:03.637885 - pid=74353: PID 74357 started
mon61: 2017-12-01 09:48:03.638048 - pid=74353: PID 74358 started
mon61: 2017-12-01 09:48:03.747398 - pid=74356: About to do 0.108225168 seconds work
mon61: 2017-12-01 09:48:03.748152 - pid=74356: Work completed - exit status 0
mon61: 2017-12-01 09:48:03.748791 - pid=74353: PID 74356 exit status 0x0000
mon61: 2017-12-01 09:48:03.749046 - pid=74353: PID 74359 started
mon61: 2017-12-01 09:48:04.032219 - pid=74359: About to do 0.281932019 seconds work
mon61: 2017-12-01 09:48:04.032971 - pid=74359: Work completed - exit status 0
mon61: 2017-12-01 09:48:04.033747 - pid=74353: PID 74359 exit status 0x0000
mon61: 2017-12-01 09:48:04.034007 - pid=74353: PID 74361 started
mon61: 2017-12-01 09:48:04.602396 - pid=74355: About to do 0.964067315 seconds work
mon61: 2017-12-01 09:48:04.602951 - pid=74355: Work completed - exit status 0
mon61: 2017-12-01 09:48:04.603596 - pid=74353: PID 74355 exit status 0x0000
mon61: 2017-12-01 09:48:04.603855 - pid=74353: PID 74362 started
mon61: 2017-12-01 09:48:05.419466 - pid=74358: About to do 1.780199743 seconds work
mon61: 2017-12-01 09:48:05.420017 - pid=74358: Work completed - exit status 0
mon61: 2017-12-01 09:48:05.420669 - pid=74353: PID 74358 exit status 0x0000
mon61: 2017-12-01 09:48:05.420923 - pid=74353: PID 74363 started
mon61: 2017-12-01 09:48:05.453929 - pid=74357: About to do 1.814728145 seconds work
mon61: 2017-12-01 09:48:05.454320 - pid=74357: Work completed - exit status 0
mon61: 2017-12-01 09:48:05.454753 - pid=74353: PID 74357 exit status 0x0000
mon61: 2017-12-01 09:48:05.454939 - pid=74353: PID 74364 started
mon61: 2017-12-01 09:48:05.512822 - pid=74354: About to do 1.875699204 seconds work
mon61: 2017-12-01 09:48:05.514094 - pid=74354: Work completed - exit status 0
mon61: 2017-12-01 09:48:05.514349 - pid=74353: PID 74354 exit status 0x0000
mon61: 2017-12-01 09:48:05.514658 - pid=74353: PID 74365 started
mon61: 2017-12-01 09:48:06.004823 - pid=74362: About to do 1.399425773 seconds work
mon61: 2017-12-01 09:48:06.005581 - pid=74362: Work completed - exit status 0
mon61: 2017-12-01 09:48:06.006237 - pid=74353: PID 74362 exit status 0x0000
mon61: 2017-12-01 09:48:06.006523 - pid=74353: Failed to forkerror (35) Resource temporarily unavailable
mon61: 2017-12-01 09:48:06.006562 - pid=74353: Checking PID list
mon61: 2017-12-01 09:48:06.006570 - pid=74353: PID 74364 - OK
mon61: 2017-12-01 09:48:06.006576 - pid=74353: PID 74361 - OK
mon61: 2017-12-01 09:48:06.006582 - pid=74353: PID 74365 - OK
mon61: 2017-12-01 09:48:06.006588 - pid=74353: PID 74363 - OK
mon61: 2017-12-01 09:48:06.013228 - pid=74353: PID 74368 started
mon61: 2017-12-01 09:48:06.013267 - pid=74353: Unknown PID 74366 exit status 0x0006 - ignored
mon61: 2017-12-01 09:48:06.117089 - pid=74361: About to do 2. 82518051 seconds work
mon61: 2017-12-01 09:48:06.117618 - pid=74361: Work completed - exit status 0
mon61: 2017-12-01 09:48:06.118206 - pid=74353: PID 74361 exit status 0x0000
mon61: 2017-12-01 09:48:06.118486 - pid=74353: PID 74369 started
mon61: 2017-12-01 09:48:06.537455 - pid=74363: About to do 1.115086289 seconds work
mon61: 2017-12-01 09:48:06.537967 - pid=74363: Work completed - exit status 0
mon61: 2017-12-01 09:48:06.538610 - pid=74353: PID 74363 exit status 0x0000
mon61: 2017-12-01 09:48:06.538880 - pid=74353: PID 74371 started
mon61: 2017-12-01 09:48:06.682182 - pid=74371: About to do 0.141922802 seconds work
mon61: 2017-12-01 09:48:06.682945 - pid=74371: Work completed - exit status 0
mon61: 2017-12-01 09:48:06.683733 - pid=74353: PID 74371 exit status 0x0000
mon61: 2017-12-01 09:48:06.684007 - pid=74353: PID 74372 started
mon61: 2017-12-01 09:48:06.975561 - pid=74364: About to do 1.519976923 seconds work
mon61: 2017-12-01 09:48:06.976341 - pid=74364: Work completed - exit status 188
mon61: 2017-12-01 09:48:06.976942 - pid=74353: PID 74364 exit status 0xBC00
mon61: 2017-12-01 09:48:06.977225 - pid=74353: PID 74373 started
mon61: 2017-12-01 09:48:07.436814 - pid=74368: About to do 1.422967208 seconds work
mon61: 2017-12-01 09:48:07.437600 - pid=74368: Work completed - exit status 0
mon61: 2017-12-01 09:48:07.438230 - pid=74353: PID 74368 exit status 0x0000
仔细检查日志可以发现,其中有一些“未知PID”的退出消息。这表明在管理PID数组方面还有一些工作要做(即需要“修复错误”)。我可能会稍后再研究一下。
查看代码后发现,这些其实是“预期的”。大约有9%的概率,子进程被创建后随即被信号杀死(所有这些子进程的退出状态都是0x0001到0x0008之间的值,表示死于信号)。对于这些进程,newProcess()
的返回值为-1
,这使得其PID不会进入已知子进程列表,因此当子进程死亡、其状态信息被收集时,该PID就是“未知”的。换句话说,这是“预期”的行为。可以这样修复:把返回给调用进程的PID取负值,并输出一条消息,说明这个子进程已经创建,但死于信号(可能发生在子进程来得及做任何事情之前,例如报告它正在运行之前)。
与err_remark()
相关的"About to do N.xxxxxxxxx seconds work"
调用位置有误,格式也有误。它应该在nanosleep()之前,而不是之后。它还应使用%.9ld
代替%9ld
来格式化秒的小数部分。两者都很容易修复。
除了让子进程做真正的工作、而不只是在工作时睡觉之外,还可以做各种各样的改进。代码可以处理一些信号(例如:收到中断信号时检查子进程,收到挂起信号时重新读取配置文件,收到终止信号时杀死子进程并退出)。它可以写入日志文件而不是标准错误。它可以作为守护进程运行,而不是在前台运行。它可以提供选项来控制日志文件所在的目录,也许还有日志文件名。它可以检测其日志文件是否/何时被删除,并开始写新的日志文件。等等。
不过这些应该足以让你起步了。