c fork()和kill()同时不工作?

时间:2018-01-20 15:57:53

标签: c signals fork kill child-process

主程序:启动一定数量的子进程,然后立即发送SIGINT

/*
 * Fork up to CHILDS children, exec the test program in each one, send
 * SIGINT to every child that was actually created, then reap them all.
 *
 * Returns 0 once every created child has been reaped.
 */
int     main()
{
    pid_t   childs[CHILDS];
    char    *execv_argv[3];
    int     n = 0;  /* number of children successfully forked */

    execv_argv[0] = "./debugging_procs/wait_time_at_interrupt";
    execv_argv[1] = "2";
    execv_argv[2] = NULL;

    for (int i = 0; i < CHILDS; i++)
    {
        pid_t   pid = fork();

        if (pid == -1)
        {
            /*
             * Never store -1 in childs[]: kill(-1, SIGINT) would signal
             * every process this process is allowed to signal.
             */
            perror("fork");
            break;
        }
        if (pid == 0)
        {
            execv(execv_argv[0], execv_argv);
            /* execv() only returns on failure, so errno is meaningful here. */
            perror(execv_argv[0]);
            _exit(1);
        }
        childs[n++] = pid;
    }

    /*
     * Without a delay here the children may be signalled before they have
     * exec'ed and installed their SIGINT handler.
     */
    // sleep(1);

    /* errno is only meaningful after a call has reported failure. */
    for (int i = 0; i < n; i++)
    {
        if (kill(childs[i], SIGINT) == -1)
            perror("kill");
    }

    // Wait for all children.
    while (wait(NULL) > 0);

    return 0;
}

分叉程序:等待任何信号,如果发送了SIGINT,打开某个文件并写入SIGINT和当前的pid并等待指定的秒数(在这种情况下,我从主程序发送2)。

#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>

/*
 * SIGINT handler: append the line "SIGINT <pid>\n" to ./aux/log1.
 *
 * Only open(), getpid(), write() and close() are called. The pid is
 * formatted by hand because sprintf() is not on the POSIX list of
 * async-signal-safe functions and must not be called from a handler.
 *
 * The file is opened without O_CREAT, so it must already exist; if it
 * cannot be opened the handler returns without writing anything.
 */
void    sigint_handler(int signum)
{
    static const char   prefix[] = "SIGINT ";
    char    buf[sizeof prefix + 3 * sizeof(long) + 2];
    char    digits[3 * sizeof(long) + 1];
    size_t  len = 0;
    size_t  ndigits = 0;
    long    pid = (long)getpid();
    int     fd;

    (void)signum;

    fd = open("./aux/log1", O_WRONLY | O_APPEND);
    if (fd == -1)
        return;

    while (prefix[len] != '\0')
    {
        buf[len] = prefix[len];
        len++;
    }

    /* Produce the decimal digits least-significant first, then reverse. */
    do
    {
        digits[ndigits++] = (char)('0' + pid % 10);
        pid /= 10;
    } while (pid > 0);
    while (ndigits > 0)
        buf[len++] = digits[--ndigits];
    buf[len++] = '\n';

    /* The whole line goes out in a single write() call. */
    if (write(fd, buf, len) == -1)
    {
        /* Nothing safe can be reported from inside a signal handler. */
    }
    close(fd);
}

/*
 * Wait for a signal, let sigint_handler() log a SIGINT, then sleep for the
 * requested number of seconds before exiting.
 *
 * argv[1] (optional): seconds to sleep after the signal; defaults to 5.
 *
 * Returns 0 on success, 1 if argv[1] is not a non-negative decimal number
 * or the signal setup fails.
 */
int     main(int argc, char **argv)
{
    struct sigaction    sa;
    sigset_t            block;
    sigset_t            previous;
    unsigned int        wait_time = 5;

    if (argc > 1)
    {
        char    *end;
        long    secs = strtol(argv[1], &end, 10);

        /* Reject garbage and negatives: sleep() takes an unsigned value. */
        if (end == argv[1] || *end != '\0' || secs < 0
            || (unsigned long)secs > (unsigned int)-1)
        {
            fprintf(stderr, "%s: invalid wait time '%s'\n", argv[0], argv[1]);
            return 1;
        }
        wait_time = (unsigned int)secs;
    }

    /*
     * Keep SIGINT blocked until sigsuspend() atomically unblocks it and
     * waits. With signal() followed by pause(), a SIGINT delivered between
     * the two calls runs the handler first and pause() then blocks forever.
     */
    sigemptyset(&block);
    sigaddset(&block, SIGINT);
    if (sigprocmask(SIG_BLOCK, &block, &previous) == -1)
    {
        perror("sigprocmask");
        return 1;
    }

    memset(&sa, 0, sizeof sa);
    sa.sa_handler = sigint_handler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = 0;
    if (sigaction(SIGINT, &sa, NULL) == -1)
    {
        perror("sigaction");
        return 1;
    }

    // Wait for any signal.
    sigsuspend(&previous);

    /* Restore the original mask so a later SIGINT can still interrupt sleep(). */
    sigprocmask(SIG_SETMASK, &previous, NULL);
    sleep(wait_time);
    return 0;
}

问题是,子进程应当写入的日志文件并没有n行,这说明并非所有子进程都写入了日志。有时甚至没有任何子进程写入任何内容,而且主程序根本不会等待(这意味着在这种情况下子进程中的sleep()没有被调用)。

但如果我在主程序中取消注释sleep(1),一切都会按照我的预期运作。

我怀疑子进程没有足够的时间开始监听SIGINT。

我正在处理的程序是一个任务控件,当我运行如下命令时: restart my_program; restart my_program我的行为不稳定。当我调用重新启动时,会发送SIGINT,然后调用新的fork(),然后发送另一个SIGINT,就像上面的示例一样。

如何在不使用sleep(1)这一行的情况下,确保所有子进程都能处理SIGINT?我正在测试我的程序是否可以处理在发送SIGINT后不会立即退出的程序。

如果我在子程序的顶部添加printf("child process started\n");,它不会被打印,主程序也不会等待,除非我sleep一秒钟。即使只有一个子进程,也会发生这种情况。

2 个答案:

答案 0 :(得分:1)

尝试在for循环中使用waitpid()命令。这样,下一个孩子只会在第一个孩子完成后才会写

答案 1 :(得分:1)

一切都在发挥作用。在设置信号处理程序之前,甚至在它们开始执行子二进制文件之前,一些子进程会被信号杀死。

在父进程中,您可以检查所收到的每个进程的身份和退出状态,而不仅仅是wait(),直到没有更多子进程。将while (wait(NULL) > 0);替换为

{
    /* Reap every child and print how each one ended. */
    int    wstatus;
    pid_t  reaped;

    for (;;) {
        reaped = wait(&wstatus);
        if (reaped <= 0)
            break;  /* wait() reported no child to reap */

        if (WIFEXITED(wstatus)) {
            printf("Child %ld exit status was %d.\n", (long)reaped, WEXITSTATUS(wstatus));
        } else if (WIFSIGNALED(wstatus)) {
            printf("Child %ld was killed by signal %d.\n", (long)reaped, WTERMSIG(wstatus));
        } else {
            /* Neither a normal exit nor death by signal. */
            printf("Child %ld was lost.\n", (long)reaped);
        }

        /* Flush after every report so each line is emitted immediately. */
        fflush(stdout);
    }
}

你会发现“失踪”的子进程是被信号终止的。这意味着子进程在准备好捕获信号之前就被杀死了。

我编写了自己的一对示例程序,并加入了完整的错误检查。我决定使用sigprocmask()和sigwaitinfo()来代替信号处理程序,只是为了展示做同一件事的另一种方法(这样也不必局限于信号处理程序中只能使用的异步信号安全函数)。

**parent.c**

#define  _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>

/*
 * Return a short printable name for a signal number.
 *
 * SIGINT, SIGHUP and SIGTERM map to "INT", "HUP" and "TERM". Any other
 * value is formatted as a decimal number into a static buffer, which the
 * next such call overwrites.
 */
const char *signal_name(const int signum)
{
    static char  buffer[32];

    if (signum == SIGINT)
        return "INT";
    if (signum == SIGHUP)
        return "HUP";
    if (signum == SIGTERM)
        return "TERM";

    snprintf(buffer, sizeof buffer, "%d", signum);
    return buffer;
}

/*
 * qsort() comparison callback for arrays of pid_t.
 *
 * Returns -1, 0 or +1 when the first pid is less than, equal to, or
 * greater than the second.
 */
static int compare_pids(const void *p1, const void *p2)
{
    const pid_t *lhs = p1;
    const pid_t *rhs = p2;

    if (*lhs < *rhs)
        return -1;
    if (*lhs > *rhs)
        return +1;
    return 0;
}

/*
 * Fork COUNT child processes that each execute PATH-TO-BINARY, send every
 * child SIGINT immediately, then reap all children and report how each one
 * ended.
 *
 * argv[1]   COUNT, a positive decimal number of children to fork.
 * argv[2]   PATH-TO-BINARY; argv[2..] become the child's argument vector.
 *
 * Returns EXIT_SUCCESS after all children have been reaped. Returns
 * EXIT_FAILURE on a usage error, an invalid count, allocation failure,
 * when not even the first child can be forked, or when wait() fails.
 */
int main(int argc, char *argv[])
{
    size_t  count, r, i;
    int     status;
    pid_t  *child, *reaped, p;
    char    dummy;

    if (argc < 3 || !strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s [ -h | --help ]\n", argv[0]);
        fprintf(stderr, "       %s COUNT PATH-TO-BINARY [ ARGS ... ]\n", argv[0]);
        fprintf(stderr, "\n");
        fprintf(stderr, "This program will fork COUNT child processes,\n");
        fprintf(stderr, "each child process executing PATH-TO-BINARY.\n");
        fprintf(stderr, "Immediately after all child processes have been forked,\n");
        fprintf(stderr, "they are sent a SIGINT signal.\n");
        fprintf(stderr, "\n");
        return EXIT_FAILURE;
    }

    /* The trailing " %c" catches garbage after the number. */
    if (sscanf(argv[1], " %zu %c", &count, &dummy) != 1 || count < 1) {
        fprintf(stderr, "%s: Invalid count.\n", argv[1]);
        return EXIT_FAILURE;
    }

    /*
     * calloc() fails cleanly when count * element size would overflow;
     * a plain multiplication inside malloc() could wrap around and yield
     * arrays too small for the indexes used below.
     */
    child = calloc(count, sizeof child[0]);
    reaped = calloc(count, sizeof reaped[0]);
    if (!child || !reaped) {
        fprintf(stderr, "%s: Count is too large; out of memory.\n", argv[1]);
        free(child);
        free(reaped);
        return EXIT_FAILURE;
    }

    for (i = 0; i < count; i++) {
        p = fork();
        if (p == -1) {
            if (i == 0) {
                fprintf(stderr, "Cannot fork child processes: %s.\n", strerror(errno));
                free(child);
                free(reaped);
                return EXIT_FAILURE;
            } else {
                /* Carry on with the children that were created. */
                fprintf(stderr, "Cannot fork child %zu: %s.\n", i + 1, strerror(errno));
                count = i;
                break;
            }
        } else
        if (!p) {
            /* Child process */
            execvp(argv[2], argv + 2);
            {
                /* execvp() only returns on failure. */
                const char *errmsg = strerror(errno);
                fprintf(stderr, "Child process %ld: Cannot execute %s: %s.\n",
                                (long)getpid(), argv[2], errmsg);
                /*
                 * _exit() rather than exit(): the forked child must not run
                 * the parent's atexit handlers or flush its stdio buffers.
                 */
                _exit(EXIT_FAILURE);
            }
        } else {
            /* Parent process. */
            child[i] = p;
        }
    }

    /* Send all children the INT signal. */
    for (i = 0; i < count; i++)
        kill(child[i], SIGINT);

    /* Reap and report each child. */
    r = 0;
    while (1) {
        p = wait(&status);

        if (p == -1) {
            if (errno == EINTR)
                continue;   /* interrupted by a signal; retry */
            if (errno == ECHILD)
                break;      /* no children left */
            fprintf(stderr, "Error waiting for child processes: %s.\n", strerror(errno));
            free(child);
            free(reaped);
            return EXIT_FAILURE;
        }

        if (r < count)
            reaped[r++] = p;
        else
            fprintf(stderr, "Reaped an extra child process!\n");

        if (WIFEXITED(status)) {
            switch (WEXITSTATUS(status)) {
            case EXIT_SUCCESS:
                printf("Parent: Reaped child process %ld: EXIT_SUCCESS.\n", (long)p);
                break;
            case EXIT_FAILURE:
                printf("Parent: Reaped child process %ld: EXIT_FAILURE.\n", (long)p);
                break;
            default:
                printf("Parent: Reaped child process %ld: Exit status %d.\n", (long)p, WEXITSTATUS(status));
                break;
            }
            fflush(stdout);

        } else
        if (WIFSIGNALED(status)) {
            printf("Parent: Reaped child process %ld: Terminated by %s.\n", (long)p, signal_name(WTERMSIG(status)));
            fflush(stdout);

        } else {
            printf("Parent: Reaped child process %ld: Lost.\n", (long)p);
            fflush(stdout);
        }
    }

    if (r == count) {
        /* Sort both pid arrays so they can be compared element by element. */
        qsort(child, count, sizeof child[0], compare_pids);
        qsort(reaped, count, sizeof reaped[0], compare_pids);
        for (i = 0; i < count; i++)
            if (child[i] != reaped[i])
                break;
        if (i == count)
            printf("Parent: All %zu child processes were reaped successfully.\n", count);
    }

    free(child);
    free(reaped);
    return EXIT_SUCCESS;
}

**child.c**

#define  _POSIX_C_SOURCE 200809L
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>

/*
 * Map a signal number to a short printable name.
 *
 * The three signals this program waits for are looked up in a small table.
 * Every other value is rendered as a decimal number in a static buffer that
 * is reused by later calls.
 */
const char *signal_name(const int signum)
{
    static const struct {
        int         number;
        const char *label;
    } known[] = {
        { SIGINT,  "INT"  },
        { SIGHUP,  "HUP"  },
        { SIGTERM, "TERM" },
    };
    static char  buffer[32];
    size_t       k;

    for (k = 0; k < sizeof known / sizeof known[0]; k++) {
        if (known[k].number == signum)
            return known[k].label;
    }

    snprintf(buffer, sizeof buffer, "%d", signum);
    return buffer;
}

/*
 * Announce startup, block SIGINT, SIGHUP and SIGTERM, then wait
 * synchronously with sigwaitinfo() for one of them and report which signal
 * arrived and which process sent it.
 *
 * Returns EXIT_SUCCESS after a signal has been received and reported,
 * EXIT_FAILURE if sigwaitinfo() fails.
 */
int main(void)
{
    const long  self = getpid();
    sigset_t    wanted;
    siginfo_t   received;
    int         signo;

    /* Flush right away so the line is out before any signal can kill us. */
    printf("Child: Child process %ld started!\n", self);
    fflush(stdout);

    /* Block the signals first; sigwaitinfo() then accepts them one at a time. */
    sigemptyset(&wanted);
    sigaddset(&wanted, SIGINT);
    sigaddset(&wanted, SIGHUP);
    sigaddset(&wanted, SIGTERM);
    sigprocmask(SIG_BLOCK, &wanted, NULL);

    signo = sigwaitinfo(&wanted, &received);
    if (signo == -1) {
        printf("Child: Child process %ld failed: %s.\n", self, strerror(errno));
        return EXIT_FAILURE;
    }

    /* A sender pid of 0 is reported as coming from the terminal. */
    if (received.si_pid == 0) {
        printf("Child: Child process %ld terminated by signal %s via terminal.\n", self, signal_name(signo));
        return EXIT_SUCCESS;
    }

    if (received.si_pid == getppid()) {
        printf("Child: Child process %ld terminated by signal %s sent by the parent process %ld.\n",
               self, signal_name(signo), (long)received.si_pid);
        return EXIT_SUCCESS;
    }

    printf("Child: Child process %ld terminated by signal %s sent by process %ld.\n",
           self, signal_name(signo), (long)received.si_pid);
    return EXIT_SUCCESS;
}

例如,使用以下命令编译:
gcc -Wall -O2 parent.c -o parent
gcc -Wall -O2 child.c -o child

并使用例如以下命令运行它们:
./parent 100 ./child

其中100是要分叉的子进程数,每个进程都运行./child

错误输出到标准错误。从父项到标准输出的每一行都以Parent:开头,从任何子项到标准输出的每一行都以Child:开头。

在我的计算机上,输出中的最后一行始终为Parent: All # child processes were reaped successfully.,这意味着每个通过fork()创建的子进程都已通过wait()被回收并报告。没有任何遗失,fork()和kill()都没有问题。

(请注意,如果您指定的子进程多于允许fork的子进程,则父程序不会将错误视为错误,只需使用允许的子进程数进行测试。)

在我的机器上,fork并回收100个子进程对父进程来说工作量已经足够大,因此每个子进程都来得及运行到准备好捕获信号的位置。

另一方面,父进程处理10个子进程(运行./parent 10 ./child)的速度非常快,以至于每个子进程在准备好处理信号之前就被INT信号杀死了。

以下是运行./parent 20 ./child时非典型案例的输出:

Child: Child process 19982 started!
Child: Child process 19983 started!
Child: Child process 19984 started!
Child: Child process 19982 terminated by signal INT sent by the parent process 19981.
Child: Child process 19992 started!
Child: Child process 19983 terminated by signal INT sent by the parent process 19981.
Child: Child process 19984 terminated by signal INT sent by the parent process 19981.
Parent: Reaped child process 19982: EXIT_SUCCESS.
Parent: Reaped child process 19985: Terminated by INT.
Parent: Reaped child process 19986: Terminated by INT.
Parent: Reaped child process 19984: EXIT_SUCCESS.
Parent: Reaped child process 19987: Terminated by INT.
Parent: Reaped child process 19988: Terminated by INT.
Parent: Reaped child process 19989: Terminated by INT.
Parent: Reaped child process 19990: Terminated by INT.
Parent: Reaped child process 19991: Terminated by INT.
Parent: Reaped child process 19992: Terminated by INT.
Parent: Reaped child process 19993: Terminated by INT.
Parent: Reaped child process 19994: Terminated by INT.
Parent: Reaped child process 19995: Terminated by INT.
Parent: Reaped child process 19996: Terminated by INT.
Parent: Reaped child process 19983: EXIT_SUCCESS.
Parent: Reaped child process 19997: Terminated by INT.
Parent: Reaped child process 19998: Terminated by INT.
Parent: Reaped child process 19999: Terminated by INT.
Parent: Reaped child process 20000: Terminated by INT.
Parent: Reaped child process 20001: Terminated by INT.
Parent: All 20 child processes were reaped successfully.

在20个子进程中,有16个在执行第一个printf()(或fflush(stdout))行之前就被INT信号杀死了。(我们可以在parent.c中的execvp()之前添加一行printf("Child: Child process %ld executing %s\n", (long)getpid(), argv[2]); fflush(stdout);,以查看是否有子进程在执行exec之前就被杀死。)

在剩余的四个子进程(19982、19983、19984和19992)中,有一个(19992)在第一个printf()或fflush()之后被终止,但此时它还没来得及运行sigprocmask()——该调用会阻塞这些信号并让子进程准备好捕获它们。

只有剩下的三个子进程(19982、19983和19984)捕获到了父进程发送的INT信号。

正如您所看到的,只需添加完整的错误检查,并添加足够的输出(在有用的地方加上fflush(stdout);,因为标准输出默认是带缓冲的),就可以运行多个测试用例,从而对实际发生的情况形成更完整的认识。

  

我正在处理的程序是一个任务控件,当我运行如下命令时:restart my_program;重启my_program我得到一个不稳定的行为。当我调用restart时,会发送一个SIGINT,然后调用一个新的fork(),然后发送另一个SIGINT,就像上面的例子一样。

在这种情况下,您在新fork出的子进程准备就绪之前就发送了信号,因此会发生什么由该信号的默认处置方式(default disposition)决定(对INT而言是终止进程)。

这个潜在问题的解决方案各不相同。请注意,它也是许多init系统问题的核心。如果子进程(这里是my_program)愿意配合,问题很容易解决;但在其他所有情况下都很困难。

一种简单的协作方法是让子进程在准备好行动时向其父进程发送一个信号。为避免杀死没有为此类通知做好准备的父进程,可以使用默认被忽略的信号(例如SIGWINCH)。

让父进程休眠一段时间,以便新的子进程有足够的时间做好准备,是缓解此问题的常见做法,但非常不可靠。(特别是,所需的时长取决于子进程的优先级以及计算机的总负载。)