X-Git-Url: https://git.jankratochvil.net/?p=nethome.git;a=blobdiff_plain;f=src%2Forphanripper.c;h=4893e8f4961abeae4f07cb71a88e3a7e8ad67bfc;hp=59fe41ff850dd9893ec84ad751d79b5bff0440d5;hb=c825ee4453a37dac605a9a1bc16288f3a21ccf0e;hpb=40c2cf283ed265da46fdc66556f02a2521ac6406 diff --git a/src/orphanripper.c b/src/orphanripper.c index 59fe41f..4893e8f 100644 --- a/src/orphanripper.c +++ b/src/orphanripper.c @@ -16,17 +16,16 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Reap any leftover children possibly holding file descriptors. - * Children are identified by the open file descriptor. - * PGID or SID may be set by the children on their own. - * If we find a child by its file descriptor, we kill it will all its process - * tree (grandchildren). + * Children are identified by the stale file descriptor or PGID / SID. + * Both can be missed but only the stale file descriptors are important for us. + * PGID / SID may be set by the children on their own. + * If we fine a candidate we kill it will all its process tree (grandchildren). * The child process is run with `2>&1' redirection (due to forkpty(3)). * 2007-07-10 Jan Kratochvil */ -#define _XOPEN_SOURCE 1 -#define _XOPEN_SOURCE_EXTENDED 1 -#define _BSD_SOURCE 1 +/* For getpgid(2). */ +#define _GNU_SOURCE 1 #include #include @@ -41,83 +40,255 @@ #include #include #include +#include + +#define LENGTH(x) (sizeof (x) / sizeof (*(x))) static const char *progname; -static int signal_child_hit = 0; +static volatile int signal_chld_hit = 0; + +static void signal_chld (int signo) +{ + signal_chld_hit = 1; +} + +static volatile int signal_alrm_hit = 0; -static void signal_child (int signo) +static void signal_alrm (int signo) { - signal_child_hit = 1; + signal_alrm_hit = 1; } static char childptyname[LINE_MAX]; +static pid_t child; -static int spawn (char **argv) +static void print_child_error (const char *reason, char **argv) +{ + char **sp; + + fprintf (stderr, "%s: %d %s:", progname, (int) child, reason); + for (sp = argv; *sp != NULL; sp++) + { + fputc (' ', stderr); + fputs (*sp, stderr); + } + fputc ('\n', stderr); +} + +static int read_out (int amaster) { - pid_t child, child_got; - int status, amaster, i; char buf[LINE_MAX]; ssize_t buf_got; - struct sigaction act; - /* We do not use signal(2) to be sure we have SA_RESTART unset. */ - memset (&act, 0, sizeof (act)); - act.sa_handler = signal_child; - i = sigemptyset (&act.sa_mask); - if (i != 0) + buf_got = read (amaster, buf, sizeof buf); + if (buf_got == 0) + return 0; + /* Weird but at least after POLLHUP we get EIO instead of just EOF. */ + if (buf_got == -1 && errno == EIO) + return 0; + if (buf_got < 0) { - perror ("sigemotyset(3)"); + perror ("read (amaster)"); exit (EXIT_FAILURE); } - act.sa_flags = 0; - i = sigaction (SIGCHLD, &act, NULL); - if (i != 0) + if (write (STDOUT_FILENO, buf, buf_got) != buf_got) { - perror ("sigaction(2)"); + perror ("write(2)"); exit (EXIT_FAILURE); } + return 1; +} - child = forkpty (&amaster, childptyname, NULL, NULL); +static int spawn (char **argv, int timeout) +{ + pid_t child_got; + int status, amaster, i, rc; + struct sigaction act; + sigset_t set; + struct termios termios; + unsigned alarm_orig; + + /* We do not use signal(2) to be sure we do not have SA_RESTART. */ + memset (&act, 0, sizeof (act)); + act.sa_handler = signal_chld; + i = sigemptyset (&act.sa_mask); + assert (i == 0); + act.sa_flags = 0; /* !SA_RESTART */ + i = sigaction (SIGCHLD, &act, NULL); + assert (i == 0); + + i = sigemptyset (&set); + assert (i == 0); + i = sigaddset (&set, SIGCHLD); + assert (i == 0); + i = sigprocmask (SIG_SETMASK, &set, NULL); + assert (i == 0); + + /* With TERMP passed as NULL we get "\n" -> "\r\n". */ + termios.c_iflag = IGNBRK | IGNPAR; + termios.c_oflag = 0; + termios.c_cflag = CS8 | CREAD | CLOCAL | HUPCL | B9600; + termios.c_lflag = IEXTEN | NOFLSH; + memset (termios.c_cc, _POSIX_VDISABLE, sizeof (termios.c_cc)); + termios.c_cc[VTIME] = 0; + termios.c_cc[VMIN ] = 1; + cfmakeraw (&termios); +#ifdef FLUSHO + /* Workaround a readline deadlock bug in _get_tty_settings(). */ + termios.c_lflag &= ~FLUSHO; +#endif + child = forkpty (&amaster, childptyname, &termios, NULL); switch (child) { case -1: perror ("forkpty(3)"); exit (EXIT_FAILURE); case 0: - execvp (argv[1], argv + 1); + /* Do not replace STDIN as inferiors query its termios. */ +#if 0 + i = close (STDIN_FILENO); + assert (i == 0); + i = open ("/dev/null", O_RDONLY); + assert (i == STDIN_FILENO); +#endif + + /* Do not setpgrp(2) in the parent process as the process-group + is shared for the whole sh(1) pipeline we could be a part + of. The process-group is set according to PID of the first + command in the pipeline. + We would rip even vi(1) in the case of: + ./orphanripper sh -c 'sleep 1&' | vi - + */ + /* Do not setpgrp(2) as our pty would not be ours and we would + get `SIGSTOP' later, particularly after spawning gdb(1). + setsid(3) was already executed by forkpty(3) and it would fail if + executed again. */ + if (getpid() != getpgrp ()) + { + perror ("getpgrp(2)"); + exit (EXIT_FAILURE); + } + execvp (argv[0], argv); perror ("execvp(2)"); exit (EXIT_FAILURE); default: break; } - for (;;) + i = fcntl (amaster, F_SETFL, O_RDWR | O_NONBLOCK); + if (i != 0) { - buf_got = read (amaster, buf, sizeof buf); - if (buf_got == -1) - { - assert (signal_child_hit != 0); - assert (errno == EINTR || errno == EIO); + perror ("fcntl (amaster, F_SETFL, O_NONBLOCK)"); + exit (EXIT_FAILURE); + } + + /* We do not use signal(2) to be sure we do not have SA_RESTART. */ + act.sa_handler = signal_alrm; + i = sigaction (SIGALRM, &act, NULL); + assert (i == 0); + + alarm_orig = alarm (timeout); + assert (alarm_orig == 0); + + i = sigemptyset (&set); + assert (i == 0); + + while (!signal_alrm_hit) + { + struct pollfd pollfd; + + pollfd.fd = amaster; + pollfd.events = POLLIN; + i = ppoll (&pollfd, 1, NULL, &set); + if (i == -1 && errno == EINTR && signal_chld_hit) + break; + assert (i == 1); + /* Data available? Process it first. */ + if (pollfd.revents & POLLIN) + { + if (!read_out (amaster)) + { + fprintf (stderr, "%s: Unexpected EOF\n", progname); + exit (EXIT_FAILURE); + } } - if (buf_got <= 0) + if (pollfd.revents & POLLHUP) break; - if (write (STDOUT_FILENO, buf, buf_got) != buf_got) - { - perror ("write(2)"); + if ((pollfd.revents &= ~POLLIN) != 0) + { + fprintf (stderr, "%s: ppoll(2): revents 0x%x\n", progname, + (unsigned) pollfd.revents); exit (EXIT_FAILURE); } + /* Child exited? */ + if (signal_chld_hit) + break; } + + if (signal_alrm_hit) + { + i = kill (child, SIGKILL); + assert (i == 0); + } + else + alarm (0); + + /* WNOHANG still could fail. */ child_got = waitpid (child, &status, 0); if (child != child_got) { fprintf (stderr, "waitpid (%d) = %d: %m\n", (int) child, (int) child_got); exit (EXIT_FAILURE); } - if (!WIFEXITED (status)) + if (signal_alrm_hit) + { + char *buf; + + if (asprintf (&buf, "Timed out after %d seconds", timeout) != -1) + { + print_child_error (buf, argv); + free (buf); + } + rc = 128 + SIGALRM; + } + else if (WIFEXITED (status)) + rc = WEXITSTATUS (status); + else if (WIFSIGNALED (status)) + { + print_child_error (strsignal (WTERMSIG (status)), argv); + rc = 128 + WTERMSIG (status); + } + else if (WIFSTOPPED (status)) + { + fprintf (stderr, "waitpid (%d): WIFSTOPPED - WSTOPSIG is %d\n", + (int) child, WSTOPSIG (status)); + exit (EXIT_FAILURE); + } + else { fprintf (stderr, "waitpid (%d): !WIFEXITED (%d)\n", (int) child, status); exit (EXIT_FAILURE); } + + /* In the POLLHUP case we may not have seen SIGCHLD so far. */ + i = sigprocmask (SIG_SETMASK, &set, NULL); + assert (i == 0); + + assert (signal_chld_hit != 0); + + /* Do not unset O_NONBLOCK as a stale child (the whole purpose of this + program) having open its output pty would block us in read_out. */ +#if 0 + i = fcntl (amaster, F_SETFL, O_RDONLY /* !O_NONBLOCK */); + if (i != 0) + { + perror ("fcntl (amaster, F_SETFL, O_RDONLY /* !O_NONBLOCK */)"); + exit (EXIT_FAILURE); + } +#endif + + while (read_out (amaster)); + /* Do not close the master FD as the child would have `/dev/pts/23 (deleted)' entries which are not expected (and expecting ` (deleted)' would be a race. */ @@ -130,7 +301,7 @@ static int spawn (char **argv) } #endif - return WEXITSTATUS (status); + return rc; } /* Detected commandline may look weird due to a race: @@ -183,62 +354,56 @@ static const char *read_cmdline (pid_t pid) return cmdline; } -static int fd_fs_scan (pid_t pid, int (*func) (pid_t pid, const char *link)) +static int dir_scan (const char *dirname, + int (*callback) (struct dirent *dirent, const char *pathname)) { DIR *dir; struct dirent *dirent; - char dirname[64]; int rc = 0; - if (snprintf (dirname, sizeof dirname, "/proc/%d/fd", (int) pid) < 0) - { - perror ("snprintf(3)"); - exit (EXIT_FAILURE); - } dir = opendir (dirname); if (dir == NULL) { - if (errno == EACCES) - return 0; + if (errno == EACCES || errno == ENOENT) + return rc; fprintf (stderr, "%s: opendir (\"%s\"): %m\n", progname, dirname); exit (EXIT_FAILURE); } while ((errno = 0, dirent = readdir (dir))) { - char linkname[LINE_MAX], buf[LINE_MAX]; - int linkname_len; - ssize_t buf_len; - - /* FIXME: POSIX portability. */ - if ((dirent->d_type != DT_DIR && dirent->d_type != DT_LNK) - || (dirent->d_type == DT_DIR && strcmp (dirent->d_name, ".") != 0 - && strcmp (dirent->d_name, "..") != 0) - || (dirent->d_type == DT_LNK && strspn (dirent->d_name, "0123456789") - != strlen (dirent->d_name))) - { - fprintf (stderr, "Unexpected entry \"%s\" on readdir (\"%s\"): %m\n", - dirent->d_name, dirname); - continue; - } - if (dirent->d_type == DT_DIR) - continue; - linkname_len = snprintf (linkname, sizeof linkname, "%s/%s", dirname, - dirent->d_name); - if (linkname_len <= 0 || linkname_len >= sizeof linkname) + char pathname[LINE_MAX]; + int pathname_len; + + pathname_len = snprintf (pathname, sizeof pathname, "%s/%s", + dirname, dirent->d_name); + if (pathname_len <= 0 || pathname_len >= (int) sizeof pathname) { - fprintf (stderr, "Link content too long: `%s' / `%s'\n", - dirent->d_name, dirent->d_name); + fprintf (stderr, "entry file name too long: `%s' / `%s'\n", + dirname, dirent->d_name); continue; } - buf_len = readlink (linkname, buf, sizeof buf - 1); - if (buf_len <= 0 || buf_len >= sizeof buf - 1) - { - fprintf (stderr, "Error reading link \"%s\": %m\n", - linkname); - continue; + /* RHEL-4.5 on s390x never fills in D_TYPE. */ + if (dirent->d_type == DT_UNKNOWN) + { + struct stat statbuf; + int i; + + /* We are not interested in the /proc/PID/fd/ links targets. */ + i = lstat (pathname, &statbuf); + if (i == -1) + { + if (errno == EACCES || errno == ENOENT) + continue; + fprintf (stderr, "%s: stat (\"%s\"): %m\n", progname, pathname); + exit (EXIT_FAILURE); + } + if (S_ISDIR (statbuf.st_mode)) + dirent->d_type = DT_DIR; + if (S_ISLNK (statbuf.st_mode)) + dirent->d_type = DT_LNK; + /* No other D_TYPE types used in this code. */ } - buf[buf_len] = 0; - rc = (*func) (pid, buf); + rc = (*callback) (dirent, pathname); if (rc != 0) { errno = 0; @@ -258,38 +423,63 @@ static int fd_fs_scan (pid_t pid, int (*func) (pid_t pid, const char *link)) return rc; } -static void pid_fs_scan (void (*func) (pid_t pid, void *data), void *data) +static int fd_fs_scan (pid_t pid, int (*func) (pid_t pid, const char *link)) { - DIR *dir; - struct dirent *dirent; + char dirname[64]; - dir = opendir ("/proc"); - if (dir == NULL) - { - perror ("opendir (\"/proc\")"); - exit (EXIT_FAILURE); - } - while ((errno = 0, dirent = readdir (dir))) - { - /* FIXME: POSIX portability. */ - if (dirent->d_type != DT_DIR - || strspn (dirent->d_name, "0123456789") != strlen (dirent->d_name)) - continue; - (*func) (atoi (dirent->d_name), data); - } - if (errno != 0) - { - perror ("readdir (\"/proc\")"); - exit (EXIT_FAILURE); - } - if (closedir (dir) != 0) + if (snprintf (dirname, sizeof dirname, "/proc/%d/fd", (int) pid) < 0) { - perror ("closedir (\"/proc\")"); + perror ("snprintf(3)"); exit (EXIT_FAILURE); } + + int callback (struct dirent *dirent, const char *pathname) + { + char buf[LINE_MAX]; + ssize_t buf_len; + + if ((dirent->d_type != DT_DIR && dirent->d_type != DT_LNK) + || (dirent->d_type == DT_DIR && strcmp (dirent->d_name, ".") != 0 + && strcmp (dirent->d_name, "..") != 0) + || (dirent->d_type == DT_LNK && strspn (dirent->d_name, "0123456789") + != strlen (dirent->d_name))) + { + fprintf (stderr, "Unexpected entry \"%s\" (d_type %u)" + " on readdir (\"%s\"): %m\n", + dirent->d_name, (unsigned) dirent->d_type, dirname); + return 0; + } + if (dirent->d_type == DT_DIR) + return 0; + buf_len = readlink (pathname, buf, sizeof buf - 1); + if (buf_len <= 0 || buf_len >= (ssize_t) sizeof buf - 1) + { + if (errno != ENOENT && errno != EACCES) + fprintf (stderr, "Error reading link \"%s\": %m\n", pathname); + return 0; + } + buf[buf_len] = 0; + return (*func) (pid, buf); + } + + return dir_scan (dirname, callback); +} + +static void pid_fs_scan (void (*func) (pid_t pid, void *data), void *data) +{ + int callback (struct dirent *dirent, const char *pathname) + { + if (dirent->d_type != DT_DIR + || strspn (dirent->d_name, "0123456789") != strlen (dirent->d_name)) + return 0; + (*func) (atoi (dirent->d_name), data); + return 0; + } + + dir_scan ("/proc", callback); } -static int rip_check (pid_t pid, const char *link) +static int rip_check_ptyname (pid_t pid, const char *link) { assert (pid != getpid ()); @@ -313,12 +503,18 @@ static int pid_found (pid_t pid) return 0; } +/* Single pass is not enough, a (multithreaded) process was seen to survive. + Repeated killing of the same process is not enough, zombies can be killed. + */ +static int cleanup_acted; + static void pid_record (pid_t pid) { struct pid *entry; if (pid_found (pid)) return; + cleanup_acted = 1; entry = malloc (sizeof (*entry)); if (entry == NULL) @@ -360,7 +556,7 @@ static pid_t pid_get_parent (pid_t pid) while (errno = 0, fgets (line, sizeof line, f) == line) { if (strncmp (line, "PPid:\t", sizeof "PPid:\t" - 1) != 0) - continue; + continue; retval = atoi (line + sizeof "PPid:\t" - 1); errno = 0; break; @@ -384,9 +580,13 @@ static void killtree_pid_fs_scan (pid_t pid, void *data) { pid_t parent_pid = *(pid_t *) data; + /* Do not optimize it as we could miss some newly spawned processes. + Always traverse all the leaves. */ +#if 0 /* Optimization. */ if (pid_found (pid)) return; +#endif if (pid_get_parent (pid) != parent_pid) return; @@ -402,11 +602,16 @@ static void killtree (pid_t pid) static void rip_pid_fs_scan (pid_t pid, void *data) { + pid_t pgid; + /* Shouldn't happen. */ if (pid == getpid ()) return; - if (fd_fs_scan (pid, rip_check) != 0) + /* Check both PGID and the stale file descriptors. */ + pgid = getpgid (pid); + if (pgid == child + || fd_fs_scan (pid, rip_check_ptyname) != 0) killtree (pid); } @@ -415,16 +620,18 @@ static void killproc (pid_t pid) const char *cmdline; cmdline = read_cmdline (pid); + /* Avoid printing the message for already gone processes. */ if (kill (pid, 0) != 0 && errno == ESRCH) return; if (cmdline == NULL) cmdline = ""; fprintf (stderr, "%s: Killed -9 orphan PID %d: %s\n", progname, (int) pid, cmdline); - if (kill (pid, SIGKILL)) { - fprintf (stderr, "%s: kill (%d, SIGKILL): %m\n", progname, - (int) pid); - return; - } + if (kill (pid, SIGKILL) == 0) + cleanup_acted = 1; + else if (errno != ESRCH) + fprintf (stderr, "%s: kill (%d, SIGKILL): %m\n", progname, (int) pid); + /* RHEL-3 kernels cannot SIGKILL a `T (stopped)' process. */ + kill (pid, SIGCONT); /* Do not waitpid(2) as it cannot be our direct descendant and it gets cleaned up by init(8). */ #if 0 @@ -439,24 +646,60 @@ static void killproc (pid_t pid) #endif } -static void rip () +static void rip (void) { - pid_fs_scan (rip_pid_fs_scan, NULL); - pid_forall (killproc); + cleanup_acted = 0; + do + { + if (cleanup_acted) + usleep (1000000 / 10); + cleanup_acted = 0; + pid_fs_scan (rip_pid_fs_scan, NULL); + pid_forall (killproc); + } + while (cleanup_acted); } int main (int argc, char **argv) { + int timeout = 0; int rc; - if (argc < 2 || strcmp (argv[1], "-h") == 0 - || strcmp (argv[1], "--help") == 0) + progname = *argv++; + argc--; + + if (argc < 1 || strcmp (*argv, "-h") == 0 + || strcmp (*argv, "--help") == 0) { - fputs ("Syntax: orphanripper \n", stdout); + puts ("Syntax: orphanripper [-t ] "); exit (EXIT_FAILURE); } - progname = argv[0]; - rc = spawn (argv); + if ((*argv)[0] == '-' && (*argv)[1] == 't') + { + char *timeout_s = NULL; + + if ((*argv)[2] == 0) + timeout_s = *++argv; + else if (isdigit ((*argv)[2])) + timeout_s = (*argv) + 2; + if (timeout_s != NULL) + { + long l; + char *endptr; + + argv++; + l = strtol (timeout_s, &endptr, 0); + timeout = l; + if ((endptr != NULL && *endptr != 0) || timeout < 0 || timeout != l) + { + fprintf (stderr, "%s: Invalid timeout value: %s\n", progname, + timeout_s); + exit (EXIT_FAILURE); + } + } + } + + rc = spawn (argv, timeout); rip (); return rc; }