From 82079b22573dc1ba5dda12325c584c2f7d5ceae7 Mon Sep 17 00:00:00 2001 From: lace <> Date: Wed, 11 Jul 2007 11:41:42 +0000 Subject: [PATCH] Workaround/fix unset D_TYPE on RHEL-4.s390x. Final cleanup is now fully raceless - for respawning children. --- src/orphanripper.c | 187 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 110 insertions(+), 77 deletions(-) diff --git a/src/orphanripper.c b/src/orphanripper.c index 7bea08d..f60651a 100644 --- a/src/orphanripper.c +++ b/src/orphanripper.c @@ -274,66 +274,56 @@ static const char *read_cmdline (pid_t pid) return cmdline; } -static int fd_fs_scan (pid_t pid, int (*func) (pid_t pid, const char *link)) +static int dir_scan (const char *dirname, + int (*callback) (struct dirent *dirent, const char *pathname)) { DIR *dir; struct dirent *dirent; - char dirname[64]; int rc = 0; - if (snprintf (dirname, sizeof dirname, "/proc/%d/fd", (int) pid) < 0) - { - perror ("snprintf(3)"); - exit (EXIT_FAILURE); - } dir = opendir (dirname); if (dir == NULL) { if (errno == EACCES || errno == ENOENT) - return 0; + return rc; fprintf (stderr, "%s: opendir (\"%s\"): %m\n", progname, dirname); exit (EXIT_FAILURE); } while ((errno = 0, dirent = readdir (dir))) { - char linkname[LINE_MAX], buf[LINE_MAX]; - int linkname_len; - ssize_t buf_len; - - /* FIXME: POSIX portability. */ - if ((dirent->d_type != DT_DIR && dirent->d_type != DT_LNK) - || (dirent->d_type == DT_DIR && strcmp (dirent->d_name, ".") != 0 - && strcmp (dirent->d_name, "..") != 0) - || (dirent->d_type == DT_LNK && strspn (dirent->d_name, "0123456789") - != strlen (dirent->d_name))) - { - /* There is a race, D_TYPE may be uninitialized if stat(2) fails. */ - if (dirent->d_type != DT_UNKNOWN) - fprintf (stderr, "Unexpected entry \"%s\" (d_type %u)" - " on readdir (\"%s\"): %m\n", - dirent->d_name, (unsigned) dirent->d_type, dirname); - continue; - } - if (dirent->d_type == DT_DIR) - continue; - linkname_len = snprintf (linkname, sizeof linkname, "%s/%s", dirname, - dirent->d_name); - if (linkname_len <= 0 || linkname_len >= sizeof linkname) + char pathname[LINE_MAX]; + int pathname_len; + + pathname_len = snprintf (pathname, sizeof pathname, "%s/%s", + dirname, dirent->d_name); + if (pathname_len <= 0 || pathname_len >= sizeof pathname) { - fprintf (stderr, "Link content too long: `%s' / `%s'\n", - dirent->d_name, dirent->d_name); + fprintf (stderr, "entry file name too long: `%s' / `%s'\n", + dirname, dirent->d_name); continue; } - buf_len = readlink (linkname, buf, sizeof buf - 1); - if (buf_len <= 0 || buf_len >= sizeof buf - 1) - { - if (errno != ENOENT && errno != EACCES) - fprintf (stderr, "Error reading link \"%s\": %m\n", - linkname); - continue; + /* RHEL-4.5 on s390x never fills in D_TYPE. */ + if (dirent->d_type == DT_UNKNOWN) + { + struct stat statbuf; + int i; + + /* We are not interested in the /proc/PID/fd/ links targets. */ + i = lstat (pathname, &statbuf); + if (i == -1) + { + if (errno == EACCES || errno == ENOENT) + continue; + fprintf (stderr, "%s: stat (\"%s\"): %m\n", progname, pathname); + exit (EXIT_FAILURE); + } + if (S_ISDIR (statbuf.st_mode)) + dirent->d_type = DT_DIR; + if (S_ISLNK (statbuf.st_mode)) + dirent->d_type = DT_LNK; + /* No other D_TYPE types used in this code. */ } - buf[buf_len] = 0; - rc = (*func) (pid, buf); + rc = (*callback) (dirent, pathname); if (rc != 0) { errno = 0; @@ -353,38 +343,63 @@ static int fd_fs_scan (pid_t pid, int (*func) (pid_t pid, const char *link)) return rc; } -static void pid_fs_scan (void (*func) (pid_t pid, void *data), void *data) +static int fd_fs_scan (pid_t pid, int (*func) (pid_t pid, const char *link)) { - DIR *dir; - struct dirent *dirent; + char dirname[64]; - dir = opendir ("/proc"); - if (dir == NULL) - { - perror ("opendir (\"/proc\")"); - exit (EXIT_FAILURE); - } - while ((errno = 0, dirent = readdir (dir))) - { - /* FIXME: POSIX portability. */ - if (dirent->d_type != DT_DIR - || strspn (dirent->d_name, "0123456789") != strlen (dirent->d_name)) - continue; - (*func) (atoi (dirent->d_name), data); - } - if (errno != 0) - { - perror ("readdir (\"/proc\")"); - exit (EXIT_FAILURE); - } - if (closedir (dir) != 0) + if (snprintf (dirname, sizeof dirname, "/proc/%d/fd", (int) pid) < 0) { - perror ("closedir (\"/proc\")"); + perror ("snprintf(3)"); exit (EXIT_FAILURE); } + + int callback (struct dirent *dirent, const char *pathname) + { + char buf[LINE_MAX]; + ssize_t buf_len; + + if ((dirent->d_type != DT_DIR && dirent->d_type != DT_LNK) + || (dirent->d_type == DT_DIR && strcmp (dirent->d_name, ".") != 0 + && strcmp (dirent->d_name, "..") != 0) + || (dirent->d_type == DT_LNK && strspn (dirent->d_name, "0123456789") + != strlen (dirent->d_name))) + { + fprintf (stderr, "Unexpected entry \"%s\" (d_type %u)" + " on readdir (\"%s\"): %m\n", + dirent->d_name, (unsigned) dirent->d_type, dirname); + return 0; + } + if (dirent->d_type == DT_DIR) + return 0; + buf_len = readlink (pathname, buf, sizeof buf - 1); + if (buf_len <= 0 || buf_len >= sizeof buf - 1) + { + if (errno != ENOENT && errno != EACCES) + fprintf (stderr, "Error reading link \"%s\": %m\n", pathname); + return 0; + } + buf[buf_len] = 0; + return (*func) (pid, buf); + } + + return dir_scan (dirname, callback); } -static int rip_check (pid_t pid, const char *link) +static void pid_fs_scan (void (*func) (pid_t pid, void *data), void *data) +{ + int callback (struct dirent *dirent, const char *pathname) + { + if (dirent->d_type != DT_DIR + || strspn (dirent->d_name, "0123456789") != strlen (dirent->d_name)) + return 0; + (*func) (atoi (dirent->d_name), data); + return 0; + } + + dir_scan ("/proc", callback); +} + +static int rip_check_ptyname (pid_t pid, const char *link) { assert (pid != getpid ()); @@ -408,12 +423,18 @@ static int pid_found (pid_t pid) return 0; } +/* Single pass is not enough, a (multithreaded) process was seen to survive. + Repeated killing of the same process is not enough, zombies can be killed. + */ +static int cleanup_acted; + static void pid_record (pid_t pid) { struct pid *entry; if (pid_found (pid)) return; + cleanup_acted = 1; entry = malloc (sizeof (*entry)); if (entry == NULL) @@ -479,9 +500,13 @@ static void killtree_pid_fs_scan (pid_t pid, void *data) { pid_t parent_pid = *(pid_t *) data; + /* Do not optimize it as we could miss some newly spawned processes. + Always traverse all the leaves. */ +#if 0 /* Optimization. */ if (pid_found (pid)) return; +#endif if (pid_get_parent (pid) != parent_pid) return; @@ -506,7 +531,7 @@ static void rip_pid_fs_scan (pid_t pid, void *data) /* Check both PGID and the stale file descriptors. */ pgid = getpgid (pid); if (pgid == child - || fd_fs_scan (pid, rip_check) != 0) + || fd_fs_scan (pid, rip_check_ptyname) != 0) killtree (pid); } @@ -521,12 +546,12 @@ static void killproc (pid_t pid) if (cmdline == NULL) cmdline = ""; fprintf (stderr, "%s: Killed -9 orphan PID %d: %s\n", progname, (int) pid, cmdline); - if (kill (pid, SIGKILL)) - { - if (errno != ESRCH) - fprintf (stderr, "%s: kill (%d, SIGKILL): %m\n", progname, (int) pid); - return; - } + if (kill (pid, SIGKILL) == 0) + cleanup_acted = 1; + else if (errno != ESRCH) + fprintf (stderr, "%s: kill (%d, SIGKILL): %m\n", progname, (int) pid); + /* RHEL-3 kernels cannot SIGKILL a `T (stopped)' process. */ + kill (pid, SIGCONT); /* Do not waitpid(2) as it cannot be our direct descendant and it gets cleaned up by init(8). */ #if 0 @@ -541,10 +566,18 @@ static void killproc (pid_t pid) #endif } -static void rip () +static void rip (void) { - pid_fs_scan (rip_pid_fs_scan, NULL); - pid_forall (killproc); + cleanup_acted = 0; + do + { + if (cleanup_acted) + usleep (1000000 / 10); + cleanup_acted = 0; + pid_fs_scan (rip_pid_fs_scan, NULL); + pid_forall (killproc); + } + while (cleanup_acted); } int main (int argc, char **argv) -- 1.8.3.1