/*-*- Mode: C; c-basic-offset: 8 -*-*/

/***
  This file is part of rtwatch.

  Copyright 2008 Lennart Poettering

  rtwatch is free software: you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as
  published by the Free Software Foundation, either version 2.1 of the
  License, or (at your option) any later version.

  rtwatch is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with rtwatch. If not, see
  <http://www.gnu.org/licenses/>.
***/

/* This is Linux-only. Rock'n'roll! */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#define TOP_PATH "/var/run/rtwatch"
#define PERIOD_USEC_FILE "/proc/sys/kernel/sched_rt_period_us"
#define CPU_LOAD_MAX 50 /* 50% CPU load at max */
#define RTPRIO_MAX 18

static int verbose = 0;

/* Writes u as a decimal number followed by a newline to the existing
 * file fn. The file is opened write-only and without following a
 * trailing symlink. Returns 0 on success, -1 on failure (after
 * printing a message to stderr). */
static int write_long(const char *fn, unsigned long u) {
        int fd;
        ssize_t k;
        size_t l;
        char t[32];

        snprintf(t, sizeof(t), "%lu\n", u);
        l = strlen(t);

        if ((fd = open(fn, O_WRONLY|O_NOFOLLOW|O_NOCTTY)) < 0) {
                fprintf(stderr, "open(\"%s\", O_WRONLY): %s\n", fn, strerror(errno));
                return -1;
        }

        if ((k = write(fd, t, l)) != (ssize_t) l) {
                fprintf(stderr, "write(): %s\n", k < 0 ? strerror(errno) : "short write");
                close(fd);
                return -1;
        }

        if (close(fd) < 0) {
                fprintf(stderr, "close(): %s\n", strerror(errno));
                return -1;
        }

        return 0;
}

/* Reads a single newline-terminated unsigned decimal number from the
 * file fn and stores it in *u. Only the first sizeof(t)-1 bytes of
 * the file are considered. Returns 0 on success, -1 on failure (after
 * printing a message to stderr). */
static int read_long(const char *fn, unsigned long *u) {
        int fd;
        ssize_t k;
        char t[32];

        if ((fd = open(fn, O_RDONLY|O_NOFOLLOW|O_NOCTTY)) < 0) {
                fprintf(stderr, "open(\"%s\", O_RDONLY): %s\n", fn, strerror(errno));
                return -1;
        }

        if ((k = read(fd, t, sizeof(t)-1)) < 0) {
                fprintf(stderr, "read(): %s\n", strerror(errno));
                close(fd);
                return -1;
        }

        if (close(fd) < 0) {
                fprintf(stderr, "close(): %s\n", strerror(errno));
                return -1;
        }

        t[k] = 0;

        /* An empty read or a missing trailing newline is treated as a
         * parse failure, just like non-numeric contents. */
        if (k <= 0 || t[k-1] != '\n' || sscanf(t, "%lu", u) != 1) {
                fprintf(stderr, "Parse failure in %s\n", fn);
                return -1;
        }

        return 0;
}

/* Runs in the forked child: unblocks all signals, writes runtime_usec
 * to the cgroup's cpu.rt_runtime_us, adds the calling process to the
 * cgroup's tasks file, caps RLIMIT_RTPRIO at RTPRIO_MAX, switches all
 * user IDs to the real user ID and finally executes argv. Only
 * returns on failure, and then always returns 1. */
static int child(unsigned long runtime_usec, const char *cgroup_path, char *argv[]) {
        sigset_t ss;
        int ret = 1;
        char path[PATH_MAX];
        struct rlimit rl;

        /* The parent blocked a few signals; the executed program
         * should start with an empty signal mask. */
        sigemptyset(&ss);
        if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
                fprintf(stderr, "sigprocmask(): %s\n", strerror(errno));
                goto finish;
        }

        snprintf(path, sizeof(path), "%s/cpu.rt_runtime_us", cgroup_path);
        if (write_long(path, runtime_usec) < 0)
                goto finish;

        snprintf(path, sizeof(path), "%s/tasks", cgroup_path);
        if (write_long(path, (unsigned long) getpid()) < 0)
                goto finish;

        memset(&rl, 0, sizeof(rl));
        rl.rlim_cur = rl.rlim_max = RTPRIO_MAX;
        if (setrlimit(RLIMIT_RTPRIO, &rl) < 0) {
                fprintf(stderr, "RLIMIT_RTPRIO: %s\n", strerror(errno));
                goto finish;
        }

        /* Everything that needed privileges is done; switch real,
         * effective and saved UID to the real UID before exec. */
        if (setresuid(getuid(), getuid(), getuid()) < 0) {
                fprintf(stderr, "setresuid(): %s\n", strerror(errno));
                goto finish;
        }

        execvp(argv[0], argv);
        fprintf(stderr, "execvp(): %s\n", strerror(errno));

finish:
        return ret;
}

/* Verifies that path is a real directory (not a symlink to one) whose
 * permission bits are exactly 0700. Returns 0 if so, -1 otherwise
 * (after printing a message to stderr). */
static int check_dir(const char *path) {
        struct stat st;

        if (lstat(path, &st) < 0) {
                fprintf(stderr, "lstat(\"%s\", ...): %s\n", path, strerror(errno));
                return -1;
        }

        if (!S_ISDIR(st.st_mode) || (st.st_mode & 0777) != 0700) {
                fprintf(stderr, "%s is not a directory or has improper access modes.\n", path);
                return -1;
        }

        return 0;
}

/* Waits on the signals in *ss until the child pid has terminated.
 *
 * If timeout is non-zero each wait for a signal is limited to two
 * seconds; otherwise a NULL timeout is passed to sigtimedwait() (on
 * Linux this blocks until a signal arrives).
 *
 * Returns 0 when the child terminated: *ret receives its exit status
 * if it exited normally, *ret_signal the signal number if it was
 * killed by a signal. Returns 1 if the wait timed out. Returns -1 on
 * error or when a signal other than SIGCHLD was received. */
static int wait_for_kid(pid_t pid, int timeout, sigset_t *ss, int *ret, int *ret_signal) {
        static const struct timespec ts = {
                .tv_sec = 2,
                .tv_nsec = 0,
        };
        siginfo_t info;

        for (;;) {
                pid_t cpid;
                int status;

                memset(&info, 0, sizeof(info));

                if (sigtimedwait(ss, &info, timeout ? &ts : NULL) < 0) {

                        if (errno == EAGAIN)
                                return 1;

                        fprintf(stderr, "sigtimedwait(): %s\n", strerror(errno));
                        return -1;
                }

                if (info.si_signo != SIGCHLD) {
                        fprintf(stderr, "Caught signal %i.\n", info.si_signo);
                        return -1;
                }

                if ((cpid = waitpid(-1, &status, WNOHANG)) < 0) {
                        fprintf(stderr, "waitpid(%lu, ...): %s\n", (unsigned long) pid, strerror(errno));
                        return -1;
                }

                /* Either nothing was reaped (waitpid() returned 0) or
                 * it was some other child: keep waiting. */
                if (cpid != pid)
                        continue;

                if (WIFEXITED(status)) {
                        *ret = WEXITSTATUS(status);
                        return 0;
                }

                if (WIFSIGNALED(status)) {
                        if (verbose)
                                fprintf(stderr, "Warning: child process terminated with signal %i.\n", WTERMSIG(status));

                        *ret_signal = WTERMSIG(status);
                        return 0;
                }

                fprintf(stderr, "Hmm, waitpid() returned and we don't know why.\n");
                return -1;
        }
}

/* Terminates the child pid: sends SIGTERM, waits for it with a
 * timeout, and if it is still around sends SIGKILL and waits again.
 * Returns 0 if the child is gone (or did not exist in the first
 * place), -1 on error or if the child survived both signals. *ret and
 * *ret_signal are filled in by wait_for_kid(). */
static int safe_kill(pid_t pid, sigset_t *ss, int *ret, int *ret_signal) {
        int r;

        if (verbose)
                fprintf(stderr, "Killing child %lu with signal %i\n", (unsigned long) pid, SIGTERM);

        if (kill(pid, SIGTERM) < 0) {
                if (errno == ESRCH)
                        return 0;

                fprintf(stderr, "kill(): %s\n", strerror(errno));
                return -1;
        }

        /* r == 0: child reaped, r < 0: error, r > 0: timed out */
        if ((r = wait_for_kid(pid, 1, ss, ret, ret_signal)) <= 0)
                return r;

        if (verbose)
                fprintf(stderr, "Killing child %lu with signal %i\n", (unsigned long) pid, SIGKILL);

        if (kill(pid, SIGKILL) < 0) {
                if (errno == ESRCH)
                        return 0;

                fprintf(stderr, "kill(): %s\n", strerror(errno));
                return -1;
        }

        if ((r = wait_for_kid(pid, 1, ss, ret, ret_signal)) <= 0)
                return r;

        fprintf(stderr, "Client didn't react on termination signals.\n");
        return -1;
}

/* Prints the usage text to f; argv0 is the program name to show. */
static void help(const char *argv0, FILE *f) {
        fprintf(f,
                "%s <cpu> -- <program> [arguments...]\n\n"
                "\t-v\tVerbose\n"
                "\t-h\tHelp\n\n"
                "\t<cpu>: CPU time to assign in percent.\n"
                "\t<program>: the program to run\n\n"
                "Example:\n\n"
                "\t%s 1.5 -- foobar\n\n"
                "\tRuns 'foobar' and assigns it 1.5%% of the available CPU time.\n",
                argv0, argv0);
}

/* Mounts a private "cpu" cgroup hierarchy below TOP_PATH, moves the
 * requested share of realtime runtime from the root group into a new
 * cgroup, runs the given program inside that cgroup and undoes all of
 * this again once the program has terminated.
 *
 * The exit status is the child's exit status if it exited normally;
 * if it was killed by a signal the same signal is raised on ourselves.
 * On any failure 1 is returned. */
int main(int argc, char *argv[]) {
        char id[64], mnt_path[PATH_MAX], cgroup_path[PATH_MAX], root_runtime_us_path[PATH_MAX];
        const char *argv0;
        char *end;
        int ret = 1;
        pid_t pid;
        sigset_t ss;
        double cpu_percentage;
        unsigned long runtime_usec, period_usec, u;
        int ret_signal = -1;
        int o;

        static const struct option long_options[] = {
                { "help",    no_argument, NULL, 'h' },
                { "verbose", no_argument, NULL, 'v' },
                { NULL, 0, NULL, 0 }
        };

        /* argv[0] may be missing if we are executed with an empty
         * argument vector. */
        if (argc < 1 || !argv[0])
                argv0 = "rtwatch";
        else if ((argv0 = strrchr(argv[0], '/')))
                argv0++;
        else
                argv0 = argv[0];

        while ((o = getopt_long(argc, argv, "hv", long_options, NULL)) >= 0) {

                switch (o) {
                        case 'h':
                                help(argv0, stdout);
                                ret = 0;
                                goto finish;

                        case 'v':
                                verbose = 1;
                                break;

                        default:
                                goto finish;
                }
        }

        /* We need at least the percentage and the program name */
        if (optind+2 > argc) {
                help(argv0, stderr);
                goto finish;
        }

        /* The range test is written in the positive form so that NaN
         * is rejected as well. */
        errno = 0;
        cpu_percentage = strtod(argv[optind], &end);
        if (end == argv[optind] || *end != 0 || errno == ERANGE ||
            !(cpu_percentage > 0 && cpu_percentage <= 100)) {
                fprintf(stderr, "Failed to parse CPU percentage argument: %s\n", argv[optind]);
                goto finish;
        }

        if (cpu_percentage > CPU_LOAD_MAX) {
                fprintf(stderr, "More than the limit of %d%% CPU requested.\n", CPU_LOAD_MAX);
                goto finish;
        }

        if (read_long(PERIOD_USEC_FILE, &period_usec) < 0)
                goto finish;

        runtime_usec = (double) period_usec*cpu_percentage/100;

        if (runtime_usec <= 0) {
                fprintf(stderr, "Percentage too small.\n");
                goto finish;
        }

        /* runtime_usec > 0 implies period_usec > 0, so the division
         * below is safe. */
        if (verbose)
                fprintf(stderr, "period=%0.2fms runtime=%0.2fms max_cpu_load=%0.1f%%\n",
                        (double) period_usec / 1000,
                        (double) runtime_usec / 1000,
                        (double) runtime_usec * 100 / (double) period_usec);

        if (geteuid() != 0) {
                fprintf(stderr, "%s needs to be run as root.\n", argv0);
                goto finish;
        }

        /* Block the signals we want to wait for with sigwait() */
        sigemptyset(&ss);
        sigaddset(&ss, SIGTERM);
        sigaddset(&ss, SIGINT);
        sigaddset(&ss, SIGQUIT);
        sigaddset(&ss, SIGCHLD);

        if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
                fprintf(stderr, "sigprocmask(): %s\n", strerror(errno));
                goto finish;
        }

        /* Make sure our signals are not set to SIG_IGN */
        signal(SIGTERM, SIG_DFL);
        signal(SIGINT, SIG_DFL);
        signal(SIGQUIT, SIG_DFL);
        signal(SIGCHLD, SIG_DFL);

        /* Create top directory for the hierarchies */
        if (mkdir(TOP_PATH, 0700) < 0 && errno != EEXIST) {
                fprintf(stderr, "mkdir(\"%s\"): %s\n", TOP_PATH, strerror(errno));
                goto finish;
        }

        /* The result of chmod() is not checked here; check_dir()
         * verifies the resulting mode right afterwards. */
        chmod(TOP_PATH, 0700);

        if (check_dir(TOP_PATH) < 0)
                goto finish_remove_top_path;

        /* Create mount directory for the hierarchy */
        snprintf(mnt_path, sizeof(mnt_path), TOP_PATH "/%lu", (unsigned long) getpid());
        if (mkdir(mnt_path, 0700) < 0) {
                fprintf(stderr, "mkdir(\"%s\"): %s\n", mnt_path, strerror(errno));
                goto finish_remove_top_path;
        }

        chmod(mnt_path, 0700);

        if (check_dir(mnt_path) < 0)
                goto finish_remove_mnt_path;

        /* Create hierarchy */
        snprintf(id, sizeof(id), "rtwatch-%lu", (unsigned long) getpid());
        if (mount(id, mnt_path, "cgroup", 0, "cpu") < 0) {
                fprintf(stderr, "mount(\"%s\", \"%s\", \"cgroup\", ...): %s\n", id, mnt_path, strerror(errno));
                goto finish_remove_mnt_path;
        }

        /* Decrease root runtime */
        snprintf(root_runtime_us_path, sizeof(root_runtime_us_path), "%s/cpu.rt_runtime_us", mnt_path);
        if (read_long(root_runtime_us_path, &u) < 0)
                goto finish_umount;

        if (u < (period_usec/20 + runtime_usec)) {
                fprintf(stderr, "Refusing to limit root runtime to less than 5%%\n");
                goto finish_umount;
        }

        u -= runtime_usec;

        if (write_long(root_runtime_us_path, u) < 0)
                goto finish_umount;

        /* Create cgroup */
        snprintf(cgroup_path, sizeof(cgroup_path), "%s/rtwatch-%lu", mnt_path, (unsigned long) getpid());
        if (mkdir(cgroup_path, 0700) < 0) {
                fprintf(stderr, "mkdir(\"%s\"): %s\n", cgroup_path, strerror(errno));
                goto finish_increment_root_runtime;
        }

        if ((pid = fork()) < 0) {
                fprintf(stderr, "fork(): %s\n", strerror(errno));
                goto finish_remove_cgroup_path;
        } else if (!pid)
                _exit(child(runtime_usec, cgroup_path, argv + optind + 1));

        if (verbose)
                fprintf(stderr, "Child process has PID %lu.\n", (unsigned long) pid);

        if (wait_for_kid(pid, 0, &ss, &ret, &ret_signal) < 0)
                goto finish_kill;

        goto finish_remove_cgroup_path;

finish_kill:

        /* We'll die shortly. Then, our child will be reparented to
         * init which then will care for reaping it. */
        if (safe_kill(pid, &ss, &ret, &ret_signal) < 0)
                ret = 1;

finish_remove_cgroup_path:

        if (rmdir(cgroup_path) < 0) {
                fprintf(stderr, "Warning: rmdir(\"%s\"): %s\n", cgroup_path, strerror(errno));
                ret = 1;
        }

finish_increment_root_runtime:

        /* Hand the runtime we took away back to the root group, but
         * never more than one full period. */
        if (read_long(root_runtime_us_path, &u) < 0)
                goto finish_umount;

        u += runtime_usec;

        if (u > period_usec)
                u = period_usec;

        write_long(root_runtime_us_path, u);

finish_umount:

        if (umount(mnt_path) < 0) {
                fprintf(stderr, "Warning: umount(\"%s\"): %s\n", mnt_path, strerror(errno));
                ret = 1;
        }

finish_remove_mnt_path:

        if (rmdir(mnt_path) < 0) {
                fprintf(stderr, "Warning: rmdir(\"%s\"): %s\n", mnt_path, strerror(errno));
                ret = 1;
        }

finish_remove_top_path:

        /* If some other instance of rtwatch is still running the
         * following call will fail, but that's fine. */
        if (rmdir(TOP_PATH) < 0 && errno != EBUSY && errno != ENOTEMPTY) {
                fprintf(stderr, "Warning: rmdir(\"%s\"): %s\n", TOP_PATH, strerror(errno));
                ret = 1;
        }

finish:

        /* If the child was killed by a signal, die from the same
         * signal ourselves so that our parent sees it. */
        if (ret_signal > 0) {
                sigemptyset(&ss);

                if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0)
                        fprintf(stderr, "Warning: sigprocmask(): %s\n", strerror(errno));

                raise(ret_signal);
        }

        return ret;
}