/*-*- Mode: C; c-basic-offset: 8 -*-*/

/*
 * rtwatch -- run a program inside a dedicated cgroup with a bounded
 * real-time CPU budget, and clean everything up again when it exits.
 *
 * Provenance: commit 08bc2a62aeb8517f99c909f1dcb4ceb6c750ea1d
 * ("Initial import", Lennart Poettering, Fri, 18 Apr 2008 03:54:57 +0200),
 * which added Makefile (7 lines) and rtwatch.c (444 lines).
 *
 * The Makefile shipped with that commit:
 *
 *     CFLAGS=-Wall -W -pipe -O2
 *
 *     rtwatch: rtwatch.o
 *             $(CC) $(CFLAGS) -o $@ $^
 *
 *     clean:
 *             rm -f rtwatch rtwatch.o
 */

/* This is Linux-only. Rock'n'roll! */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

/* NOTE(review): the header list is reconstructed from the identifiers
 * used below -- confirm against the upstream file. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <sys/mount.h>
#include <sys/resource.h>

#define TOP_PATH "/var/run/rtwatch"
#define PERIOD_USEC_FILE "/proc/sys/kernel/sched_rt_period_us"

#define CPU_LOAD_MAX 50 /* 50% CPU load at max */
#define RTPRIO_MAX 18

/* Writes the decimal representation of u, followed by a newline, to the
 * already existing file fn. Symlinks are not followed. Returns 0 on
 * success and -1 on failure; failures are reported on stderr. */
static int write_long(const char *fn, unsigned long u) {
        int fd;
        ssize_t k;
        char t[32];

        snprintf(t, sizeof(t), "%lu\n", u);

        if ((fd = open(fn, O_WRONLY|O_NOFOLLOW|O_NOCTTY)) < 0) {
                fprintf(stderr, "open(\"%s\", O_WRONLY): %s\n", fn, strerror(errno));
                return -1;
        }

        if ((k = write(fd, t, strlen(t))) != (ssize_t) strlen(t)) {
                fprintf(stderr, "write(): %s\n", k < 0 ? strerror(errno) : "short write");
                close(fd);
                return -1;
        }

        if (close(fd) < 0) {
                fprintf(stderr, "close(): %s\n", strerror(errno));
                return -1;
        }

        return 0;
}

/* Reads a single newline-terminated unsigned decimal number from the
 * file fn and stores it in *u. Symlinks are not followed. Returns 0 on
 * success and -1 on failure (I/O error, empty file, missing newline or
 * unparsable contents); failures are reported on stderr. */
static int read_long(const char *fn, unsigned long *u) {
        int fd;
        ssize_t k;
        char t[32];

        if ((fd = open(fn, O_RDONLY|O_NOFOLLOW|O_NOCTTY)) < 0) {
                fprintf(stderr, "open(\"%s\", O_RDONLY): %s\n", fn, strerror(errno));
                return -1;
        }

        if ((k = read(fd, t, sizeof(t)-1)) < 0) {
                fprintf(stderr, "read(): %s\n", strerror(errno));
                close(fd);
                return -1;
        }

        if (close(fd) < 0) {
                fprintf(stderr, "close(): %s\n", strerror(errno));
                return -1;
        }

        /* k is in [0, sizeof(t)-1] here, so this stays in bounds. */
        t[k] = 0;

        /* An empty read (EOF) is rejected here together with malformed
         * contents. */
        if (k <= 0 ||
            t[k-1] != '\n' ||
            sscanf(t, "%lu", u) != 1) {

                fprintf(stderr, "Parse failure in %s\n", fn);
                return -1;
        }

        return 0;
}

/* Runs in the forked child: unblocks all signals, configures the
 * cgroup at cgroup_path with runtime_usec of real-time runtime, moves
 * the calling process into it, caps RLIMIT_RTPRIO at RTPRIO_MAX, drops
 * to the real user id and finally executes argv. Only returns on
 * failure, in which case the return value (always 1) is meant to be
 * used as exit code. */
static int child(unsigned long runtime_usec, const char *cgroup_path, char *argv[]) {
        sigset_t ss;
        int ret = 1;
        char path[PATH_MAX];
        struct rlimit rl;

        /* The parent blocked a few signals before forking; the
         * executed program should start with an empty mask. */
        sigemptyset(&ss);

        if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
                fprintf(stderr, "sigprocmask(): %s\n", strerror(errno));
                goto finish;
        }

        snprintf(path, sizeof(path), "%s/cpu.rt_runtime_us", cgroup_path);

        if (write_long(path, runtime_usec) < 0)
                goto finish;

        snprintf(path, sizeof(path), "%s/tasks", cgroup_path);

        if (write_long(path, (unsigned long) getpid()) < 0)
                goto finish;

        memset(&rl, 0, sizeof(rl));
        rl.rlim_cur = rl.rlim_max = RTPRIO_MAX;

        if (setrlimit(RLIMIT_RTPRIO, &rl) < 0) {
                fprintf(stderr, "RLIMIT_RTPRIO: %s\n", strerror(errno));
                goto finish;
        }

        /* Everything that needed privileges is done, drop them for
         * good before running the user's program. */
        if (setresuid(getuid(), getuid(), getuid()) < 0) {
                fprintf(stderr, "setresuid(): %s\n", strerror(errno));
                goto finish;
        }

        execvp(argv[0], argv);
        fprintf(stderr, "execvp(): %s\n", strerror(errno));

finish:

        return ret;
}

/* Verifies that path is a real directory (not a symlink to one) with
 * access mode 0700. Returns 0 if so, -1 otherwise; failures are
 * reported on stderr. */
static int check_dir(const char *path) {
        struct stat st;

        if (lstat(path, &st) < 0) {
                fprintf(stderr, "lstat(\"%s\", ...): %s\n", path, strerror(errno));
                return -1;
        }

        if (!S_ISDIR(st.st_mode) || (st.st_mode & 0777) != 0700) {
                fprintf(stderr, "%s is not a directory or has improper access modes.\n", path);
                return -1;
        }

        return 0;
}

/* Waits until the child pid terminates, by synchronously waiting for
 * the signals in ss (which the caller must have blocked).
 *
 * If timeout is non-zero each wait is limited to two seconds,
 * otherwise it blocks without a time limit.
 *
 * Returns 0 if the child terminated: *ret is set to its exit status if
 * it exited normally, *ret_signal to the signal number if it was
 * killed by a signal. Returns 1 if the wait timed out. Returns -1 on
 * error or if any signal other than SIGCHLD was caught. */
static int wait_for_kid(pid_t pid, int timeout, sigset_t *ss, int *ret, int *ret_signal) {
        static const struct timespec ts = {
                .tv_sec = 2,
                .tv_nsec = 0,
        };

        siginfo_t info;

        for (;;) {
                pid_t cpid;
                int status;
                memset(&info, 0, sizeof(info));

                if (sigtimedwait(ss, &info, timeout ? &ts : NULL) < 0) {

                        if (errno == EAGAIN)
                                return 1;

                        /* Being stopped and continued can interrupt the
                         * wait; that is no reason to give up on the
                         * child. */
                        if (errno == EINTR)
                                continue;

                        fprintf(stderr, "sigtimedwait(): %s\n", strerror(errno));
                        return -1;
                }

                if (info.si_signo != SIGCHLD) {
                        fprintf(stderr, "Caught signal %i.\n", info.si_signo);
                        return -1;
                }

                if ((cpid = waitpid(-1, &status, WNOHANG)) < 0) {
                        fprintf(stderr, "waitpid(%lu, ...): %s\n", (unsigned long) pid, strerror(errno));
                        return -1;
                }

                /* SIGCHLD is also sent for state changes we are not
                 * interested in (waitpid() returns 0 then), keep
                 * waiting in that case. */
                if (cpid != pid)
                        continue;

                if (WIFEXITED(status)) {
                        *ret = WEXITSTATUS(status);
                        return 0;
                }

                if (WIFSIGNALED(status)) {
                        fprintf(stderr, "Warning: child process terminated with signal %i.\n", WTERMSIG(status));
                        *ret_signal = WTERMSIG(status);
                        return 0;
                }

                fprintf(stderr, "Hmm, waitpid() returned and we don't know why.\n");
                return -1;
        }
}

/* Terminates the child pid: sends SIGTERM, waits up to the timeout of
 * wait_for_kid() for it to go away, and escalates to SIGKILL if it
 * doesn't. ss, ret and ret_signal are passed on to wait_for_kid().
 * Returns 0 if the child is gone (or did not exist anymore), -1 on
 * error or if it survived even SIGKILL. */
static int safe_kill(pid_t pid, sigset_t *ss, int *ret, int *ret_signal) {
        int r;

        fprintf(stderr, "Killing child %lu with signal %i\n", (unsigned long) pid, SIGTERM);

        if (kill(pid, SIGTERM) < 0) {
                if (errno == ESRCH)
                        return 0;

                fprintf(stderr, "kill(): %s\n", strerror(errno));
                return -1;
        }

        /* r > 0 means timeout: fall through and get rough. */
        if ((r = wait_for_kid(pid, 1, ss, ret, ret_signal)) <= 0)
                return r;

        fprintf(stderr, "Killing child %lu with signal %i\n", (unsigned long) pid, SIGKILL);

        if (kill(pid, SIGKILL) < 0) {
                if (errno == ESRCH)
                        return 0;

                fprintf(stderr, "kill(): %s\n", strerror(errno));
                return -1;
        }

        if ((r = wait_for_kid(pid, 1, ss, ret, ret_signal)) <= 0)
                return r;

        fprintf(stderr, "Client didn't react.\n");
        return -1;
}

/* Prints the usage text to the stream f, using argv0 as program name. */
static void help(const char *argv0, FILE *f) {

        /* NOTE(review): "<runtime> <program>" restored in the usage
         * line to match the parameter list below it -- confirm the
         * exact upstream wording. */
        fprintf(f,
                "%s <runtime> <program> [arguments...]\n\n"
                "\truntime: available runtime in percent.\n"
                "\tprogram: the program to run\n\n"
                "Example:\n"
                "\t%s 1.5 foobar\n",
                argv0, argv0);
}

/* Usage: rtwatch <runtime> <program> [arguments...]
 *
 * Mounts a private "cpu" cgroup hierarchy below TOP_PATH, moves
 * <runtime> percent of the real-time period from the root group to a
 * new cgroup, runs <program> in that cgroup and undoes all of this
 * when the program terminates or rtwatch receives SIGTERM, SIGINT or
 * SIGQUIT.
 *
 * Returns the exit status of the program, or 1 on failure. If the
 * program was killed by a signal, rtwatch raises the same signal on
 * itself after cleaning up. */
int main(int argc, char *argv[]) {
        char id[64], mnt_path[PATH_MAX], cgroup_path[PATH_MAX], root_runtime_us_path[PATH_MAX];
        const char *argv0;
        char *end;
        int ret = 1;
        pid_t pid;
        sigset_t ss;
        double cpu_percentage;
        unsigned long runtime_usec, period_usec, u;
        int ret_signal = -1;

        /* argv[0] is NULL when argc is 0, don't touch it then. */
        if (argc < 1)
                argv0 = "rtwatch";
        else if ((argv0 = strrchr(argv[0], '/')))
                argv0++;
        else
                argv0 = argv[0];

        if (argc == 1) {
                help(argv0, stdout);
                ret = 0;
                goto finish;
        }

        if (argc < 3) {
                help(argv0, stderr);
                goto finish;
        }

        /* Reject trailing garbage, range errors and NaN: the negated
         * comparison is false for every value that is not a positive
         * number. */
        errno = 0;
        cpu_percentage = strtod(argv[1], &end);

        if (end == argv[1] || *end != 0 || errno == ERANGE ||
            !(cpu_percentage > 0) || cpu_percentage > 100) {
                fprintf(stderr, "Failed to parse runtime argument: %s\n", argv[1]);
                goto finish;
        }

        if (cpu_percentage > CPU_LOAD_MAX) {
                fprintf(stderr, "More than the limit of %i%% CPU requested.\n", CPU_LOAD_MAX);
                goto finish;
        }

        if (read_long(PERIOD_USEC_FILE, &period_usec) < 0)
                goto finish;

        /* We divide by the period below. */
        if (period_usec == 0) {
                fprintf(stderr, "Invalid real-time period in %s\n", PERIOD_USEC_FILE);
                goto finish;
        }

        runtime_usec = (unsigned long) ((double) period_usec * cpu_percentage / 100);

        if (runtime_usec == 0) {
                fprintf(stderr, "Percentage too small.\n");
                goto finish;
        }

        fprintf(stderr, "period=%0.2fms runtime=%0.2fms max_cpu_load=%0.1f%%\n",
                (double) period_usec / 1000,
                (double) runtime_usec / 1000,
                (double) runtime_usec * 100 / (double) period_usec);

        if (geteuid() != 0) {
                fprintf(stderr, "%s needs to be run as root.\n", argv0);
                goto finish;
        }

        /* Block the signals we want to wait for with sigwait() */
        sigemptyset(&ss);
        sigaddset(&ss, SIGTERM);
        sigaddset(&ss, SIGINT);
        sigaddset(&ss, SIGQUIT);
        sigaddset(&ss, SIGCHLD);

        if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0) {
                fprintf(stderr, "sigprocmask(): %s\n", strerror(errno));
                goto finish;
        }

        /* Make sure our signals are not set to SIG_IGN */
        signal(SIGTERM, SIG_DFL);
        signal(SIGINT, SIG_DFL);
        signal(SIGQUIT, SIG_DFL);
        signal(SIGCHLD, SIG_DFL);

        /* Create top directory for the hierarchies */
        if (mkdir(TOP_PATH, 0700) < 0 && errno != EEXIST) {
                fprintf(stderr, "mkdir(\"%s\"): %s\n", TOP_PATH, strerror(errno));
                goto finish;
        }

        /* The result of chmod() is not checked here, check_dir()
         * verifies the outcome right after. */
        chmod(TOP_PATH, 0700);

        if (check_dir(TOP_PATH) < 0)
                goto finish_remove_top_path;

        /* Create mount directory for the hierarchy */
        snprintf(mnt_path, sizeof(mnt_path), TOP_PATH "/%lu", (unsigned long) getpid());
        if (mkdir(mnt_path, 0700) < 0) {
                fprintf(stderr, "mkdir(\"%s\"): %s\n", mnt_path, strerror(errno));
                goto finish_remove_top_path;
        }
        chmod(mnt_path, 0700);

        if (check_dir(mnt_path) < 0)
                goto finish_remove_mnt_path;

        /* Create hierarchy */
        snprintf(id, sizeof(id), "rtwatch-%lu", (unsigned long) getpid());
        if (mount(id, mnt_path, "cgroup", 0, "cpu") < 0) {
                fprintf(stderr, "mount(\"%s\", \"%s\", \"cgroup\", ...): %s\n", id, mnt_path, strerror(errno));
                goto finish_remove_mnt_path;
        }

        /* Decrease root runtime */
        snprintf(root_runtime_us_path, sizeof(root_runtime_us_path), "%s/cpu.rt_runtime_us", mnt_path);
        if (read_long(root_runtime_us_path, &u) < 0)
                goto finish_umount;

        /* Always leave at least 5% (1/20) of the period to the root
         * group. */
        if (u < (period_usec/20 + runtime_usec)) {
                fprintf(stderr, "Refusing to limit root runtime to less than 5%%\n");
                goto finish_umount;
        }

        u -= runtime_usec;

        if (write_long(root_runtime_us_path, u) < 0)
                goto finish_umount;

        /* Create cgroup */
        snprintf(cgroup_path, sizeof(cgroup_path), "%s/rtwatch-%lu", mnt_path, (unsigned long) getpid());
        if (mkdir(cgroup_path, 0700) < 0) {
                fprintf(stderr, "mkdir(\"%s\"): %s\n", cgroup_path, strerror(errno));
                goto finish_increment_root_runtime;
        }

        if ((pid = fork()) < 0) {
                fprintf(stderr, "fork(): %s\n", strerror(errno));
                goto finish_remove_cgroup_path;
        } else if (!pid)
                _exit(child(runtime_usec, cgroup_path, argv + 2));

        if (wait_for_kid(pid, 0, &ss, &ret, &ret_signal) < 0)
                goto finish_kill;

        goto finish_remove_cgroup_path;


finish_kill:

        /* We'll die shortly. Then, our child will be reparented to
         * init which then will care for reaping it. */

        if (safe_kill(pid, &ss, &ret, &ret_signal) < 0)
                ret = 1;

finish_remove_cgroup_path:

        if (rmdir(cgroup_path) < 0) {
                fprintf(stderr, "Warning: rmdir(\"%s\"): %s\n", cgroup_path, strerror(errno));
                ret = 1;
        }

finish_increment_root_runtime:

        /* Hand the runtime we took back to the root group. read_long()
         * and write_long() report failures themselves; like the other
         * cleanup steps, a failure here turns into a non-zero exit
         * code. */
        if (read_long(root_runtime_us_path, &u) < 0) {
                ret = 1;
                goto finish_umount;
        }

        u += runtime_usec;

        if (u > period_usec)
                u = period_usec;

        if (write_long(root_runtime_us_path, u) < 0)
                ret = 1;

finish_umount:

        if (umount(mnt_path) < 0) {
                fprintf(stderr, "Warning: umount(\"%s\"): %s\n", mnt_path, strerror(errno));
                ret = 1;
        }

finish_remove_mnt_path:

        if (rmdir(mnt_path) < 0) {
                fprintf(stderr, "Warning: rmdir(\"%s\"): %s\n", mnt_path, strerror(errno));
                ret = 1;
        }

finish_remove_top_path:

        /* If some other instance of rtwatch is still running the
         * following call will fail, but that's fine. */

        if (rmdir(TOP_PATH) < 0 && errno != EBUSY && errno != ENOTEMPTY) {
                fprintf(stderr, "Warning: rmdir(\"%s\"): %s\n", TOP_PATH, strerror(errno));
                ret = 1;
        }

finish:

        /* If the child was killed by a signal, die by the same signal
         * so that our own parent sees what happened. The signal may be
         * one of those we blocked above, hence unblock first. */
        if (ret_signal > 0) {
                sigemptyset(&ss);

                if (sigprocmask(SIG_SETMASK, &ss, NULL) < 0)
                        fprintf(stderr, "Warning: sigprocmask(): %s\n", strerror(errno));

                raise(ret_signal);
        }

        return ret;
}