/*-*- Mode: C; c-basic-offset: 8 -*-*/
/***
This file is part of mutrace.
Copyright 2009 Lennart Poettering
mutrace is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
mutrace is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with mutrace. If not, see <http://www.gnu.org/licenses/>.
***/
#include "config.h"
#include <pthread.h>
#include <execinfo.h>
#include <stdlib.h>
#include <inttypes.h>
#include <stdbool.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <dlfcn.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <malloc.h>
#include <signal.h>
#include <sys/prctl.h>
#include <sched.h>
#include <math.h>
#include <time.h>
#if !defined (__linux__) || !defined(__GLIBC__)
#error "This stuff only works on Linux!"
#endif
#ifndef SCHED_RESET_ON_FORK
/* Your libc lacks the definition of SCHED_RESET_ON_FORK. We'll now
* define it ourselves, however make sure your kernel is new
* enough! */
#define SCHED_RESET_ON_FORK 0x40000000
#endif
#if defined(__i386__) || defined(__x86_64__)
#define DEBUG_TRAP __asm__("int $3")
#else
#define DEBUG_TRAP raise(SIGTRAP)
#endif
#define LIKELY(x) (__builtin_expect(!!(x),1))
#define UNLIKELY(x) (__builtin_expect(!!(x),0))
struct stacktrace_info {
void **frames;
int nb_frame;
};
/* Used to differentiate between statistics for read-only and read-write locks
* held on rwlocks. */
typedef enum {
WRITE = 0,
READ = 1,
} LockType;
#define NUM_LOCK_TYPES (READ + 1)
struct mutex_info {
pthread_mutex_t *mutex;
pthread_rwlock_t *rwlock;
int type, protocol, kind;
bool broken:1;
bool realtime:1;
bool dead:1;
unsigned n_lock_level;
LockType lock_type; /* rwlocks only */
pid_t last_owner;
unsigned n_locked[NUM_LOCK_TYPES];
unsigned n_contended[NUM_LOCK_TYPES];
unsigned n_owner_changed;
uint64_t nsec_locked_total[NUM_LOCK_TYPES];
uint64_t nsec_locked_max[NUM_LOCK_TYPES];
uint64_t nsec_contended_total[NUM_LOCK_TYPES];
uint64_t nsec_timestamp[NUM_LOCK_TYPES];
struct stacktrace_info stacktrace;
unsigned id;
struct mutex_info *next;
};
struct cond_info {
pthread_cond_t *cond;
bool broken:1;
bool realtime:1;
bool dead:1;
unsigned n_wait_level;
pthread_mutex_t *mutex;
unsigned n_wait;
unsigned n_signal;
unsigned n_broadcast;
unsigned n_wait_contended; /* number of wait() calls made while another
* thread is already waiting on the cond */
unsigned n_signal_contended; /* number of signal() or broadcast() calls
* made while no thread is waiting */
uint64_t nsec_wait_total;
uint64_t nsec_wait_max;
uint64_t nsec_wait_contended_total;
uint64_t nsec_wait_contended_max;
uint64_t nsec_timestamp;
struct stacktrace_info stacktrace;
unsigned id;
struct cond_info *next;
};
static unsigned hash_size = 3371; /* probably a good idea to pick a prime here */
static unsigned frames_max = 16;
static volatile unsigned n_broken_mutexes = 0;
static volatile unsigned n_broken_conds = 0;
static volatile unsigned n_collisions = 0;
static volatile unsigned n_self_contended = 0;
static unsigned show_n_wait_min = 1;
static unsigned show_n_locked_min = 1;
static unsigned show_n_read_locked_min = 0;
static unsigned show_n_contended_min = 0;
static unsigned show_n_read_contended_min = 0;
static unsigned show_n_owner_changed_min = 2;
static unsigned show_n_max = 10;
static bool raise_trap = false;
static bool track_rt = false;
static int (*real_pthread_mutex_init)(pthread_mutex_t *mutex, const pthread_mutexattr_t *mutexattr) = NULL;
static int (*real_pthread_mutex_destroy)(pthread_mutex_t *mutex) = NULL;
static int (*real_pthread_mutex_lock)(pthread_mutex_t *mutex) = NULL;
static int (*real_pthread_mutex_trylock)(pthread_mutex_t *mutex) = NULL;
static int (*real_pthread_mutex_timedlock)(pthread_mutex_t *mutex, const struct timespec *abstime) = NULL;
static int (*real_pthread_mutex_unlock)(pthread_mutex_t *mutex) = NULL;
static int (*real_pthread_cond_init)(pthread_cond_t *cond, const pthread_condattr_t *attr) = NULL;
static int (*real_pthread_cond_destroy)(pthread_cond_t *cond) = NULL;
static int (*real_pthread_cond_signal)(pthread_cond_t *cond) = NULL;
static int (*real_pthread_cond_broadcast)(pthread_cond_t *cond) = NULL;
static int (*real_pthread_cond_wait)(pthread_cond_t *cond, pthread_mutex_t *mutex) = NULL;
static int (*real_pthread_cond_timedwait)(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime) = NULL;
static int (*real_pthread_create)(pthread_t *newthread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg) = NULL;
static int (*real_pthread_rwlock_init)(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr) = NULL;
static int (*real_pthread_rwlock_destroy)(pthread_rwlock_t *rwlock) = NULL;
static int (*real_pthread_rwlock_rdlock)(pthread_rwlock_t *rwlock) = NULL;
static int (*real_pthread_rwlock_tryrdlock)(pthread_rwlock_t *rwlock) = NULL;
static int (*real_pthread_rwlock_timedrdlock)(pthread_rwlock_t *rwlock, const struct timespec *abstime) = NULL;
static int (*real_pthread_rwlock_wrlock)(pthread_rwlock_t *rwlock) = NULL;
static int (*real_pthread_rwlock_trywrlock)(pthread_rwlock_t *rwlock) = NULL;
static int (*real_pthread_rwlock_timedwrlock)(pthread_rwlock_t *rwlock, const struct timespec *abstime) = NULL;
static int (*real_pthread_rwlock_unlock)(pthread_rwlock_t *rwlock) = NULL;
static void (*real_exit)(int status) __attribute__((noreturn)) = NULL;
static void (*real__exit)(int status) __attribute__((noreturn)) = NULL;
static void (*real__Exit)(int status) __attribute__((noreturn)) = NULL;
static int (*real_backtrace)(void **array, int size) = NULL;
static char **(*real_backtrace_symbols)(void *const *array, int size) = NULL;
static void (*real_backtrace_symbols_fd)(void *const *array, int size, int fd) = NULL;
static struct mutex_info **alive_mutexes = NULL, **dead_mutexes = NULL;
static pthread_mutex_t *mutexes_lock = NULL;
static struct cond_info **alive_conds = NULL, **dead_conds = NULL;
static pthread_mutex_t *conds_lock = NULL;
static __thread bool recursive = false;
static volatile bool initialized = false;
static volatile bool threads_existing = false;
static uint64_t nsec_timestamp_setup;
typedef struct {
const char *command; /* as passed to --order command line argument */
const char *ui_string; /* as displayed by show_summary() */
} SummaryOrderDetails;
/* Must be kept in sync with summary_mutex_order_details. */
typedef enum {
MUTEX_ORDER_ID = 0,
MUTEX_ORDER_N_LOCKED,
MUTEX_ORDER_N_READ_LOCKED,
MUTEX_ORDER_N_CONTENDED,
MUTEX_ORDER_N_READ_CONTENDED,
MUTEX_ORDER_N_OWNER_CHANGED,
MUTEX_ORDER_NSEC_LOCKED_TOTAL,
MUTEX_ORDER_NSEC_LOCKED_MAX,
MUTEX_ORDER_NSEC_LOCKED_AVG,
MUTEX_ORDER_NSEC_READ_LOCKED_TOTAL,
MUTEX_ORDER_NSEC_READ_LOCKED_MAX,
MUTEX_ORDER_NSEC_READ_LOCKED_AVG,
MUTEX_ORDER_NSEC_CONTENDED_TOTAL,
MUTEX_ORDER_NSEC_CONTENDED_AVG,
MUTEX_ORDER_NSEC_READ_CONTENDED_TOTAL,
MUTEX_ORDER_NSEC_READ_CONTENDED_AVG,
} SummaryMutexOrder;
#define MUTEX_ORDER_INVALID (MUTEX_ORDER_NSEC_READ_CONTENDED_AVG + 1) /* first invalid order */
/* Must be kept in sync with SummaryMutexOrder. */
static const SummaryOrderDetails summary_mutex_order_details[] = {
{ "id", "mutex number" },
{ "n-locked", "(write) lock count" },
{ "n-read-locked", "(read) lock count" },
{ "n-contended", "(write) contention count" },
{ "n-read-contended", "(read) contention count" },
{ "n-owner-changed", "owner change count" },
{ "nsec-locked-total", "total time (write) locked" },
{ "nsec-locked-max", "maximum time (write) locked" },
{ "nsec-locked-avg", "average time (write) locked" },
{ "nsec-read-locked-total", "total time (read) locked" },
{ "nsec-read-locked-max", "maximum time (read) locked" },
{ "nsec-read-locked-avg", "average time (read) locked" },
{ "nsec-contended-total", "total time (write) contended" },
{ "nsec-contended-avg", "average time (write) contended" },
{ "nsec-read-contended-total", "total time (read) contended" },
{ "nsec-read-contended-avg", "average time (read) contended" },
};
static SummaryMutexOrder summary_mutex_order = MUTEX_ORDER_N_CONTENDED;
static SummaryMutexOrder summary_mutex_order_from_command(const char *command);
/* Must be kept in sync with summary_cond_order_details. */
typedef enum {
COND_ORDER_ID = 0,
COND_ORDER_N_WAIT,
COND_ORDER_N_SIGNAL,
COND_ORDER_N_BROADCAST,
COND_ORDER_N_WAIT_CONTENDED,
COND_ORDER_N_SIGNAL_CONTENDED,
COND_ORDER_NSEC_WAIT_TOTAL,
COND_ORDER_NSEC_WAIT_MAX,
COND_ORDER_NSEC_WAIT_AVG,
COND_ORDER_NSEC_WAIT_CONTENDED_TOTAL,
COND_ORDER_NSEC_WAIT_CONTENDED_MAX,
COND_ORDER_NSEC_WAIT_CONTENDED_AVG,
} SummaryCondOrder;
#define COND_ORDER_INVALID (COND_ORDER_NSEC_WAIT_CONTENDED_AVG + 1) /* first invalid order */
/* Must be kept in sync with SummaryCondOrder. */
static const SummaryOrderDetails summary_cond_order_details[] = {
{ "id", "condition variable number" },
{ "n-wait", "wait count" },
{ "n-signal", "signal count" },
{ "n-broadcast", "broadcast count" },
{ "n-wait-contended", "wait contention count" },
{ "n-signal-contended", "signal contention count" },
{ "nsec-wait-total", "total wait time" },
{ "nsec-wait-max", "maximum wait time" },
{ "nsec-wait-avg", "average wait time" },
{ "nsec-wait-contended-total", "total contended wait time" },
{ "nsec-wait-contended-max", "maximum contended wait time" },
{ "nsec-wait-contended-avg", "average contended wait time" },
};
static SummaryCondOrder summary_cond_order = COND_ORDER_N_WAIT_CONTENDED;
static SummaryCondOrder summary_cond_order_from_command(const char *command);
static void setup(void) __attribute ((constructor));
static void shutdown(void) __attribute ((destructor));
static char *stacktrace_to_string(struct stacktrace_info stacktrace);
static void sigusr1_cb(int sig);
static pid_t _gettid(void) {
return (pid_t) syscall(SYS_gettid);
}
static uint64_t nsec_now(void) {
struct timespec ts;
int r;
r = clock_gettime(CLOCK_MONOTONIC, &ts);
assert(r == 0);
return
(uint64_t) ts.tv_sec * 1000000000ULL +
(uint64_t) ts.tv_nsec;
}
static const char *get_prname(void) {
static char prname[17];
int r;
r = prctl(PR_GET_NAME, prname);
assert(r == 0);
prname[16] = 0;
return prname;
}
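/* Parse the unsigned integer value of the environment variable n into
 * *t. Returns 0 if the variable is unset or was parsed successfully,
 * and -1 if the value could not be parsed or does not fit into an
 * unsigned. */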
static int parse_env(const char *n, unsigned *t) {
const char *e;
char *x = NULL;
unsigned long ul;
if (!(e = getenv(n)))
return 0;
errno = 0;
ul = strtoul(e, &x, 0);
if (!x || *x || errno != 0)
return -1;
*t = (unsigned) ul;
if ((unsigned long) *t != ul)
return -1;
return 0;
}
/* Maximum tolerated relative error when comparing doubles */
#define MAX_RELATIVE_ERROR 0.001
static bool doubles_equal(double a, double b) {
/* Make sure we don't divide by zero. */
if (fpclassify(b) == FP_ZERO)
return (fpclassify(a) == FP_ZERO);
return (fabs((a - b) / b) <= MAX_RELATIVE_ERROR);
}
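/* Resolve the next definition of the given symbol, i.e. the real
 * libc/libpthread implementation hidden behind our interposed wrapper,
 * via dlsym(RTLD_NEXT)/dlvsym(RTLD_NEXT) and store it in the
 * corresponding real_xxx function pointer. */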
#define LOAD_FUNC(name) \
do { \
*(void**) (&real_##name) = dlsym(RTLD_NEXT, #name); \
assert(real_##name); \
} while (false)
#define LOAD_FUNC_VERSIONED(name, version) \
do { \
*(void**) (&real_##name) = dlvsym(RTLD_NEXT, #name, version); \
assert(real_##name); \
} while (false)
static void load_functions(void) {
static volatile bool loaded = false;
if (LIKELY(loaded))
return;
recursive = true;
/* If someone uses a shared library constructor that is called
* before ours we might not be initialized yet when the first
* lock related operation is executed. To deal with this we'll
* simply call the original implementation and do nothing
* else, but for that we do need the original function
* pointers. */
LOAD_FUNC(pthread_mutex_init);
LOAD_FUNC(pthread_mutex_destroy);
LOAD_FUNC(pthread_mutex_lock);
LOAD_FUNC(pthread_mutex_trylock);
LOAD_FUNC(pthread_mutex_timedlock);
LOAD_FUNC(pthread_mutex_unlock);
LOAD_FUNC(pthread_create);
LOAD_FUNC(pthread_rwlock_init);
LOAD_FUNC(pthread_rwlock_destroy);
LOAD_FUNC(pthread_rwlock_rdlock);
LOAD_FUNC(pthread_rwlock_tryrdlock);
LOAD_FUNC(pthread_rwlock_timedrdlock);
LOAD_FUNC(pthread_rwlock_wrlock);
LOAD_FUNC(pthread_rwlock_trywrlock);
LOAD_FUNC(pthread_rwlock_timedwrlock);
LOAD_FUNC(pthread_rwlock_unlock);
/* There's some kind of weird incompatibility problem causing
* pthread_cond_timedwait() to freeze if we don't ask for this
* explicit version of these functions */
LOAD_FUNC_VERSIONED(pthread_cond_init, "GLIBC_2.3.2");
LOAD_FUNC_VERSIONED(pthread_cond_destroy, "GLIBC_2.3.2");
LOAD_FUNC_VERSIONED(pthread_cond_signal, "GLIBC_2.3.2");
LOAD_FUNC_VERSIONED(pthread_cond_broadcast, "GLIBC_2.3.2");
LOAD_FUNC_VERSIONED(pthread_cond_wait, "GLIBC_2.3.2");
LOAD_FUNC_VERSIONED(pthread_cond_timedwait, "GLIBC_2.3.2");
LOAD_FUNC(exit);
LOAD_FUNC(_exit);
LOAD_FUNC(_Exit);
LOAD_FUNC(backtrace);
LOAD_FUNC(backtrace_symbols);
LOAD_FUNC(backtrace_symbols_fd);
loaded = true;
recursive = false;
}
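/* One-time initialization, run as a shared library constructor. Besides
 * resolving the real pthread functions it honours the following
 * environment variables (parsed below): MUTRACE_HASH_SIZE,
 * MUTRACE_FRAMES, MUTRACE_WAIT_MIN, MUTRACE_LOCKED_MIN,
 * MUTRACE_READ_LOCKED_MIN, MUTRACE_CONTENDED_MIN,
 * MUTRACE_READ_CONTENDED_MIN, MUTRACE_OWNER_CHANGED_MIN, MUTRACE_MAX,
 * MUTRACE_SUMMARY_MUTEX_ORDER, MUTRACE_SUMMARY_COND_ORDER, MUTRACE_TRAP
 * and MUTRACE_TRACK_RT. The library is presumably meant to be
 * LD_PRELOADed into the target process, e.g. something like
 *
 *     LD_PRELOAD=./libmutrace.so MUTRACE_TRAP=1 ./myprogram
 *
 * (the exact library path depends on the installation). */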
static void setup(void) {
struct sigaction sigusr_action;
pthread_mutex_t *m, *last;
int r;
unsigned t;
const char *s;
load_functions();
if (LIKELY(initialized))
return;
if (!dlsym(NULL, "main"))
fprintf(stderr,
"mutrace: Application appears to be compiled without -rdynamic. It might be a\n"
"mutrace: good idea to recompile with -rdynamic enabled since this produces more\n"
"mutrace: useful stack traces.\n\n");
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
if (__malloc_hook) {
#pragma GCC diagnostic pop
fprintf(stderr,
"mutrace: Detected non-glibc memory allocator. Your program uses some\n"
"mutrace: alternative memory allocator (jemalloc?) which is not compatible with\n"
"mutrace: mutrace. Please rebuild your program with the standard memory\n"
"mutrace: allocator or fix mutrace to handle yours correctly.\n");
/* The reason for this is that jemalloc and other
* allocators tend to call pthread_mutex_xxx() from
* the allocator. However, we need to call malloc()
* ourselves from some mutex operations so this might
* create an endless loop eventually overflowing the
* stack. glibc's malloc() does locking too but uses
* lock routines that do not end up calling
* pthread_mutex_xxx(). */
real_exit(1);
}
t = hash_size;
if (parse_env("MUTRACE_HASH_SIZE", &t) < 0 || t <= 0)
fprintf(stderr, "mutrace: WARNING: Failed to parse $MUTRACE_HASH_SIZE.\n");
else
hash_size = t;
t = frames_max;
if (parse_env("MUTRACE_FRAMES", &t) < 0 || t <= 0)
fprintf(stderr, "mutrace: WARNING: Failed to parse $MUTRACE_FRAMES.\n");
else
frames_max = t;
t = show_n_wait_min;
if (parse_env("MUTRACE_WAIT_MIN", &t) < 0)
fprintf(stderr, "mutrace: WARNING: Failed to parse $MUTRACE_WAIT_MIN.\n");
else
show_n_wait_min = t;
t = show_n_locked_min;
if (parse_env("MUTRACE_LOCKED_MIN", &t) < 0)
fprintf(stderr, "mutrace: WARNING: Failed to parse $MUTRACE_LOCKED_MIN.\n");
else
show_n_locked_min = t;
t = show_n_read_locked_min;
if (parse_env("MUTRACE_READ_LOCKED_MIN", &t) < 0)
fprintf(stderr, "mutrace: WARNING: Failed to parse $MUTRACE_READ_LOCKED_MIN.\n");
else
show_n_read_locked_min = t;
t = show_n_contended_min;
if (parse_env("MUTRACE_CONTENDED_MIN", &t) < 0)
fprintf(stderr, "mutrace: WARNING: Failed to parse $MUTRACE_CONTENDED_MIN.\n");
else
show_n_contended_min = t;
t = show_n_read_contended_min;
if (parse_env("MUTRACE_READ_CONTENDED_MIN", &t) < 0)
fprintf(stderr, "mutrace: WARNING: Failed to parse $MUTRACE_READ_CONTENDED_MIN.\n");
else
show_n_read_contended_min = t;
t = show_n_owner_changed_min;
if (parse_env("MUTRACE_OWNER_CHANGED_MIN", &t) < 0)
fprintf(stderr, "mutrace: WARNING: Failed to parse $MUTRACE_OWNER_CHANGED_MIN.\n");
else
show_n_owner_changed_min = t;
t = show_n_max;
if (parse_env("MUTRACE_MAX", &t) < 0)
fprintf(stderr, "mutrace: WARNING: Failed to parse $MUTRACE_MAX.\n");
else
show_n_max = t;
s = getenv("MUTRACE_SUMMARY_MUTEX_ORDER");
if (s != NULL) {
t = summary_mutex_order_from_command(s);
if (t == MUTEX_ORDER_INVALID)
fprintf(stderr, "mutrace: WARNING: Failed to parse $MUTRACE_SUMMARY_MUTEX_ORDER.\n");
else
summary_mutex_order = t;
}
s = getenv("MUTRACE_SUMMARY_COND_ORDER");
if (s != NULL) {
t = summary_cond_order_from_command(s);
if (t == COND_ORDER_INVALID)
fprintf(stderr, "mutrace: WARNING: Failed to parse $MUTRACE_SUMMARY_COND_ORDER.\n");
else
summary_cond_order = t;
}
if (getenv("MUTRACE_TRAP"))
raise_trap = true;
if (getenv("MUTRACE_TRACK_RT"))
track_rt = true;
/* Set up the mutex hash table. */
alive_mutexes = calloc(hash_size, sizeof(struct mutex_info*));
assert(alive_mutexes);
dead_mutexes = calloc(hash_size, sizeof(struct mutex_info*));
assert(dead_mutexes);
mutexes_lock = malloc(hash_size * sizeof(pthread_mutex_t));
assert(mutexes_lock);
for (m = mutexes_lock, last = mutexes_lock+hash_size; m < last; m++) {
r = real_pthread_mutex_init(m, NULL);
assert(r == 0);
}
/* Set up the cond hash table. */
alive_conds = calloc(hash_size, sizeof(struct cond_info*));
assert(alive_conds);
dead_conds = calloc(hash_size, sizeof(struct cond_info*));
assert(dead_conds);
conds_lock = malloc(hash_size * sizeof(pthread_mutex_t));
assert(conds_lock);
for (m = conds_lock, last = conds_lock+hash_size; m < last; m++) {
r = real_pthread_mutex_init(m, NULL);
assert(r == 0);
}
/* Listen for SIGUSR1 and print out a summary of what's happened so far
* when we receive it. */
sigusr_action.sa_handler = sigusr1_cb;
sigemptyset(&sigusr_action.sa_mask);
sigusr_action.sa_flags = 0;
sigaction(SIGUSR1, &sigusr_action, NULL);
nsec_timestamp_setup = nsec_now();
initialized = true;
fprintf(stderr, "mutrace: "PACKAGE_VERSION" successfully initialized for process %s (PID: %lu).\n",
get_prname(), (unsigned long) getpid());
}
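/* Hash an object address into a bucket index. Objects that collide in a
 * bucket are chained via their ->next pointer; a collision is counted in
 * n_collisions whenever a new entry is added to a non-empty bucket. */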
static unsigned long mutex_hash(pthread_mutex_t *mutex) {
unsigned long u;
u = (unsigned long) mutex;
u /= sizeof(void*);
return u % hash_size;
}
static unsigned long rwlock_hash(pthread_rwlock_t *rwlock) {
unsigned long u;
u = (unsigned long) rwlock;
u /= sizeof(void*);
return u % hash_size;
}
static unsigned long cond_hash(pthread_cond_t *cond) {
unsigned long u;
u = (unsigned long) cond;
u /= sizeof(void*);
return u % hash_size;
}
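/* Lock one bucket of our internal hash table. We take the fast path first
 * so that contention on mutrace's own bucket locks can be counted in
 * n_self_contended and reported in the summary footer. */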
static void lock_hash(pthread_mutex_t *lock_array, unsigned u) {
int r;
r = real_pthread_mutex_trylock(lock_array + u);
if (UNLIKELY(r == EBUSY)) {
__sync_fetch_and_add(&n_self_contended, 1);
r = real_pthread_mutex_lock(lock_array + u);
}
assert(r == 0);
}
static void unlock_hash(pthread_mutex_t *lock_array, unsigned u) {
int r;
r = real_pthread_mutex_unlock(lock_array + u);
assert(r == 0);
}
#define lock_hash_mutex(u) lock_hash(mutexes_lock, u)
#define unlock_hash_mutex(u) unlock_hash(mutexes_lock, u)
#define lock_hash_cond(u) lock_hash(conds_lock, u)
#define unlock_hash_cond(u) unlock_hash(conds_lock, u)
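/* Helpers for the qsort() comparison functions below: each macro expands
 * to one switch case comparing the selected statistic of two entries; the
 * _AVG variant divides a total by its count and compares the resulting
 * doubles with doubles_equal(). */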
#define _ORDER_CASE(TYPE, UCASE, lcase) \
case TYPE##_ORDER_##UCASE: \
if (a->lcase != b->lcase) \
return a->lcase - b->lcase; \
break;
#define _ORDER_CASE_AVG(TYPE, UCASE, lcase, divisor) \
case TYPE##_ORDER_##UCASE: { \
double a_avg = (double) a->lcase / a->divisor, \
b_avg = (double) b->lcase / b->divisor; \
if (!doubles_equal(a_avg, b_avg)) \
return ((a_avg - b_avg) < 0.0) ? -1 : 1; \
break; \
}
#define STATIC_ORDER(lcase) \
if (a->lcase != b->lcase) \
return a->lcase - b->lcase
static int mutex_info_compare(const void *_a, const void *_b) {
const struct mutex_info
*a = *(const struct mutex_info**) _a,
*b = *(const struct mutex_info**) _b;
/* Order by the user's chosen ordering first, then fall back to a static
* ordering. */
switch (summary_mutex_order) {
#define ORDER_CASE(UCASE, lcase) _ORDER_CASE(MUTEX, UCASE, lcase)
#define ORDER_CASE_AVG(UCASE, lcase, divisor) _ORDER_CASE_AVG(MUTEX, UCASE, lcase, divisor)
ORDER_CASE(ID, id)
ORDER_CASE(N_LOCKED, n_locked[WRITE])
ORDER_CASE(N_READ_LOCKED, n_locked[READ])
ORDER_CASE(N_CONTENDED, n_contended[WRITE])
ORDER_CASE(N_READ_CONTENDED, n_contended[READ])
ORDER_CASE(N_OWNER_CHANGED, n_owner_changed)
ORDER_CASE(NSEC_LOCKED_TOTAL, nsec_locked_total[WRITE])
ORDER_CASE(NSEC_LOCKED_MAX, nsec_locked_max[WRITE])
ORDER_CASE_AVG(NSEC_LOCKED_AVG, nsec_locked_total[WRITE], n_locked[WRITE])
ORDER_CASE(NSEC_READ_LOCKED_TOTAL, nsec_locked_total[READ])
ORDER_CASE(NSEC_READ_LOCKED_MAX, nsec_locked_max[READ])
ORDER_CASE_AVG(NSEC_READ_LOCKED_AVG, nsec_locked_total[READ], n_locked[READ])
ORDER_CASE(NSEC_CONTENDED_TOTAL, nsec_contended_total[WRITE])
ORDER_CASE_AVG(NSEC_CONTENDED_AVG, nsec_contended_total[WRITE], n_contended[WRITE])
ORDER_CASE(NSEC_READ_CONTENDED_TOTAL, nsec_contended_total[READ])
ORDER_CASE_AVG(NSEC_READ_CONTENDED_AVG, nsec_contended_total[READ], n_contended[READ])
default:
/* Should never be reached. */
assert(0);
#undef ORDER_CASE_AVG
#undef ORDER_CASE
}
/* Fall back to a static ordering. */
STATIC_ORDER(n_contended[WRITE]);
STATIC_ORDER(n_owner_changed);
STATIC_ORDER(n_locked[WRITE]);
STATIC_ORDER(nsec_locked_max[WRITE]);
/* Let's make the output deterministic */
return a - b;
}
static int cond_info_compare(const void *_a, const void *_b) {
const struct cond_info
*a = *(const struct cond_info**) _a,
*b = *(const struct cond_info**) _b;
/* Order by the user's chosen ordering first, then fall back to a static
* ordering. */
switch (summary_cond_order) {
#define ORDER_CASE(UCASE, lcase) _ORDER_CASE(COND, UCASE, lcase)
#define ORDER_CASE_AVG(UCASE, lcase, divisor) _ORDER_CASE_AVG(COND, UCASE, lcase, divisor)
ORDER_CASE(ID, id)
ORDER_CASE(N_WAIT, n_wait)
ORDER_CASE(N_SIGNAL, n_signal)
ORDER_CASE(N_BROADCAST, n_broadcast)
ORDER_CASE(N_WAIT_CONTENDED, n_wait_contended)
ORDER_CASE(N_SIGNAL_CONTENDED, n_signal_contended)
ORDER_CASE(NSEC_WAIT_TOTAL, nsec_wait_total)
ORDER_CASE(NSEC_WAIT_MAX, nsec_wait_max)
ORDER_CASE_AVG(NSEC_WAIT_AVG, nsec_wait_total, n_wait)
ORDER_CASE(NSEC_WAIT_CONTENDED_TOTAL, nsec_wait_contended_total)
ORDER_CASE(NSEC_WAIT_CONTENDED_MAX, nsec_wait_contended_max)
ORDER_CASE_AVG(NSEC_WAIT_CONTENDED_AVG, nsec_wait_contended_total, n_wait_contended)
default:
/* Should never be reached. */
assert(0);
#undef ORDER_CASE_AVG
#undef ORDER_CASE
}
/* Fall back to a static ordering. */
STATIC_ORDER(n_wait_contended);
STATIC_ORDER(n_wait);
STATIC_ORDER(nsec_wait_total);
STATIC_ORDER(n_signal);
/* Let's make the output deterministic */
return a - b;
}
#undef STATIC_ORDER
#undef ORDER_CASE_AVG
#undef ORDER_CASE
static bool mutex_info_show(struct mutex_info *mi) {
/* Mutexes used by real-time code are always noteworthy */
if (mi->realtime)
return true;
if (mi->n_locked[WRITE] < show_n_locked_min)
return false;
if (mi->n_locked[READ] < show_n_read_locked_min)
return false;
if (mi->n_contended[WRITE] < show_n_contended_min)
return false;
if (mi->n_contended[READ] < show_n_read_contended_min)
return false;
if (mi->n_owner_changed < show_n_owner_changed_min)
return false;
return true;
}
static bool cond_info_show(struct cond_info *ci) {
/* Condition variables used by real-time code are always noteworthy */
if (ci->realtime)
return true;
if (ci->n_wait < show_n_wait_min)
return false;
return true;
}
static bool mutex_info_dump(struct mutex_info *mi) {
char *stacktrace_str;
if (!mutex_info_show(mi))
return false;
stacktrace_str = stacktrace_to_string(mi->stacktrace);
fprintf(stderr,
"\nMutex #%u (0x%p) first referenced by:\n"
"%s", mi->id, mi->mutex ? (void*) mi->mutex : (void*) mi->rwlock, stacktrace_str);
free(stacktrace_str);
return true;
}
static bool cond_info_dump(struct cond_info *ci) {
char *stacktrace_str;
if (!cond_info_show(ci))
return false;
stacktrace_str = stacktrace_to_string(ci->stacktrace);
fprintf(stderr,
"\nCondvar #%u (0x%p) first referenced by:\n"
"%s", ci->id, ci->cond, stacktrace_str);
free(stacktrace_str);
return true;
}
static char mutex_type_name(int type) {
switch (type) {
case PTHREAD_MUTEX_NORMAL:
return '-';
case PTHREAD_MUTEX_RECURSIVE:
return 'r';
case PTHREAD_MUTEX_ERRORCHECK:
return 'e';
case PTHREAD_MUTEX_ADAPTIVE_NP:
return 'a';
default:
return '?';
}
}
static char mutex_protocol_name(int protocol) {
switch (protocol) {
case PTHREAD_PRIO_NONE:
return '-';
case PTHREAD_PRIO_INHERIT:
return 'i';
case PTHREAD_PRIO_PROTECT:
return 'p';
default:
return '?';
}
}
static char rwlock_kind_name(int kind) {
switch (kind) {
case PTHREAD_RWLOCK_PREFER_READER_NP:
return 'r';
case PTHREAD_RWLOCK_PREFER_WRITER_NP:
return 'w';
case PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP:
return 'W';
default:
return '?';
}
}
static bool mutex_info_stat(struct mutex_info *mi) {
if (!mutex_info_show(mi))
return false;
fprintf(stderr,
"%8u %8u %8u %8u %13.3f %12.3f %12.3f %c%c%c%c%c%c\n",
mi->id,
mi->n_locked[WRITE],
mi->n_owner_changed,
mi->n_contended[WRITE],
(double) mi->nsec_contended_total[WRITE] / 1000000.0,
(double) mi->nsec_locked_total[WRITE] / 1000000.0,
(double) mi->nsec_locked_total[WRITE] / mi->n_locked[WRITE] / 1000000.0,
mi->mutex ? 'M' : 'W',
mi->broken ? '!' : (mi->dead ? 'x' : '-'),
track_rt ? (mi->realtime ? 'R' : '-') : '.',
mi->mutex ? mutex_type_name(mi->type) : '.',
mi->mutex ? mutex_protocol_name(mi->protocol) : '.',
mi->rwlock ? rwlock_kind_name(mi->kind) : '.');
/* Show a second row for rwlocks, listing the statistics for read
* locks; the first row shows the statistics for write locks. */
if (mi->rwlock) {
fprintf(stderr,
" %8u %8u %13.3f %12.3f %12.3f \n",
mi->n_locked[READ],
mi->n_contended[READ],
(double) mi->nsec_contended_total[READ] / 1000000.0,
(double) mi->nsec_locked_total[READ] / 1000000.0,
(double) mi->nsec_locked_total[READ] / mi->n_locked[READ] / 1000000.0);
}
return true;
}
static bool cond_info_stat(struct cond_info *ci) {
if (!cond_info_show(ci))
return false;
fprintf(stderr,
"%8u %8u %8u %8u %12.3f %13.3f %12.3f %c%c\n",
ci->id,
ci->n_wait,
ci->n_signal + ci->n_broadcast,
ci->n_wait_contended,
(double) ci->nsec_wait_total / 1000000.0,
(double) ci->nsec_wait_contended_total / 1000000.0,
(double) ci->nsec_wait_contended_total / ci->n_wait / 1000000.0,
ci->broken ? '!' : (ci->dead ? 'x' : '-'),
track_rt ? (ci->realtime ? 'R' : '-') : '.');
return true;
}
static SummaryMutexOrder summary_mutex_order_from_command(const char *command) {
unsigned int i;
for (i = 0; i < MUTEX_ORDER_INVALID; i++) {
if (strcmp(command, summary_mutex_order_details[i].command) == 0) {
return i;
}
}
return MUTEX_ORDER_INVALID;
}
static SummaryCondOrder summary_cond_order_from_command(const char *command) {
unsigned int i;
for (i = 0; i < COND_ORDER_INVALID; i++) {
if (strcmp(command, summary_cond_order_details[i].command) == 0) {
return i;
}
}
return COND_ORDER_INVALID;
}
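/* Walk both hash tables (alive and dead objects), assign stable ids, sort
 * the entries according to the user-selected ordering and print the top
 * show_n_max mutexes/rwlocks and condition variables, followed by a
 * footer with the total runtime and consistency warnings. */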
static void show_summary_internal(void) {
struct mutex_info *mi, **mutex_table;
struct cond_info *ci, **cond_table;
unsigned n, u, i, m;
uint64_t t;
long n_cpus;
t = nsec_now() - nsec_timestamp_setup;
fprintf(stderr,
"\n"
"mutrace: Showing statistics for process %s (PID: %lu).\n", get_prname(), (unsigned long) getpid());
/* Mutexes. */
n = 0;
for (u = 0; u < hash_size; u++) {
lock_hash_mutex(u);
for (mi = alive_mutexes[u]; mi; mi = mi->next)
n++;
for (mi = dead_mutexes[u]; mi; mi = mi->next)
n++;
}
if (n <= 0) {
fprintf(stderr,
"mutrace: No mutexes used.\n");
return;
}
fprintf(stderr,
"mutrace: %u mutexes used.\n", n);
mutex_table = malloc(sizeof(struct mutex_info*) * n);
i = 0;
for (u = 0; u < hash_size; u++) {
for (mi = alive_mutexes[u]; mi; mi = mi->next) {
mi->id = i;
mutex_table[i++] = mi;
}
for (mi = dead_mutexes[u]; mi; mi = mi->next) {
mi->id = i;
mutex_table[i++] = mi;
}
}
assert(i == n);
qsort(mutex_table, n, sizeof(mutex_table[0]), mutex_info_compare);
for (i = n, m = 0; i > 0 && (show_n_max <= 0 || m < show_n_max); i--)
m += mutex_info_dump(mutex_table[i - 1]) ? 1 : 0;
if (m > 0) {
fprintf(stderr,
"\n"
"mutrace: Showing %u mutexes in order of %s:\n"
"\n"
" Mutex # Locked Changed Cont. cont.Time[ms] tot.Time[ms] avg.Time[ms] Flags\n",
m, summary_mutex_order_details[summary_mutex_order].ui_string);
for (i = n, m = 0; i > 0 && (show_n_max <= 0 || m < show_n_max); i--)
m += mutex_info_stat(mutex_table[i - 1]) ? 1 : 0;
if (i < n)
fprintf(stderr,
" ... ... ... ... ... ... ... ||||||\n");
else
fprintf(stderr,
" ||||||\n");
fprintf(stderr,
" /|||||\n"
" Object: M = Mutex, W = RWLock /||||\n"
" State: x = dead, ! = inconsistent /|||\n"
" Use: R = used in realtime thread /||\n"
" Mutex Type: r = RECURSIVE, e = ERRORCHECK, a = ADAPTIVE /|\n"
" Mutex Protocol: i = INHERIT, p = PROTECT /\n"
" RWLock Kind: r = PREFER_READER, w = PREFER_WRITER, W = PREFER_WRITER_NONREC \n"
"\n"
"mutrace: Note that rwlocks are shown as two lines: write locks then read locks.\n");
if (!track_rt)
fprintf(stderr,
"\n"
"mutrace: Note that the flags column R is only valid in --track-rt mode!\n\n");
} else
fprintf(stderr,
"\n"
"mutrace: No mutex contended according to filtering parameters.\n\n");
free(mutex_table);
for (u = 0; u < hash_size; u++)
unlock_hash_mutex(u);
/* Condition variables. */
n = 0;
for (u = 0; u < hash_size; u++) {
lock_hash_cond(u);
for (ci = alive_conds[u]; ci; ci = ci->next)
n++;
for (ci = dead_conds[u]; ci; ci = ci->next)
n++;
}
if (n <= 0) {
fprintf(stderr,
"mutrace: No condition variables used.\n");
return;
}
fprintf(stderr,
"mutrace: %u condition variables used.\n", n);
cond_table = malloc(sizeof(struct cond_info*) * n);
i = 0;
for (u = 0; u < hash_size; u++) {
for (ci = alive_conds[u]; ci; ci = ci->next) {
ci->id = i;
cond_table[i++] = ci;
}
for (ci = dead_conds[u]; ci; ci = ci->next) {
ci->id = i;
cond_table[i++] = ci;
}
}
assert(i == n);
qsort(cond_table, n, sizeof(cond_table[0]), cond_info_compare);
for (i = n, m = 0; i > 0 && (show_n_max <= 0 || m < show_n_max); i--)
m += cond_info_dump(cond_table[i - 1]) ? 1 : 0;
if (m > 0) {
fprintf(stderr,
"\n"
"mutrace: Showing %u condition variables in order of %s:\n"
"\n"
" Cond # Waits Signals Cont. tot.Time[ms] cont.Time[ms] avg.Time[ms] Flags\n",
m, summary_cond_order_details[summary_cond_order].ui_string);
for (i = n, m = 0; i > 0 && (show_n_max <= 0 || m < show_n_max); i--)
m += cond_info_stat(cond_table[i - 1]) ? 1 : 0;
if (i < n)
fprintf(stderr,
" ... ... ... ... ... ... ... ||\n");
else
fprintf(stderr,
" ||\n");
fprintf(stderr,
" /|\n"
" State: x = dead, ! = inconsistent /\n"
" Use: R = used in realtime thread \n");
if (!track_rt)
fprintf(stderr,
"\n"
"mutrace: Note that the flags column R is only valid in --track-rt mode!\n");
} else
fprintf(stderr,
"\n"
"mutrace: No condition variable contended according to filtering parameters.\n");
free(cond_table);
for (u = 0; u < hash_size; u++)
unlock_hash_cond(u);
/* Footer. */
fprintf(stderr,
"\n"
"mutrace: Total runtime is %0.3f ms.\n", (double) t / 1000000.0);
n_cpus = sysconf(_SC_NPROCESSORS_ONLN);
assert(n_cpus >= 1);
if (n_cpus <= 1)
fprintf(stderr,
"\n"
"mutrace: WARNING: Results for uniprocessor machine. Results might be more interesting\n"
" when run on an SMP machine!\n");
else
fprintf(stderr,
"\n"
"mutrace: Results for SMP with %li processors.\n", n_cpus);
if (n_broken_mutexes > 0)
fprintf(stderr,
"\n"
"mutrace: WARNING: %u inconsistent mutex uses detected. Results might not be reliable.\n"
"mutrace: Fix your program first!\n", n_broken_mutexes);
if (n_broken_conds > 0)
fprintf(stderr,
"\n"
"mutrace: WARNING: %u inconsistent condition variable uses detected. Results might not be reliable.\n"
"mutrace: Fix your program first!\n", n_broken_conds);
if (n_collisions > 0)
fprintf(stderr,
"\n"
"mutrace: WARNING: %u internal hash collisions detected. Results might not be as reliable as they could be.\n"
"mutrace: Try to increase --hash-size=, which is currently at %u.\n", n_collisions, hash_size);
if (n_self_contended > 0)
fprintf(stderr,
"\n"
"mutrace: WARNING: %u internal mutex contentions detected. Results might not be as reliable as they could be.\n"
"mutrace: Try to increase --hash-size=, which is currently at %u.\n", n_self_contended, hash_size);
}
/* Print out the summary only the first time this is called. */
static void show_summary(void) {
static pthread_mutex_t summary_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool shown_summary = false;
real_pthread_mutex_lock(&summary_mutex);
if (shown_summary)
goto finish;
show_summary_internal();
finish:
shown_summary = true;
real_pthread_mutex_unlock(&summary_mutex);
}
/* Print out the summary every time this is called. */
static void show_summary_again(void) {
static pthread_mutex_t summary_mutex = PTHREAD_MUTEX_INITIALIZER;
real_pthread_mutex_lock(&summary_mutex);
show_summary_internal();
real_pthread_mutex_unlock(&summary_mutex);
}
static void shutdown(void) {
show_summary();
}
void exit(int status) {
show_summary();
real_exit(status);
}
void _exit(int status) {
show_summary();
real__exit(status);
}
void _Exit(int status) {
show_summary();
real__Exit(status);
}
static void sigusr1_cb(int sig) {
show_summary_again();
}
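/* Returns true if the calling thread currently runs under a real-time
 * scheduling policy (SCHED_FIFO or SCHED_RR), ignoring the
 * SCHED_RESET_ON_FORK flag bit. */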
static bool is_realtime(void) {
int policy;
policy = sched_getscheduler(_gettid());
assert(policy >= 0);
policy &= ~SCHED_RESET_ON_FORK;
return
policy == SCHED_FIFO ||
policy == SCHED_RR;
}
static bool verify_frame(const char *s) {
/* Generated by glibc's native backtrace_symbols() on Fedora */
if (strstr(s, "/" SONAME "("))
return false;
/* Generated by glibc's native backtrace_symbols() on Debian */
if (strstr(s, "/" SONAME " ["))
return false;
/* Generated by backtrace-symbols.c */
if (strstr(s, __FILE__":"))
return false;
return true;
}
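/* A cheap backtrace for x86/x86-64 that simply walks the saved frame
 * pointer chain within the current thread's stack, avoiding the locking
 * done inside glibc's backtrace(). On other architectures we fall back to
 * the real backtrace(). Note that this presumably only yields useful
 * results when the program is built with frame pointers enabled. */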
static int light_backtrace(void **buffer, int size) {
#if defined(__i386__) || defined(__x86_64__)
int osize = 0;
void *stackaddr;
size_t stacksize;
void *frame;
pthread_attr_t attr;
pthread_getattr_np(pthread_self(), &attr);
pthread_attr_getstack(&attr, &stackaddr, &stacksize);
pthread_attr_destroy(&attr);
#if defined(__i386__)
__asm__("mov %%ebp, %[frame]": [frame] "=r" (frame));
#elif defined(__x86_64__)
__asm__("mov %%rbp, %[frame]": [frame] "=r" (frame));
#endif
while (osize < size &&
frame >= stackaddr &&
frame < (void *)((char *)stackaddr + stacksize)) {
buffer[osize++] = *((void **)frame + 1);
frame = *(void **)frame;
}
return osize;
#else
return real_backtrace(buffer, size);
#endif
}
static struct stacktrace_info generate_stacktrace(void) {
struct stacktrace_info stacktrace;
stacktrace.frames = malloc(sizeof(void*) * frames_max);
assert(stacktrace.frames);
stacktrace.nb_frame = light_backtrace(stacktrace.frames, frames_max);
assert(stacktrace.nb_frame >= 0);
return stacktrace;
}
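/* Render a captured stack trace as one tab-indented symbol per line.
 * Leading frames that point into mutrace itself are skipped (see
 * verify_frame()); only the last of them is kept, so the transition from
 * the wrapper into application code remains visible. */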
static char *stacktrace_to_string(struct stacktrace_info stacktrace) {
char **strings, *ret, *p;
int i;
size_t k;
bool b;
strings = real_backtrace_symbols(stacktrace.frames, stacktrace.nb_frame);
assert(strings);
k = 0;
for (i = 0; i < stacktrace.nb_frame; i++)
k += strlen(strings[i]) + 2;
ret = malloc(k + 1);
assert(ret);
b = false;
for (i = 0, p = ret; i < stacktrace.nb_frame; i++) {
if (!b && !verify_frame(strings[i]))
continue;
if (!b && i > 0) {
/* Skip all but the first stack frame of ours */
*(p++) = '\t';
strcpy(p, strings[i-1]);
p += strlen(strings[i-1]);
*(p++) = '\n';
}
b = true;
*(p++) = '\t';
strcpy(p, strings[i]);
p += strlen(strings[i]);
*(p++) = '\n';
}
*p = 0;
free(strings);
return ret;
}
static struct mutex_info *mutex_info_add(unsigned long u, pthread_mutex_t *mutex, int type, int protocol) {
struct mutex_info *mi;
/* Needs external locking */
if (alive_mutexes[u])
__sync_fetch_and_add(&n_collisions, 1);
mi = calloc(1, sizeof(struct mutex_info));
assert(mi);
mi->mutex = mutex;
mi->type = type;
mi->protocol = protocol;
mi->stacktrace = generate_stacktrace();
mi->next = alive_mutexes[u];
alive_mutexes[u] = mi;
return mi;
}
static void mutex_info_remove(unsigned u, pthread_mutex_t *mutex) {
struct mutex_info *mi, *p;
/* Needs external locking */
for (mi = alive_mutexes[u], p = NULL; mi; p = mi, mi = mi->next)
if (mi->mutex == mutex)
break;
if (!mi)
return;
if (p)
p->next = mi->next;
else
alive_mutexes[u] = mi->next;
mi->dead = true;
mi->next = dead_mutexes[u];
dead_mutexes[u] = mi;
}
static struct mutex_info *mutex_info_acquire(pthread_mutex_t *mutex) {
unsigned long u;
struct mutex_info *mi;
u = mutex_hash(mutex);
lock_hash_mutex(u);
for (mi = alive_mutexes[u]; mi; mi = mi->next)
if (mi->mutex == mutex)
return mi;
/* FIXME: We assume that static mutexes are NORMAL, which
* might not actually be correct */
return mutex_info_add(u, mutex, PTHREAD_MUTEX_NORMAL, PTHREAD_PRIO_NONE);
}
static void mutex_info_release(pthread_mutex_t *mutex) {
unsigned long u;
u = mutex_hash(mutex);
unlock_hash_mutex(u);
}
int pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *mutexattr) {
int r;
unsigned long u;
if (UNLIKELY(!initialized && recursive)) {
static const pthread_mutex_t template = PTHREAD_MUTEX_INITIALIZER;
/* Now this is incredibly ugly. */
memcpy(mutex, &template, sizeof(pthread_mutex_t));
return 0;
}
load_functions();
r = real_pthread_mutex_init(mutex, mutexattr);
if (r != 0)
return r;
if (LIKELY(initialized && !recursive)) {
int type = PTHREAD_MUTEX_NORMAL;
int protocol = PTHREAD_PRIO_NONE;
recursive = true;
u = mutex_hash(mutex);
lock_hash_mutex(u);
mutex_info_remove(u, mutex);
if (mutexattr) {
int k;
k = pthread_mutexattr_gettype(mutexattr, &type);
assert(k == 0);
k = pthread_mutexattr_getprotocol(mutexattr, &protocol);
assert(k == 0);
}
mutex_info_add(u, mutex, type, protocol);
unlock_hash_mutex(u);
recursive = false;
}
return r;
}
int pthread_mutex_destroy(pthread_mutex_t *mutex) {
unsigned long u;
assert(initialized || !recursive);
load_functions();
if (LIKELY(initialized && !recursive)) {
recursive = true;
u = mutex_hash(mutex);
lock_hash_mutex(u);
mutex_info_remove(u, mutex);
unlock_hash_mutex(u);
recursive = false;
}
return real_pthread_mutex_destroy(mutex);
}
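/* Bookkeeping performed after a mutex has successfully been acquired:
 * detect relocking of a non-recursive mutex (marked as broken), count the
 * acquisition and, if we had to wait, the contention time, track owner
 * changes and real-time use, and record a timestamp so the hold time can
 * be computed at unlock. */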
static void mutex_lock(pthread_mutex_t *mutex, bool busy, uint64_t nsec_contended) {
struct mutex_info *mi;
pid_t tid;
if (UNLIKELY(!initialized || recursive))
return;
recursive = true;
mi = mutex_info_acquire(mutex);
if (mi->n_lock_level > 0 && mi->type != PTHREAD_MUTEX_RECURSIVE) {
__sync_fetch_and_add(&n_broken_mutexes, 1);
mi->broken = true;
if (raise_trap)
DEBUG_TRAP;
}
mi->n_lock_level++;
mi->n_locked[WRITE]++;
if (busy) {
mi->n_contended[WRITE]++;
mi->nsec_contended_total[WRITE] += nsec_contended;
}
tid = _gettid();
if (mi->last_owner != tid) {
if (mi->last_owner != 0)
mi->n_owner_changed++;
mi->last_owner = tid;
}
if (track_rt && !mi->realtime && is_realtime())
mi->realtime = true;
mi->nsec_timestamp[WRITE] = nsec_now();
mutex_info_release(mutex);
recursive = false;
}
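/* The interposed lock functions first try the non-blocking path; only if
 * that returns EBUSY do we fall back to the real blocking call and measure
 * the time spent waiting, which is then accounted as contention. */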
int pthread_mutex_lock(pthread_mutex_t *mutex) {
int r;
bool busy;
uint64_t wait_time = 0;
if (UNLIKELY(!initialized && recursive)) {
/* During the initialization phase we might be called
* inside of dlsym(). Since we'd enter an endless loop
* if we tried to resolve the real
* pthread_mutex_lock() here, we simply fake the
* lock, which should be safe since no thread can be
* running yet. */
assert(!threads_existing);
return 0;
}
load_functions();
r = real_pthread_mutex_trylock(mutex);
if (UNLIKELY(r != EBUSY && r != 0))
return r;
if (UNLIKELY((busy = (r == EBUSY)))) {
uint64_t start_time = nsec_now();
r = real_pthread_mutex_lock(mutex);
wait_time = nsec_now() - start_time;
if (UNLIKELY(r != 0))
return r;
}
mutex_lock(mutex, busy, wait_time);
return r;
}
int pthread_mutex_timedlock(pthread_mutex_t *mutex, const struct timespec *abstime) {
int r;
bool busy;
uint64_t wait_time = 0;
if (UNLIKELY(!initialized && recursive)) {
assert(!threads_existing);
return 0;
}
load_functions();
r = real_pthread_mutex_trylock(mutex);
if (UNLIKELY(r != EBUSY && r != 0))
return r;
if (UNLIKELY((busy = (r == EBUSY)))) {
uint64_t start_time = nsec_now();
r = real_pthread_mutex_timedlock(mutex, abstime);
wait_time = nsec_now() - start_time;
if (UNLIKELY(r == ETIMEDOUT))
busy = true;
else if (UNLIKELY(r != 0))
return r;
}
mutex_lock(mutex, busy, wait_time);
return r;
}
int pthread_mutex_trylock(pthread_mutex_t *mutex) {
int r;
if (UNLIKELY(!initialized && recursive)) {
assert(!threads_existing);
return 0;
}
load_functions();
r = real_pthread_mutex_trylock(mutex);
if (UNLIKELY(r != 0))
return r;
mutex_lock(mutex, false, 0);
return r;
}
static void mutex_unlock(pthread_mutex_t *mutex) {
struct mutex_info *mi;
uint64_t t;
if (UNLIKELY(!initialized || recursive))
return;
recursive = true;
mi = mutex_info_acquire(mutex);
if (mi->n_lock_level <= 0) {
__sync_fetch_and_add(&n_broken_mutexes, 1);
mi->broken = true;
if (raise_trap)
DEBUG_TRAP;
}
mi->n_lock_level--;
t = nsec_now() - mi->nsec_timestamp[WRITE];
mi->nsec_locked_total[WRITE] += t;
if (t > mi->nsec_locked_max[WRITE])
mi->nsec_locked_max[WRITE] = t;
mutex_info_release(mutex);
recursive = false;
}
int pthread_mutex_unlock(pthread_mutex_t *mutex) {
if (UNLIKELY(!initialized && recursive)) {
assert(!threads_existing);
return 0;
}
load_functions();
mutex_unlock(mutex);
return real_pthread_mutex_unlock(mutex);
}
static struct cond_info *cond_info_add(unsigned long u, pthread_cond_t *cond) {
struct cond_info *ci;
/* Needs external locking */
if (alive_conds[u])
__sync_fetch_and_add(&n_collisions, 1);
ci = calloc(1, sizeof(struct cond_info));
assert(ci);
ci->cond = cond;
ci->stacktrace = generate_stacktrace();
ci->next = alive_conds[u];
alive_conds[u] = ci;
return ci;
}
static void cond_info_remove(unsigned u, pthread_cond_t *cond) {
struct cond_info *ci, *p;
/* Needs external locking */
for (ci = alive_conds[u], p = NULL; ci; p = ci, ci = ci->next)
if (ci->cond == cond)
break;
if (!ci)
return;
if (p)
p->next = ci->next;
else
alive_conds[u] = ci->next;
ci->dead = true;
ci->next = dead_conds[u];
dead_conds[u] = ci;
}
static struct cond_info *cond_info_acquire(pthread_cond_t *cond) {
unsigned long u;
struct cond_info *ci;
u = cond_hash(cond);
lock_hash_cond(u);
for (ci = alive_conds[u]; ci; ci = ci->next)
if (ci->cond == cond)
return ci;
return cond_info_add(u, cond);
}
static void cond_info_release(pthread_cond_t *cond) {
unsigned long u;
u = cond_hash(cond);
unlock_hash_cond(u);
}
int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr) {
int r;
if (UNLIKELY(!initialized && recursive)) {
static const pthread_cond_t template = PTHREAD_COND_INITIALIZER;
/* Now this is incredibly ugly. */
memcpy(cond, &template, sizeof(pthread_cond_t));
return 0;
}
load_functions();
r = real_pthread_cond_init(cond, attr);
if (r != 0)
return r;
if (LIKELY(initialized && !recursive)) {
unsigned long u;
recursive = true;
u = cond_hash(cond);
lock_hash_cond(u);
cond_info_remove(u, cond);
cond_info_add(u, cond);
unlock_hash_cond(u);
recursive = false;
}
return r;
}
int pthread_cond_destroy(pthread_cond_t *cond) {
assert(initialized || !recursive);
load_functions();
if (LIKELY(initialized && !recursive)) {
unsigned long u;
recursive = true;
u = cond_hash(cond);
lock_hash_cond(u);
cond_info_remove(u, cond);
unlock_hash_cond(u);
recursive = false;
}
return real_pthread_cond_destroy(cond);
}
static void cond_signal(pthread_cond_t *cond, bool is_broadcast) {
struct cond_info *ci;
if (UNLIKELY(!initialized || recursive))
return;
recursive = true;
ci = cond_info_acquire(cond);
if (ci->n_wait_level == 0) {
ci->n_signal_contended++;
} else {
/* If we're signalling the second-last thread which is waiting
* on the cond, calculate the total contention time (from the
* time the second thread first started waiting on the cond) and
* add it to the total.
*
* However, if we're broadcasting, don't sum up the contention
* time, since we assume that the threads will all now have
* useful work to do. */
if (ci->n_wait_level == 2 && !is_broadcast) {
uint64_t contention_time = nsec_now() - ci->nsec_timestamp;
ci->nsec_wait_contended_total += contention_time;
if (contention_time > ci->nsec_wait_contended_max)
ci->nsec_wait_contended_max = contention_time;
}
}
if (is_broadcast)
ci->n_broadcast++;
else
ci->n_signal++;
if (track_rt && !ci->realtime && is_realtime())
ci->realtime = true;
cond_info_release(cond);
recursive = false;
}
int pthread_cond_signal(pthread_cond_t *cond) {
int r;
if (UNLIKELY(!initialized && recursive)) {
assert(!threads_existing);
return 0;
}
load_functions();
r = real_pthread_cond_signal(cond);
if (UNLIKELY(r != 0))
return r;
cond_signal(cond, false);
return r;
}
int pthread_cond_broadcast(pthread_cond_t *cond) {
int r;
if (UNLIKELY(!initialized && recursive)) {
assert(!threads_existing);
return 0;
}
load_functions();
r = real_pthread_cond_broadcast(cond);
if (UNLIKELY(r != 0))
return r;
cond_signal(cond, true);
return r;
}
static void cond_wait_start(pthread_cond_t *cond, pthread_mutex_t *mutex) {
struct cond_info *ci;
if (UNLIKELY(!initialized || recursive))
return;
recursive = true;
ci = cond_info_acquire(cond);
/* Check the cond is always used with the same mutex. pthreads
* technically allows for different mutexes to be used sequentially
* with a given cond, but this seems error prone and probably always
* A Bad Idea. */
if (ci->mutex == NULL) {
ci->mutex = mutex;
} else if (ci->mutex != mutex) {
__sync_fetch_and_add(&n_broken_conds, 1);
ci->broken = true;
if (raise_trap)
DEBUG_TRAP;
}
if (ci->n_wait_level > 0)
ci->n_wait_contended++;
/* Iff we're the second thread to concurrently wait on this cond, start
* the contention timer. This timer will continue running until the
* second-last thread to be waiting on the cond is signalled. */
if (ci->n_wait_level == 1)
ci->nsec_timestamp = nsec_now();
ci->n_wait_level++;
ci->n_wait++;
if (track_rt && !ci->realtime && is_realtime())
ci->realtime = true;
cond_info_release(cond);
recursive = false;
}
static void cond_wait_finish(pthread_cond_t *cond, pthread_mutex_t *mutex, uint64_t wait_time) {
struct cond_info *ci;
if (UNLIKELY(!initialized || recursive))
return;
recursive = true;
ci = cond_info_acquire(cond);
ci->nsec_wait_total += wait_time;
if (wait_time > ci->nsec_wait_max)
ci->nsec_wait_max = wait_time;
ci->n_wait_level--;
cond_info_release(cond);
recursive = false;
}
int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) {
int r;
uint64_t start_time, wait_time = 0;
assert(initialized || !recursive);
load_functions();
cond_wait_start(cond, mutex);
mutex_unlock(mutex);
start_time = nsec_now();
r = real_pthread_cond_wait(cond, mutex);
wait_time = nsec_now() - start_time;
/* Unfortunately we cannot distinguish between mutex contention
* and the condition not being signalled here. */
mutex_lock(mutex, false, 0);
cond_wait_finish(cond, mutex, wait_time);
return r;
}
int pthread_cond_timedwait(pthread_cond_t *cond, pthread_mutex_t *mutex, const struct timespec *abstime) {
int r;
uint64_t start_time, wait_time = 0;
assert(initialized || !recursive);
load_functions();
cond_wait_start(cond, mutex);
mutex_unlock(mutex);
start_time = nsec_now();
r = real_pthread_cond_timedwait(cond, mutex, abstime);
wait_time = nsec_now() - start_time;
/* Unfortunately we cannot distinguish between mutex contention
* and the condition not being signalled here. */
mutex_lock(mutex, false, 0);
cond_wait_finish(cond, mutex, wait_time);
return r;
}
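/* The first call to pthread_create() makes sure setup() has run and
 * records that other threads may exist from now on; the assertions in the
 * early-return paths above rely on this flag. */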
int pthread_create(pthread_t *newthread,
const pthread_attr_t *attr,
void *(*start_routine) (void *),
void *arg) {
load_functions();
if (UNLIKELY(!threads_existing)) {
threads_existing = true;
setup();
}
return real_pthread_create(newthread, attr, start_routine, arg);
}
int backtrace(void **array, int size) {
int r;
load_functions();
/* backtrace() internally uses a mutex. To avoid an endless
* loop we need to disable ourselves so that we don't try to
* call backtrace() ourselves when looking at that lock. */
recursive = true;
r = real_backtrace(array, size);
recursive = false;
return r;
}
char **backtrace_symbols(void *const *array, int size) {
char **r;
load_functions();
recursive = true;
r = real_backtrace_symbols(array, size);
recursive = false;
return r;
}
void backtrace_symbols_fd(void *const *array, int size, int fd) {
load_functions();
recursive = true;
real_backtrace_symbols_fd(array, size, fd);
recursive = false;
}
static struct mutex_info *rwlock_info_add(unsigned long u, pthread_rwlock_t *rwlock, int kind) {
struct mutex_info *mi;
/* Needs external locking */
if (alive_mutexes[u])
__sync_fetch_and_add(&n_collisions, 1);
mi = calloc(1, sizeof(struct mutex_info));
assert(mi);
mi->rwlock = rwlock;
mi->kind = kind;
mi->stacktrace = generate_stacktrace();
mi->next = alive_mutexes[u];
alive_mutexes[u] = mi;
return mi;
}
static void rwlock_info_remove(unsigned u, pthread_rwlock_t *rwlock) {
struct mutex_info *mi, *p;
/* Needs external locking */
for (mi = alive_mutexes[u], p = NULL; mi; p = mi, mi = mi->next)
if (mi->rwlock == rwlock)
break;
if (!mi)
return;
if (p)
p->next = mi->next;
else
alive_mutexes[u] = mi->next;
mi->dead = true;
mi->next = dead_mutexes[u];
dead_mutexes[u] = mi;
}
static struct mutex_info *rwlock_info_acquire(pthread_rwlock_t *rwlock) {
unsigned long u;
struct mutex_info *mi;
u = rwlock_hash(rwlock);
lock_hash_mutex(u);
for (mi = alive_mutexes[u]; mi; mi = mi->next)
if (mi->rwlock == rwlock)
return mi;
/* FIXME: We assume that static rwlocks are RWLOCK_DEFAULT,
* which might not actually be correct */
return rwlock_info_add(u, rwlock, PTHREAD_RWLOCK_DEFAULT_NP);
}
static void rwlock_info_release(pthread_rwlock_t *rwlock) {
unsigned long u;
u = rwlock_hash(rwlock);
unlock_hash_mutex(u);
}
int pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr) {
int r;
unsigned long u;
if (UNLIKELY(!initialized && recursive)) {
static const pthread_rwlock_t template = PTHREAD_RWLOCK_INITIALIZER;
/* Now this is incredibly ugly. */
memcpy(rwlock, &template, sizeof(pthread_rwlock_t));
return 0;
}
load_functions();
r = real_pthread_rwlock_init(rwlock, attr);
if (r != 0)
return r;
if (LIKELY(initialized && !recursive)) {
int kind = PTHREAD_RWLOCK_DEFAULT_NP;
recursive = true;
u = rwlock_hash(rwlock);
lock_hash_mutex(u);
rwlock_info_remove(u, rwlock);
if (attr) {
int k;
k = pthread_rwlockattr_getkind_np(attr, &kind);
assert(k == 0);
}
rwlock_info_add(u, rwlock, kind);
unlock_hash_mutex(u);
recursive = false;
}
return r;
}
int pthread_rwlock_destroy(pthread_rwlock_t *rwlock) {
unsigned long u;
assert(initialized || !recursive);
load_functions();
if (LIKELY(initialized && !recursive)) {
recursive = true;
u = rwlock_hash(rwlock);
lock_hash_mutex(u);
rwlock_info_remove(u, rwlock);
unlock_hash_mutex(u);
recursive = false;
}
return real_pthread_rwlock_destroy(rwlock);
}
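/* rwlock counterpart of mutex_lock(): the same bookkeeping, but statistics
 * are kept separately for read and write acquisitions, and it is taking a
 * write lock while the lock is already held that marks it as broken. */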
static void rwlock_lock(pthread_rwlock_t *rwlock, LockType lock_type, bool busy, uint64_t nsec_contended) {
struct mutex_info *mi;
pid_t tid;
if (UNLIKELY(!initialized || recursive))
return;
recursive = true;
mi = rwlock_info_acquire(rwlock);
if (mi->n_lock_level > 0 && lock_type == WRITE) {
__sync_fetch_and_add(&n_broken_mutexes, 1);
mi->broken = true;
if (raise_trap)
DEBUG_TRAP;
}
mi->n_lock_level++;
mi->lock_type = lock_type;
mi->n_locked[lock_type]++;
if (busy) {
mi->n_contended[lock_type]++;
mi->nsec_contended_total[lock_type] += nsec_contended;
}
tid = _gettid();
if (mi->last_owner != tid) {
if (mi->last_owner != 0)
mi->n_owner_changed++;
mi->last_owner = tid;
}
if (track_rt && !mi->realtime && is_realtime())
mi->realtime = true;
/* Since multiple readers can hold the rwlock concurrently, we don't
* want subsequent readers overwriting our timestamp; or we'll record
* the wrong total locked time. Consequently, for read locks, we only
* handle nsec_timestamp when the lock level is transitioning 0 → 1
* or 1 → 0. */
if (lock_type != READ || mi->n_lock_level == 1)
mi->nsec_timestamp[lock_type] = nsec_now();
rwlock_info_release(rwlock);
recursive = false;
}
int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock) {
int r;
bool busy;
uint64_t wait_time = 0;
if (UNLIKELY(!initialized && recursive)) {
assert(!threads_existing);
return 0;
}
load_functions();
r = real_pthread_rwlock_tryrdlock(rwlock);
if (UNLIKELY(r != EBUSY && r != 0))
return r;
if (UNLIKELY((busy = (r == EBUSY)))) {
uint64_t start_time = nsec_now();
r = real_pthread_rwlock_rdlock(rwlock);
wait_time = nsec_now() - start_time;
if (UNLIKELY(r == ETIMEDOUT))
busy = true;
else if (UNLIKELY(r != 0))
return r;
}
rwlock_lock(rwlock, READ, busy, wait_time);
return r;
}
int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock) {
int r;
if (UNLIKELY(!initialized && recursive)) {
assert(!threads_existing);
return 0;
}
load_functions();
r = real_pthread_rwlock_tryrdlock(rwlock);
if (UNLIKELY(r != EBUSY && r != 0))
return r;
rwlock_lock(rwlock, READ, false, 0);
return r;
}
int pthread_rwlock_timedrdlock(pthread_rwlock_t *rwlock, const struct timespec *abstime) {
int r;
bool busy;
uint64_t wait_time = 0;
if (UNLIKELY(!initialized && recursive)) {
assert(!threads_existing);
return 0;
}
load_functions();
r = real_pthread_rwlock_tryrdlock(rwlock);
if (UNLIKELY(r != EBUSY && r != 0))
return r;
if (UNLIKELY((busy = (r == EBUSY)))) {
uint64_t start_time = nsec_now();
r = real_pthread_rwlock_timedrdlock(rwlock, abstime);
wait_time = nsec_now() - start_time;
if (UNLIKELY(r == ETIMEDOUT))
busy = true;
else if (UNLIKELY(r != 0))
return r;
}
rwlock_lock(rwlock, READ, busy, wait_time);
return r;
}
int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock) {
int r;
bool busy;
uint64_t wait_time = 0;
if (UNLIKELY(!initialized && recursive)) {
assert(!threads_existing);
return 0;
}
load_functions();
r = real_pthread_rwlock_trywrlock(rwlock);
if (UNLIKELY(r != EBUSY && r != 0))
return r;
if (UNLIKELY((busy = (r == EBUSY)))) {
uint64_t start_time = nsec_now();
r = real_pthread_rwlock_wrlock(rwlock);
wait_time = nsec_now() - start_time;
if (UNLIKELY(r == ETIMEDOUT))
busy = true;
else if (UNLIKELY(r != 0))
return r;
}
rwlock_lock(rwlock, WRITE, busy, wait_time);
return r;
}
int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock) {
int r;
if (UNLIKELY(!initialized && recursive)) {
assert(!threads_existing);
return 0;
}
load_functions();
r = real_pthread_rwlock_trywrlock(rwlock);
if (UNLIKELY(r != EBUSY && r != 0))
return r;
rwlock_lock(rwlock, WRITE, false, 0);
return r;
}
int pthread_rwlock_timedwrlock(pthread_rwlock_t *rwlock, const struct timespec *abstime) {
int r;
bool busy;
uint64_t wait_time = 0;
if (UNLIKELY(!initialized && recursive)) {
assert(!threads_existing);
return 0;
}
load_functions();
r = real_pthread_rwlock_trywrlock(rwlock);
if (UNLIKELY(r != EBUSY && r != 0))
return r;
if (UNLIKELY((busy = (r == EBUSY)))) {
uint64_t start_time = nsec_now();
r = real_pthread_rwlock_timedwrlock(rwlock, abstime);
wait_time = nsec_now() - start_time;
if (UNLIKELY(r == ETIMEDOUT))
busy = true;
else if (UNLIKELY(r != 0))
return r;
}
rwlock_lock(rwlock, WRITE, busy, wait_time);
return r;
}
static void rwlock_unlock(pthread_rwlock_t *rwlock) {
struct mutex_info *mi;
uint64_t t;
LockType lock_type;
if (UNLIKELY(!initialized || recursive))
return;
recursive = true;
mi = rwlock_info_acquire(rwlock);
if (mi->n_lock_level <= 0) {
__sync_fetch_and_add(&n_broken_mutexes, 1);
mi->broken = true;
if (raise_trap)
DEBUG_TRAP;
}
mi->n_lock_level--;
lock_type = mi->lock_type;
/* See the note in rwlock_lock(). We don't want to count the locked time
* multiple times for concurrently-held read locks. */
if (lock_type != READ || mi->n_lock_level == 0) {
t = nsec_now() - mi->nsec_timestamp[lock_type];
mi->nsec_locked_total[lock_type] += t;
if (t > mi->nsec_locked_max[lock_type])
mi->nsec_locked_max[lock_type] = t;
}
rwlock_info_release(rwlock);
recursive = false;
}
int pthread_rwlock_unlock(pthread_rwlock_t *rwlock) {
if (UNLIKELY(!initialized && recursive)) {
assert(!threads_existing);
return 0;
}
load_functions();
rwlock_unlock(rwlock);
return real_pthread_rwlock_unlock(rwlock);
}