42 files changed, 2799 insertions, 780 deletions
diff --git a/src/pulsecore/core-util.c b/src/pulsecore/core-util.c
index e83563f9..63751f53 100644
--- a/src/pulsecore/core-util.c
+++ b/src/pulsecore/core-util.c
@@ -115,6 +115,7 @@
 #include <pulsecore/macro.h>
 #include <pulsecore/thread.h>
 #include <pulsecore/strbuf.h>
+#include <pulsecore/usergroup.h>
 
 #include "core-util.h"
 
@@ -969,54 +970,24 @@ fail:
 
 /* Check whether the specified GID and the group name match */
 static int is_group(gid_t gid, const char *name) {
-    struct group group, *result = NULL;
-    long n;
-    void *data;
+    struct group *group = NULL;
     int r = -1;
 
-#ifdef HAVE_GETGRGID_R
-#ifdef _SC_GETGR_R_SIZE_MAX
-    n = sysconf(_SC_GETGR_R_SIZE_MAX);
-#else
-    n = -1;
-#endif
-    if (n <= 0)
-        n = 512;
-
-    data = pa_xmalloc((size_t) n);
-
     errno = 0;
-    if (getgrgid_r(gid, &group, data, (size_t) n, &result) < 0 || !result) {
-        pa_log("getgrgid_r(%u): %s", (unsigned) gid, pa_cstrerror(errno));
-
+    if (!(group = pa_getgrgid_malloc(gid)))
+    {
         if (!errno)
             errno = ENOENT;
 
-        goto finish;
-    }
-
-    r = strcmp(name, result->gr_name) == 0;
-
-finish:
-    pa_xfree(data);
-#else
-    /* XXX Not thread-safe, but needed on OSes (e.g. FreeBSD 4.X) that do not
-     * support getgrgid_r. */
-
-    errno = 0;
-    if (!(result = getgrgid(gid))) {
-        pa_log("getgrgid(%u): %s", gid, pa_cstrerror(errno));
-
-        if (!errno)
-            errno = ENOENT;
+        pa_log("pa_getgrgid_malloc(%u): %s", gid, pa_cstrerror(errno));
 
         goto finish;
     }
 
-    r = strcmp(name, result->gr_name) == 0;
+    r = strcmp(name, group->gr_name) == 0;
 
 finish:
-#endif
+    pa_getgrgid_free(group);
 
     return r;
 }
@@ -1065,38 +1036,12 @@ finish:
 
 /* Check whether the specifc user id is a member of the specified group */
 int pa_uid_in_group(uid_t uid, const char *name) {
-    char *g_buf, *p_buf;
-    long g_n, p_n;
-    struct group grbuf, *gr;
+    struct group *group = NULL;
     char **i;
     int r = -1;
 
-#ifdef _SC_GETGR_R_SIZE_MAX
-    g_n = sysconf(_SC_GETGR_R_SIZE_MAX);
-#else
-    g_n = -1;
-#endif
-    if (g_n <= 0)
-        g_n = 512;
-
-    g_buf = pa_xmalloc((size_t) g_n);
-
-#ifdef _SC_GETPW_R_SIZE_MAX
-    p_n = sysconf(_SC_GETPW_R_SIZE_MAX);
-#else
-    p_n = -1;
-#endif
-    if (p_n <= 0)
-        p_n = 512;
-
-    p_buf = pa_xmalloc((size_t) p_n);
-
     errno = 0;
-#ifdef HAVE_GETGRNAM_R
-    if (getgrnam_r(name, &grbuf, g_buf, (size_t) g_n, &gr) != 0 || !gr)
-#else
-    if (!(gr = getgrnam(name)))
-#endif
+    if (!(group = pa_getgrnam_malloc(name)))
     {
         if (!errno)
             errno = ENOENT;
@@ -1104,25 +1049,24 @@ int pa_uid_in_group(uid_t uid, const char *name) {
     }
 
     r = 0;
-    for (i = gr->gr_mem; *i; i++) {
-        struct passwd pwbuf, *pw;
+    for (i = group->gr_mem; *i; i++) {
+        struct passwd *pw = NULL;
 
-#ifdef HAVE_GETPWNAM_R
-        if (getpwnam_r(*i, &pwbuf, p_buf, (size_t) p_n, &pw) != 0 || !pw)
-#else
-        if (!(pw = getpwnam(*i)))
-#endif
+        errno = 0;
+        if (!(pw = pa_getpwnam_malloc(*i)))
             continue;
 
-        if (pw->pw_uid == uid) {
+        if (pw->pw_uid == uid)
             r = 1;
+
+        pa_getpwnam_free(pw);
+
+        if (r == 1)
             break;
-        }
     }
 
 finish:
-    pa_xfree(g_buf);
-    pa_xfree(p_buf);
+    pa_getgrnam_free(group);
 
     return r;
 }
@@ -1130,26 +1074,10 @@ finish:
 /* Get the GID of a gfiven group, return (gid_t) -1 on failure. */
 gid_t pa_get_gid_of_group(const char *name) {
     gid_t ret = (gid_t) -1;
-    char *g_buf;
-    long g_n;
-    struct group grbuf, *gr;
-
-#ifdef _SC_GETGR_R_SIZE_MAX
-    g_n = sysconf(_SC_GETGR_R_SIZE_MAX);
-#else
-    g_n = -1;
-#endif
-    if (g_n <= 0)
-        g_n = 512;
-
-    g_buf = pa_xmalloc((size_t) g_n);
+    struct group *gr = NULL;
 
     errno = 0;
-#ifdef HAVE_GETGRNAM_R
-    if (getgrnam_r(name, &grbuf, g_buf, (size_t) g_n, &gr) != 0 || !gr)
-#else
-    if (!(gr = getgrnam(name)))
-#endif
+    if (!(gr = pa_getgrnam_malloc(name)))
     {
         if (!errno)
             errno = ENOENT;
@@ -1159,7 +1087,7 @@ gid_t pa_get_gid_of_group(const char *name) {
     ret = gr->gr_gid;
 
 finish:
-    pa_xfree(g_buf);
+    pa_getgrnam_free(gr);
     return ret;
 }
 
@@ -2295,7 +2223,7 @@ int pa_close_all(int except_fd, ...) {
     va_end(ap);
 
     r = pa_close_allv(p);
-    free(p);
+    pa_xfree(p);
 
     return r;
 }
@@ -2890,3 +2818,22 @@ void pa_reset_personality(void) {
 #endif
 
 }
+
+#if defined(__linux__) && !defined(__OPTIMIZE__)
+
+/* Debug-build helper (__OPTIMIZE__ is abused as the "not a debug build" marker):
+ * tells whether the path /proc/self/exe points to starts with PA_BUILDDIR. */
+pa_bool_t pa_run_from_build_tree(void) {
+    pa_bool_t in_tree;
+    char *exe = pa_readlink("/proc/self/exe");
+
+    if (!exe) /* link could not be resolved: report "not in the build tree" */
+        return FALSE;
+
+    in_tree = pa_startswith(exe, PA_BUILDDIR);
+    pa_xfree(exe);
+
+    return in_tree;
+}
+
+#endif
diff --git a/src/pulsecore/core-util.h b/src/pulsecore/core-util.h
index 3db55106..8c13b535 100644
--- a/src/pulsecore/core-util.h
+++ b/src/pulsecore/core-util.h
@@ -250,4 +250,8 @@ size_t pa_pipe_buf(int fd);
 
 void pa_reset_personality(void);
 
+#if defined(__linux__) && !defined(__OPTIMIZE__)
+pa_bool_t pa_run_from_build_tree(void);
+#endif
+
 #endif
diff --git a/src/pulsecore/core.c b/src/pulsecore/core.c
index f5eb8352..f0726453 100644
--- a/src/pulsecore/core.c
+++ b/src/pulsecore/core.c
@@ -47,7 +47,7 @@
 
 #include "core.h"
 
-static PA_DEFINE_CHECK_TYPE(pa_core, pa_msgobject);
+PA_DEFINE_PUBLIC_CLASS(pa_core, pa_msgobject);
 
 static int core_process_msg(pa_msgobject *o, int code, void *userdata, int64_t offset, pa_memchunk *chunk) {
     pa_core *c = PA_CORE(o);
diff --git a/src/pulsecore/core.h b/src/pulsecore/core.h
index fc4eabfa..bfcea4f6 100644
--- a/src/pulsecore/core.h
+++ b/src/pulsecore/core.h
@@ -174,7 +174,7 @@ struct pa_core {
     pa_hook hooks[PA_CORE_HOOK_MAX];
 };
 
-PA_DECLARE_CLASS(pa_core);
+PA_DECLARE_PUBLIC_CLASS(pa_core);
 #define PA_CORE(o) pa_core_cast(o)
 
 enum {
diff --git a/src/pulsecore/cpu-arm.c b/src/pulsecore/cpu-arm.c
new file mode 100644
index 00000000..453b7848
--- /dev/null
+++ b/src/pulsecore/cpu-arm.c
@@ -0,0 +1,161 @@
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <pulse/xmalloc.h>
+#include <pulsecore/core-util.h>
+#include <pulsecore/log.h>
+
+#include "cpu-arm.h"
+
+#if defined (__arm__) && defined (__linux__)
+
+/* Size of the buffer /proc/cpuinfo is read into (including the trailing NUL) */
+#define MAX_BUFFER  4096
+
+/* Return a newly allocated copy of the text between the first ':' found at or
+ * after the first occurrence of tag and the end of that line. Returns NULL if
+ * tag, the terminating newline or the ':' is missing, or if nothing follows
+ * the ':' on that line. The caller releases the result with pa_xfree(). */
+static char *
+get_cpuinfo_line (char *cpuinfo, const char *tag) {
+    char *line, *end, *colon;
+
+    if (!(line = strstr (cpuinfo, tag)))
+        return NULL;
+
+    if (!(end = strchr (line, '\n')))
+        return NULL;
+
+    if (!(colon = strchr (line, ':')))
+        return NULL;
+
+    /* step past the ':'; a ':' at or beyond the line end means no value */
+    if (++colon >= end)
+        return NULL;
+
+    return pa_xstrndup (colon, end - colon);
+}
+
+/* Read at most MAX_BUFFER-1 bytes of /proc/cpuinfo into a freshly allocated,
+ * NUL-terminated buffer. Returns NULL if the file cannot be opened or read;
+ * otherwise the caller releases the buffer with pa_xfree(). */
+static char *get_cpuinfo(void) {
+    char *cpuinfo;
+    int n, fd;
+
+    cpuinfo = pa_xmalloc(MAX_BUFFER);
+
+    if ((fd = open("/proc/cpuinfo", O_RDONLY)) < 0) {
+        pa_xfree(cpuinfo);
+        return NULL;
+    }
+
+    /* pa_read() has a fourth "fd type cache" argument; NULL disables caching */
+    if ((n = pa_read(fd, cpuinfo, MAX_BUFFER-1, NULL)) < 0) {
+        pa_xfree(cpuinfo);
+        pa_close(fd);
+        return NULL;
+    }
+    cpuinfo[n] = 0;
+    pa_close(fd);
+
+    return cpuinfo;
+}
+#endif /* defined (__arm__) && defined (__linux__) */
+
+/* Detect the ARM CPU architecture level and features, log them and, on ARMv6
+ * or newer, install the optimized volume functions. The body is compiled only
+ * for ARM; feature detection itself is only implemented for Linux. */
+void pa_cpu_init_arm (void) {
+#if defined (__arm__)
+    /* declared outside the Linux-only section because it is also read below */
+    pa_cpu_arm_flag_t flags = 0;
+#if defined (__linux__)
+    char *cpuinfo, *line;
+    int arch;
+
+    /* We need to read the CPU flags from /proc/cpuinfo because there is no user
+     * space support to get the CPU features. This only works on linux AFAIK. */
+    if (!(cpuinfo = get_cpuinfo ())) {
+        pa_log ("Can't read cpuinfo");
+        return;
+    }
+
+    /* get the CPU architecture */
+    if ((line = get_cpuinfo_line (cpuinfo, "CPU architecture"))) {
+        arch = strtoul (line, NULL, 0);
+        if (arch >= 6)
+            flags |= PA_CPU_ARM_V6;
+        if (arch >= 7)
+            flags |= PA_CPU_ARM_V7;
+
+        pa_xfree(line);
+    }
+    /* get the CPU features */
+    if ((line = get_cpuinfo_line (cpuinfo, "Features"))) {
+        const char *state = NULL;
+        char *current;
+
+        while ((current = pa_split_spaces (line, &state))) {
+            if (!strcmp (current, "vfp"))
+                flags |= PA_CPU_ARM_VFP;
+            else if (!strcmp (current, "edsp"))
+                flags |= PA_CPU_ARM_EDSP;
+            else if (!strcmp (current, "neon"))
+                flags |= PA_CPU_ARM_NEON;
+            else if (!strcmp (current, "vfpv3"))
+                flags |= PA_CPU_ARM_VFPV3;
+
+            pa_xfree(current);
+        }
+
+        /* the line is an allocated copy, exactly like the architecture line */
+        pa_xfree(line);
+    }
+    pa_xfree(cpuinfo);
+
+    pa_log_info ("CPU flags: %s%s%s%s%s%s",
+          (flags & PA_CPU_ARM_V6) ? "V6 " : "",
+          (flags & PA_CPU_ARM_V7) ? "V7 " : "",
+          (flags & PA_CPU_ARM_VFP) ? "VFP " : "",
+          (flags & PA_CPU_ARM_EDSP) ? "EDSP " : "",
+          (flags & PA_CPU_ARM_NEON) ? "NEON " : "",
+          (flags & PA_CPU_ARM_VFPV3) ? "VFPV3 " : "");
+#else /* defined (__linux__) */
+    pa_log ("ARM cpu features not yet supported on this OS");
+#endif /* defined (__linux__) */
+
+    if (flags & PA_CPU_ARM_V6)
+        pa_volume_func_init_arm (flags);
+#endif /* defined (__arm__) */
+}
diff --git a/src/pulsecore/cpu-arm.h b/src/pulsecore/cpu-arm.h
new file mode 100644
index 00000000..a87cb63b
--- /dev/null
+++ b/src/pulsecore/cpu-arm.h
@@ -0,0 +1,42 @@
+#ifndef foocpuarmhfoo
+#define foocpuarmhfoo
+
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#include <stdint.h>
+
+typedef enum pa_cpu_arm_flag {        /* bit flags, OR-ed together by pa_cpu_init_arm() */
+    PA_CPU_ARM_V6       = (1 << 0),   /* "CPU architecture" in /proc/cpuinfo is >= 6 */
+    PA_CPU_ARM_V7       = (1 << 1),   /* "CPU architecture" in /proc/cpuinfo is >= 7 */
+    PA_CPU_ARM_VFP      = (1 << 2),   /* "vfp" listed in the "Features" line */
+    PA_CPU_ARM_EDSP     = (1 << 3),   /* "edsp" listed in the "Features" line */
+    PA_CPU_ARM_NEON     = (1 << 4),   /* "neon" listed in the "Features" line */
+    PA_CPU_ARM_VFPV3    = (1 << 5)    /* "vfpv3" listed in the "Features" line */
+} pa_cpu_arm_flag_t;
+
+void pa_cpu_init_arm (void);
+
+/* some optimized functions */
+void pa_volume_func_init_arm(pa_cpu_arm_flag_t flags);
+
+#endif /* foocpuarmhfoo */
diff --git a/src/pulsecore/cpu-x86.c b/src/pulsecore/cpu-x86.c
new file mode 100644
index 00000000..bc093ec0
--- /dev/null
+++ b/src/pulsecore/cpu-x86.c
@@ -0,0 +1,122 @@
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdint.h>
+
+#include <pulsecore/log.h>
+
+#include "cpu-x86.h"
+
+#if defined (__i386__) || defined (__amd64__)
+static void /* run CPUID with eax = op; the resulting eax/ebx/ecx/edx go to *a, *b, *c, *d */
+get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
+{
+    __asm__ __volatile__ (
+        "  push %%"PA_REG_b"   \n\t"   /* save ebx/rbx, which cpuid overwrites */
+        "  cpuid               \n\t"
+        "  mov %%ebx, %%esi    \n\t"   /* hand the ebx result out through esi (the "=S" output) */
+        "  pop %%"PA_REG_b"    \n\t"   /* restore the saved ebx/rbx */
+        /* NOTE(review): push/pop writes below the stack pointer - confirm this is safe with the amd64 red zone */
+        : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d)
+        : "0" (op)
+    );
+}
+#endif
+
+/* Query CPUID for the SIMD extensions of the running CPU, log them and call
+ * the matching optimisation init hooks. The body is compiled only for i386
+ * and amd64. Bit masks use unsigned constants because 1<<31 on a 32-bit
+ * signed int is undefined behaviour. */
+void pa_cpu_init_x86 (void) {
+#if defined (__i386__) || defined (__amd64__)
+    uint32_t eax, ebx, ecx, edx, level;
+    pa_cpu_x86_flag_t flags = 0;
+
+    /* get standard level */
+    get_cpuid (0x00000000, &level, &ebx, &ecx, &edx);
+    if (level >= 1) {
+        get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx);
+        if (edx & (1U<<23))
+            flags |= PA_CPU_X86_MMX;
+
+        if (edx & (1U<<25))
+            flags |= PA_CPU_X86_SSE;
+
+        if (edx & (1U<<26))
+            flags |= PA_CPU_X86_SSE2;
+
+        if (ecx & (1U<<0))
+            flags |= PA_CPU_X86_SSE3;
+
+        if (ecx & (1U<<9))
+            flags |= PA_CPU_X86_SSSE3;
+
+        if (ecx & (1U<<19))
+            flags |= PA_CPU_X86_SSE4_1;
+
+        if (ecx & (1U<<20))
+            flags |= PA_CPU_X86_SSE4_2;
+    }
+
+    /* get extended level */
+    get_cpuid (0x80000000, &level, &ebx, &ecx, &edx);
+    if (level >= 0x80000001) {
+        get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx);
+        if (edx & (1U<<22))
+            flags |= PA_CPU_X86_MMXEXT;
+
+        if (edx & (1U<<23))
+            flags |= PA_CPU_X86_MMX;
+
+        if (edx & (1U<<30))
+            flags |= PA_CPU_X86_3DNOWEXT;
+
+        if (edx & (1U<<31))
+            flags |= PA_CPU_X86_3DNOW;
+    }
+
+    pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s",
+        (flags & PA_CPU_X86_MMX) ? "MMX " : "",
+        (flags & PA_CPU_X86_SSE) ? "SSE " : "",
+        (flags & PA_CPU_X86_SSE2) ? "SSE2 " : "",
+        (flags & PA_CPU_X86_SSE3) ? "SSE3 " : "",
+        (flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "",
+        (flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "",
+        (flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "",
+        (flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "",
+        (flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "",
+        (flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : "");
+
+    /* activate various optimisations */
+    if (flags & PA_CPU_X86_MMX) {
+        pa_volume_func_init_mmx (flags);
+        pa_remap_func_init_mmx (flags);
+    }
+
+    if (flags & PA_CPU_X86_SSE)
+        pa_volume_func_init_sse (flags);
+#endif /* defined (__i386__) || defined (__amd64__) */
+}
diff --git a/src/pulsecore/cpu-x86.h b/src/pulsecore/cpu-x86.h
new file mode 100644
index 00000000..f3f9e56e
--- /dev/null
+++ b/src/pulsecore/cpu-x86.h
@@ -0,0 +1,68 @@
+#ifndef foocpux86hfoo
+#define foocpux86hfoo
+
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#include <stdint.h>
+
+typedef enum pa_cpu_x86_flag {         /* bit flags, OR-ed together by pa_cpu_init_x86() */
+    PA_CPU_X86_MMX       = (1 << 0),   /* CPUID 1 or CPUID 0x80000001: EDX bit 23 */
+    PA_CPU_X86_MMXEXT    = (1 << 1),   /* CPUID 0x80000001: EDX bit 22 */
+    PA_CPU_X86_SSE       = (1 << 2),   /* CPUID 1: EDX bit 25 */
+    PA_CPU_X86_SSE2      = (1 << 3),   /* CPUID 1: EDX bit 26 */
+    PA_CPU_X86_SSE3      = (1 << 4),   /* CPUID 1: ECX bit 0 */
+    PA_CPU_X86_SSSE3     = (1 << 5),   /* CPUID 1: ECX bit 9 */
+    PA_CPU_X86_SSE4_1    = (1 << 6),   /* CPUID 1: ECX bit 19 */
+    PA_CPU_X86_SSE4_2    = (1 << 7),   /* CPUID 1: ECX bit 20 */
+    PA_CPU_X86_3DNOW     = (1 << 8),   /* CPUID 0x80000001: EDX bit 31 */
+    PA_CPU_X86_3DNOWEXT  = (1 << 9)    /* CPUID 0x80000001: EDX bit 30 */
+} pa_cpu_x86_flag_t;
+
+void pa_cpu_init_x86 (void);
+
+
+#if defined (__i386__)        /* 32-bit build: PA_REG_* expand to the 32-bit register names */
+typedef int32_t pa_reg_x86;   /* 32 bits here, 64 bits on amd64; presumably a register-sized integer */
+#define PA_REG_a "eax"        /* PA_REG_*: register name strings pasted into inline asm templates */
+#define PA_REG_b "ebx"
+#define PA_REG_c "ecx"
+#define PA_REG_d "edx"
+#define PA_REG_D "edi"
+#define PA_REG_S "esi"
+#elif defined (__amd64__)     /* 64-bit build: same macros, 64-bit register names */
+typedef int64_t pa_reg_x86;
+#define PA_REG_a "rax"
+#define PA_REG_b "rbx"
+#define PA_REG_c "rcx"
+#define PA_REG_d "rdx"
+#define PA_REG_D "rdi"
+#define PA_REG_S "rsi"
+#endif                        /* on any other architecture none of the above is defined */
+
+/* some optimized functions */
+void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags);
+void pa_volume_func_init_sse(pa_cpu_x86_flag_t flags);
+
+void pa_remap_func_init_mmx(pa_cpu_x86_flag_t flags);
+
+#endif /* foocpux86hfoo */
diff --git a/src/pulsecore/msgobject.c b/src/pulsecore/msgobject.c
index 6a2a612d..075a28c5 100644
--- a/src/pulsecore/msgobject.c
+++ b/src/pulsecore/msgobject.c
@@ -26,22 +26,22 @@
 
 #include "msgobject.h"
 
-PA_DEFINE_CHECK_TYPE(pa_msgobject, pa_object);
+PA_DEFINE_PUBLIC_CLASS(pa_msgobject, pa_object);
 
-pa_msgobject *pa_msgobject_new_internal(size_t size, const char *type_name, int (*check_type)(const char *type_name)) {
+pa_msgobject *pa_msgobject_new_internal(size_t size, const char *type_id, pa_bool_t (*check_type)(const char *type_name)) {
     pa_msgobject *o;
 
     pa_assert(size > sizeof(pa_msgobject));
-    pa_assert(type_name);
+    pa_assert(type_id);
 
     if (!check_type)
         check_type = pa_msgobject_check_type;
 
-    pa_assert(check_type(type_name));
-    pa_assert(check_type("pa_object"));
-    pa_assert(check_type("pa_msgobject"));
+    pa_assert(check_type(type_id));
+    pa_assert(check_type(pa_object_type_id));
+    pa_assert(check_type(pa_msgobject_type_id));
 
-    o = PA_MSGOBJECT(pa_object_new_internal(size, type_name, check_type));
+    o = PA_MSGOBJECT(pa_object_new_internal(size, type_id, check_type));
     o->process_msg = NULL;
     return o;
 }
diff --git a/src/pulsecore/msgobject.h b/src/pulsecore/msgobject.h
index a35a23b5..ee0ec1ed 100644
--- a/src/pulsecore/msgobject.h
+++ b/src/pulsecore/msgobject.h
@@ -38,15 +38,13 @@ struct pa_msgobject {
     int (*process_msg)(pa_msgobject *o, int code, void *userdata, int64_t offset, pa_memchunk *chunk);
 };
 
-pa_msgobject *pa_msgobject_new_internal(size_t size, const char *type_name, int (*check_type)(const char *type_name));
+pa_msgobject *pa_msgobject_new_internal(size_t size, const char *type_id, pa_bool_t (*check_type)(const char *type_name));
 
-int pa_msgobject_check_type(const char *type);
-
-#define pa_msgobject_new(type) ((type*) pa_msgobject_new_internal(sizeof(type), #type, type##_check_type))
+#define pa_msgobject_new(type) ((type*) pa_msgobject_new_internal(sizeof(type), type##_type_id, type##_check_type))
 #define pa_msgobject_free ((void (*) (pa_msgobject* o)) pa_object_free)
 
 #define PA_MSGOBJECT(o) pa_msgobject_cast(o)
 
-PA_DECLARE_CLASS(pa_msgobject);
+PA_DECLARE_PUBLIC_CLASS(pa_msgobject);
 
 #endif
diff --git a/src/pulsecore/object.c b/src/pulsecore/object.c
index f3ead9c5..099d50d9 100644
--- a/src/pulsecore/object.c
+++ b/src/pulsecore/object.c
@@ -28,21 +28,23 @@
 
 #include "object.h"
 
-pa_object *pa_object_new_internal(size_t size, const char *type_name, int (*check_type)(const char *type_name)) {
+const char pa_object_type_id[] = "pa_object";
+
+pa_object *pa_object_new_internal(size_t size, const char *type_id, pa_bool_t (*check_type)(const char *type_id)) {
     pa_object *o;
 
     pa_assert(size > sizeof(pa_object));
-    pa_assert(type_name);
+    pa_assert(type_id);
 
     if (!check_type)
         check_type = pa_object_check_type;
 
-    pa_assert(check_type(type_name));
-    pa_assert(check_type("pa_object"));
+    pa_assert(check_type(type_id));
+    pa_assert(check_type(pa_object_type_id));
 
     o = pa_xmalloc(size);
     PA_REFCNT_INIT(o);
-    o->type_name = type_name;
+    o->type_id = type_id;
     o->free = pa_object_free;
     o->check_type = check_type;
 
@@ -65,8 +67,8 @@ void pa_object_unref(pa_object *o) {
     }
 }
 
-int pa_object_check_type(const char *type_name) {
-    pa_assert(type_name);
+pa_bool_t pa_object_check_type(const char *type_id) {
+    pa_assert(type_id);
 
-    return pa_streq(type_name, "pa_object");
+    return type_id == pa_object_type_id;
 }
diff --git a/src/pulsecore/object.h b/src/pulsecore/object.h
index 43e79327..4c120cd5 100644
--- a/src/pulsecore/object.h
+++ b/src/pulsecore/object.h
@@ -34,21 +34,23 @@ typedef struct pa_object pa_object;
 
 struct pa_object {
     PA_REFCNT_DECLARE;
-    const char *type_name;
+    const char *type_id;
     void (*free)(pa_object *o);
-    int (*check_type)(const char *type_name);
+    pa_bool_t (*check_type)(const char *type_name);
 };
 
-pa_object *pa_object_new_internal(size_t size, const char *type_name, int (*check_type)(const char *type_name));
-#define pa_object_new(type) ((type*) pa_object_new_internal(sizeof(type), #type, type##_check_type)
+pa_object *pa_object_new_internal(size_t size, const char *type_id, pa_bool_t (*check_type)(const char *type_id));
+#define pa_object_new(type) ((type*) pa_object_new_internal(sizeof(type), type##_type_id, type##_check_type)) /* typed wrapper: passes sizeof(type) plus the type's id and check function */
 
 #define pa_object_free ((void (*) (pa_object* _obj)) pa_xfree)
 
-int pa_object_check_type(const char *type);
+pa_bool_t pa_object_check_type(const char *type_id);
 
-static inline int pa_object_isinstance(void *o) {
+extern const char pa_object_type_id[];
+
+static inline pa_bool_t pa_object_isinstance(void *o) {
     pa_object *obj = (pa_object*) o;
-    return obj ? obj->check_type("pa_object") : 0;
+    return obj ? obj->check_type(pa_object_type_id) : TRUE;
 }
 
 pa_object *pa_object_ref(pa_object *o);
@@ -60,7 +62,7 @@ static inline int pa_object_refcnt(pa_object *o) {
 
 static inline pa_object* pa_object_cast(void *o) {
     pa_object *obj = (pa_object*) o;
-    pa_assert(!obj || obj->check_type("pa_object"));
+    pa_assert(!obj || obj->check_type(pa_object_type_id));
     return obj;
 }
 
@@ -68,10 +70,10 @@ static inline pa_object* pa_object_cast(void *o) {
 
 #define PA_OBJECT(o) pa_object_cast(o)
 
-#define PA_DECLARE_CLASS(c)                                             \
-    static inline int c##_isinstance(void *o) {                         \
+#define PA_DECLARE_CLASS_COMMON(c)                                      \
+    static inline pa_bool_t c##_isinstance(void *o) {                   \
         pa_object *obj = (pa_object*) o;                                \
-        return obj ? obj->check_type(#c) : 1;                           \
+        return obj ? obj->check_type(c##_type_id) : TRUE;               \
     }                                                                   \
     static inline c* c##_cast(void *o) {                                \
         pa_assert(c##_isinstance(o));                                   \
@@ -91,12 +93,27 @@ static inline pa_object* pa_object_cast(void *o) {
     }                                                                   \
     struct __stupid_useless_struct_to_allow_trailing_semicolon
 
-#define PA_DEFINE_CHECK_TYPE(c, parent)                                 \
-    int c##_check_type(const char *type) {                              \
-        pa_assert(type);                                                \
-        if (strcmp(type, #c) == 0)                                      \
-            return 1;                                                   \
-        return parent##_check_type(type);                               \
+#define PA_DECLARE_PUBLIC_CLASS(c)                                      \
+    extern const char c##_type_id[];                                    \
+    PA_DECLARE_CLASS_COMMON(c);                                         \
+    pa_bool_t c##_check_type(const char *type_id)
+
+#define PA_DEFINE_PUBLIC_CLASS(c, parent)                               \
+    const char c##_type_id[] = #c;                                      \
+    pa_bool_t c##_check_type(const char *type_id) {                     \
+        if (type_id == c##_type_id)                                     \
+            return TRUE;                                                \
+        return parent##_check_type(type_id);                            \
+    }                                                                   \
+    struct __stupid_useless_struct_to_allow_trailing_semicolon
+
+#define PA_DEFINE_PRIVATE_CLASS(c, parent)                              \
+    static const char c##_type_id[] = #c;                               \
+    PA_DECLARE_CLASS_COMMON(c);                                         \
+    static pa_bool_t c##_check_type(const char *type_id) {              \
+        if (type_id == c##_type_id)                                     \
+            return TRUE;                                                \
+        return parent##_check_type(type_id);                            \
     }                                                                   \
     struct __stupid_useless_struct_to_allow_trailing_semicolon
 
diff --git a/src/pulsecore/play-memblockq.c b/src/pulsecore/play-memblockq.c
index fceb2ca1..b0d76993 100644
--- a/src/pulsecore/play-memblockq.c
+++ b/src/pulsecore/play-memblockq.c
@@ -47,9 +47,8 @@ enum {
     MEMBLOCKQ_STREAM_MESSAGE_UNLINK,
 };
 
-PA_DECLARE_CLASS(memblockq_stream);
+PA_DEFINE_PRIVATE_CLASS(memblockq_stream, pa_msgobject);
 #define MEMBLOCKQ_STREAM(o) (memblockq_stream_cast(o))
-static PA_DEFINE_CHECK_TYPE(memblockq_stream, pa_msgobject);
 
 static void memblockq_stream_unlink(memblockq_stream *u) {
     pa_assert(u);
diff --git a/src/pulsecore/protocol-esound.c b/src/pulsecore/protocol-esound.c
index f64552aa..cfbaee6f 100644
--- a/src/pulsecore/protocol-esound.c
+++ b/src/pulsecore/protocol-esound.c
@@ -120,9 +120,8 @@ typedef struct connection {
     pa_time_event *auth_timeout_event;
 } connection;
 
-PA_DECLARE_CLASS(connection);
+PA_DEFINE_PRIVATE_CLASS(connection, pa_msgobject);
 #define CONNECTION(o) (connection_cast(o))
-static PA_DEFINE_CHECK_TYPE(connection, pa_msgobject);
 
 struct pa_esound_protocol {
     PA_REFCNT_DECLARE;
diff --git a/src/pulsecore/protocol-native.c b/src/pulsecore/protocol-native.c
index b1285e15..6678d847 100644
--- a/src/pulsecore/protocol-native.c
+++ b/src/pulsecore/protocol-native.c
@@ -98,17 +98,15 @@ typedef struct record_stream {
     pa_usec_t current_source_latency;
 } record_stream;
 
-PA_DECLARE_CLASS(record_stream);
 #define RECORD_STREAM(o) (record_stream_cast(o))
-static PA_DEFINE_CHECK_TYPE(record_stream, pa_msgobject);
+PA_DEFINE_PRIVATE_CLASS(record_stream, pa_msgobject);
 
 typedef struct output_stream {
     pa_msgobject parent;
 } output_stream;
 
-PA_DECLARE_CLASS(output_stream);
 #define OUTPUT_STREAM(o) (output_stream_cast(o))
-static PA_DEFINE_CHECK_TYPE(output_stream, pa_msgobject);
+PA_DEFINE_PRIVATE_CLASS(output_stream, pa_msgobject);
 
 typedef struct playback_stream {
     output_stream parent;
@@ -138,9 +136,8 @@ typedef struct playback_stream {
     uint64_t playing_for, underrun_for;
 } playback_stream;
 
-PA_DECLARE_CLASS(playback_stream);
 #define PLAYBACK_STREAM(o) (playback_stream_cast(o))
-static PA_DEFINE_CHECK_TYPE(playback_stream, output_stream);
+PA_DEFINE_PRIVATE_CLASS(playback_stream, output_stream);
 
 typedef struct upload_stream {
     output_stream parent;
@@ -156,9 +153,8 @@ typedef struct upload_stream {
     pa_proplist *proplist;
 } upload_stream;
 
-PA_DECLARE_CLASS(upload_stream);
 #define UPLOAD_STREAM(o) (upload_stream_cast(o))
-static PA_DEFINE_CHECK_TYPE(upload_stream, output_stream);
+PA_DEFINE_PRIVATE_CLASS(upload_stream, output_stream);
 
 struct pa_native_connection {
     pa_msgobject parent;
@@ -176,9 +172,8 @@ struct pa_native_connection {
     pa_time_event *auth_timeout_event;
 };
 
-PA_DECLARE_CLASS(pa_native_connection);
 #define PA_NATIVE_CONNECTION(o) (pa_native_connection_cast(o))
-static PA_DEFINE_CHECK_TYPE(pa_native_connection, pa_msgobject);
+PA_DEFINE_PRIVATE_CLASS(pa_native_connection, pa_msgobject);
 
 struct pa_native_protocol {
     PA_REFCNT_DECLARE;
diff --git a/src/pulsecore/protocol-simple.c b/src/pulsecore/protocol-simple.c
index 776d74b6..95ec6ac8 100644
--- a/src/pulsecore/protocol-simple.c
+++ b/src/pulsecore/protocol-simple.c
@@ -69,9 +69,8 @@ typedef struct connection {
     } playback;
 } connection;
 
-PA_DECLARE_CLASS(connection);
+PA_DEFINE_PRIVATE_CLASS(connection, pa_msgobject);
 #define CONNECTION(o) (connection_cast(o))
-static PA_DEFINE_CHECK_TYPE(connection, pa_msgobject);
 
 struct pa_simple_protocol {
     PA_REFCNT_DECLARE;
diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c
new file mode 100644
index 00000000..a0fc85b9
--- /dev/null
+++ b/src/pulsecore/remap.c
@@ -0,0 +1,204 @@
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk.com>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#include <pulse/sample.h>
+#include <pulsecore/log.h>
+#include <pulsecore/macro.h>
+
+#include "remap.h"
+
+/* Plain C mono to stereo remapper.
+ *
+ * Every one of the n samples read from src is written twice to dst, once for
+ * either channel of the output frame, so 2 * n samples are stored in total.
+ * The sample width is selected by *m->format; formats other than
+ * PA_SAMPLE_FLOAT32NE and PA_SAMPLE_S16NE end in pa_assert_not_reached(). */
+static void remap_mono_to_stereo_c (pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    unsigned remaining = n;
+
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE: {
+            const float *in = (const float *) src;
+            float *out = (float *) dst;
+
+            while (remaining > 0) {
+                const float sample = *in;
+
+                /* same value for both channels of the frame */
+                out[0] = sample;
+                out[1] = sample;
+
+                in++;
+                out += 2;
+                remaining--;
+            }
+
+            break;
+        }
+
+        case PA_SAMPLE_S16NE: {
+            const int16_t *in = (const int16_t *) src;
+            int16_t *out = (int16_t *) dst;
+
+            while (remaining > 0) {
+                const int16_t sample = *in;
+
+                /* same value for both channels of the frame */
+                out[0] = sample;
+                out[1] = sample;
+
+                in++;
+                out += 2;
+                remaining--;
+            }
+
+            break;
+        }
+
+        default:
+            pa_assert_not_reached();
+    }
+}
+
+/* Generic C remapper: mixes n frames from src into dst through the gain matrix.
+ *
+ * dst is zeroed first. Then every input channel whose matrix entry for an
+ * output channel is positive is added to that output channel frame by frame:
+ * unscaled if the entry is at least unity (1.0 in map_table_f, 0x10000 in
+ * map_table_i), multiplied by the entry otherwise. FLOAT32NE uses map_table_f,
+ * S16NE uses map_table_i; other formats end in pa_assert_not_reached(). */
+static void remap_channels_matrix_c (pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    const unsigned in_channels = m->i_ss->channels;
+    const unsigned out_channels = m->o_ss->channels;
+    unsigned o, c, left;
+
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE: {
+            const float *in;
+            float *out;
+
+            memset(dst, 0, n * sizeof (float) * out_channels);
+
+            for (o = 0; o < out_channels; o++)
+                for (c = 0; c < in_channels; c++) {
+                    const float gain = m->map_table_f[o][c];
+
+                    /* this input does not feed this output */
+                    if (gain <= 0.0)
+                        continue;
+
+                    /* walk the interleaved buffers one frame at a time */
+                    in = (const float *) src + c;
+                    out = (float *) dst + o;
+
+                    if (gain >= 1.0)
+                        for (left = n; left > 0; left--, in += in_channels, out += out_channels)
+                            *out += *in;
+                    else
+                        for (left = n; left > 0; left--, in += in_channels, out += out_channels)
+                            *out += *in * gain;
+                }
+
+            break;
+        }
+        case PA_SAMPLE_S16NE: {
+            const int16_t *in;
+            int16_t *out;
+
+            memset(dst, 0, n * sizeof (int16_t) * out_channels);
+
+            for (o = 0; o < out_channels; o++)
+                for (c = 0; c < in_channels; c++) {
+                    const int32_t gain = m->map_table_i[o][c];
+
+                    /* this input does not feed this output */
+                    if (gain <= 0)
+                        continue;
+
+                    /* walk the interleaved buffers one frame at a time */
+                    in = (const int16_t *) src + c;
+                    out = (int16_t *) dst + o;
+
+                    if (gain >= 0x10000)
+                        for (left = n; left > 0; left--, in += in_channels, out += out_channels)
+                            *out += *in;
+                    else
+                        for (left = n; left > 0; left--, in += in_channels, out += out_channels)
+                            *out += (int16_t) (((int32_t)*in * gain) >> 16);
+                }
+
+            break;
+        }
+        default:
+            pa_assert_not_reached();
+    }
+}
+
+/* Pick the C routine that will do the remapping for the matrices stored in m
+ * and install it in m->do_remap: the dedicated copy loop for mono input going
+ * at unity gain or above to both stereo outputs, the matrix mixer otherwise. */
+static void init_remap_c (pa_remap_t *m) {
+    const unsigned in_channels = m->i_ss->channels;
+    const unsigned out_channels = m->o_ss->channels;
+    /* everything that is not a plain mono to stereo copy needs the matrix */
+    if (in_channels != 1 || out_channels != 2 ||
+            !(m->map_table_f[0][0] >= 1.0) || !(m->map_table_f[1][0] >= 1.0)) {
+        m->do_remap = (pa_do_remap_func_t) remap_channels_matrix_c;
+        pa_log_info("Using generic matrix remapping");
+        return;
+    }
+
+    m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_c;
+    pa_log_info("Using mono to stereo remapping");
+}
+
+
+/* Init function consulted by pa_init_remap(); starts out as the C one. */
+static pa_init_remap_func_t remap_func = init_remap_c;
+
+/* Fill in m->do_remap: the installed init function gets the first shot, and
+ * if it leaves m->do_remap unset the C init function picks a routine. */
+void pa_init_remap (pa_remap_t *m) {
+    pa_assert (remap_func);
+
+    /* cleared first so that we can tell whether the hook chose anything */
+    m->do_remap = NULL;
+    remap_func (m);
+
+    /* nothing was installed, fallback to C version */
+    if (!m->do_remap)
+        init_remap_c (m);
+}
+/* Return the init function that pa_init_remap() currently calls first. */
+pa_init_remap_func_t pa_get_init_remap_func(void) {
+    return remap_func;
+}
+/* Install func as that function; a NULL is only caught by pa_init_remap(). */
+void pa_set_init_remap_func(pa_init_remap_func_t func) {
+    remap_func = func;
+}
diff --git a/src/pulsecore/remap.h b/src/pulsecore/remap.h
new file mode 100644
index 00000000..32a67cdd
--- /dev/null
+++ b/src/pulsecore/remap.h
@@ -0,0 +1,48 @@
+#ifndef fooremapfoo
+#define fooremapfoo
+
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk.com>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#include <pulse/sample.h>
+
+typedef struct pa_remap pa_remap_t;
+/* remap routine: m is the state below, d the output, s the input, n the frame count */
+typedef void (*pa_do_remap_func_t) (pa_remap_t *m, void *d, const void *s, unsigned n);
+
+struct pa_remap {
+    pa_sample_format_t *format; /* sample format the remap routines switch on */
+    pa_sample_spec *i_ss, *o_ss; /* input and output sample spec */
+    float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; /* gains, [output][input] */
+    int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; /* same gains, 16:16 fixed point */
+    pa_do_remap_func_t do_remap; /* routine selected by pa_init_remap() */
+};
+/* choose the remap routine for the tables in m and store it in m->do_remap */
+void pa_init_remap (pa_remap_t *m);
+
+/* custom installation of init functions */
+typedef void (*pa_init_remap_func_t) (pa_remap_t *m);
+
+pa_init_remap_func_t pa_get_init_remap_func(void);
+void pa_set_init_remap_func(pa_init_remap_func_t func);
+
+#endif /* fooremapfoo */
diff --git a/src/pulsecore/remap_mmx.c b/src/pulsecore/remap_mmx.c
new file mode 100644
index 00000000..bfcae6c5
--- /dev/null
+++ b/src/pulsecore/remap_mmx.c
@@ -0,0 +1,148 @@
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk.com>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#include <pulse/sample.h>
+#include <pulsecore/log.h>
+#include <pulsecore/macro.h>
+
+#include "cpu-x86.h"
+#include "remap.h"
+/* load 32 bytes from (%1) into mm0/mm2/mm4/mm6 and copy them to mm1/mm3/mm5/mm7 */
+#define LOAD_SAMPLES                                   \
+                " movq (%1), %%mm0              \n\t"  \
+                " movq 8(%1), %%mm2             \n\t"  \
+                " movq 16(%1), %%mm4            \n\t"  \
+                " movq 24(%1), %%mm6            \n\t"  \
+                " movq %%mm0, %%mm1             \n\t"  \
+                " movq %%mm2, %%mm3             \n\t"  \
+                " movq %%mm4, %%mm5             \n\t"  \
+                " movq %%mm6, %%mm7             \n\t"
+/* punpckl<s> the even, punpckh<s> the odd registers with themselves: each element is doubled */
+#define UNPACK_SAMPLES(s)                              \
+                " punpckl"#s" %%mm0, %%mm0      \n\t"  \
+                " punpckh"#s" %%mm1, %%mm1      \n\t"  \
+                " punpckl"#s" %%mm2, %%mm2      \n\t"  \
+                " punpckh"#s" %%mm3, %%mm3      \n\t"  \
+                " punpckl"#s" %%mm4, %%mm4      \n\t"  \
+                " punpckh"#s" %%mm5, %%mm5      \n\t"  \
+                " punpckl"#s" %%mm6, %%mm6      \n\t"  \
+                " punpckh"#s" %%mm7, %%mm7      \n\t"
+/* store mm0..mm7 (64 bytes) at (%0), then advance %1 by 32 and %0 by 64 bytes */
+#define STORE_SAMPLES                                  \
+                " movq %%mm0, (%0)              \n\t"  \
+                " movq %%mm1, 8(%0)             \n\t"  \
+                " movq %%mm2, 16(%0)            \n\t"  \
+                " movq %%mm3, 24(%0)            \n\t"  \
+                " movq %%mm4, 32(%0)            \n\t"  \
+                " movq %%mm5, 40(%0)            \n\t"  \
+                " movq %%mm6, 48(%0)            \n\t"  \
+                " movq %%mm7, 56(%0)            \n\t"  \
+                " add $32, %1                   \n\t"  \
+                " add $64, %0                   \n\t"
+/* the same for one 32 bit load from (%1): 8 bytes stored at (%0), %1 += 4, %0 += 8 */
+#define HANDLE_SINGLE(s)                               \
+                " movd (%1), %%mm0              \n\t"  \
+                " movq %%mm0, %%mm1             \n\t"  \
+                " punpckl"#s" %%mm0, %%mm0      \n\t"  \
+                " movq %%mm0, (%0)              \n\t"  \
+                " add $4, %1                    \n\t"  \
+                " add $8, %0                    \n\t"
+/* %3 counts 32 bit input units: %3 >> 3 blocks, then %3 & 7 singles; %2 is scratch */
+#define MONO_TO_STEREO(s)                               \
+                " mov %3, %2                    \n\t"   \
+                " sar $3, %2                    \n\t"   \
+                " cmp $0, %2                    \n\t"   \
+                " je 2f                         \n\t"   \
+                "1:                             \n\t"   \
+                LOAD_SAMPLES                            \
+                UNPACK_SAMPLES(s)                       \
+                STORE_SAMPLES                           \
+                " dec %2                        \n\t"   \
+                " jne 1b                        \n\t"   \
+                "2:                             \n\t"   \
+                " mov %3, %2                    \n\t"   \
+                " and $7, %2                    \n\t"   \
+                " je 4f                         \n\t"   \
+                "3:                             \n\t"   \
+                HANDLE_SINGLE(s)                        \
+                " dec %2                        \n\t"   \
+                " jne 3b                        \n\t"   \
+                "4:                             \n\t"   \
+                " emms                          \n\t"
+
+static void remap_mono_to_stereo_mmx (pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    pa_reg_x86 temp; /* scratch loop counter for the asm */
+
+    /* Duplicate n mono samples from src into n stereo frames at dst. The asm
+     * advances dst and src ("+r") and stores through dst ("memory"). */
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE:
+            __asm__ __volatile__ (
+                MONO_TO_STEREO(dq) /* do doubles to quads */
+                : "+r" (dst), "+r" (src), "=&r" (temp)
+                : "r" ((pa_reg_x86)n)
+                : "cc", "memory");
+            break;
+        case PA_SAMPLE_S16NE:
+            /* MONO_TO_STEREO counts 32 bit units, that is two 16 bit samples
+             * each: hand it n / 2, a count of n would process 2 * n samples. */
+            __asm__ __volatile__ (
+                MONO_TO_STEREO(wd) /* do words to doubles */
+                : "+r" (dst), "+r" (src), "=&r" (temp)
+                : "r" ((pa_reg_x86)(n >> 1))
+                : "cc", "memory");
+            if (n & 1) /* odd n: one sample is left over, duplicate it in C */
+                ((int16_t *) dst)[0] = ((int16_t *) dst)[1] = *(const int16_t *) src;
+            break;
+        default:
+            pa_assert_not_reached();
+    }
+}
+
+/* Install the MMX routine in m->do_remap for mono input going at unity gain or
+ * above to both stereo outputs; otherwise m->do_remap is left untouched. */
+static void init_remap_mmx (pa_remap_t *m) {
+    const unsigned in_channels = m->i_ss->channels;
+    const unsigned out_channels = m->o_ss->channels;
+
+    if (in_channels != 1 || out_channels != 2)
+        return;
+    if (!(m->map_table_f[0][0] >= 1.0) || !(m->map_table_f[1][0] >= 1.0))
+        return;
+
+    m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_mmx;
+    pa_log_info("Using MMX mono to stereo remapping");
+}
+/* Make init_remap_mmx the remap init function on i386/amd64; no-op elsewhere. */
+void pa_remap_func_init_mmx (pa_cpu_x86_flag_t flags) {
+#if defined (__i386__) || defined (__amd64__)
+    pa_log_info("Initialising MMX optimized remappers.");
+    /* NOTE(review): flags is not examined here; confirm the caller checks for MMX */
+    pa_set_init_remap_func ((pa_init_remap_func_t) init_remap_mmx);
+#endif /* defined (__i386__) || defined (__amd64__) */
+}
diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c
index 59e0a0c1..f1bfa156 100644
--- a/src/pulsecore/resampler.c
+++ b/src/pulsecore/resampler.c
@@ -31,9 +31,6 @@
 
 #include <speex/speex_resampler.h>
 
-#include <liboil/liboilfuncs.h>
-#include <liboil/liboil.h>
-
 #include <pulse/xmalloc.h>
 #include <pulsecore/sconv.h>
 #include <pulsecore/log.h>
@@ -43,6 +40,7 @@
 #include "ffmpeg/avcodec.h"
 
 #include "resampler.h"
+#include "remap.h"
 
 /* Number of samples of extra space we allow the resamplers to return */
 #define EXTRA_FRAMES 128
@@ -64,7 +62,7 @@ struct pa_resampler {
     pa_convert_func_t to_work_format_func;
     pa_convert_func_t from_work_format_func;
 
-    float map_table[PA_CHANNELS_MAX][PA_CHANNELS_MAX];
+    pa_remap_t remap;
     pa_bool_t map_required;
 
     void (*impl_free)(pa_resampler *r);
@@ -214,6 +212,11 @@ pa_resampler* pa_resampler_new(
     r->i_ss = *a;
     r->o_ss = *b;
 
+    /* set up the remap structure */
+    r->remap.i_ss = &r->i_ss;
+    r->remap.o_ss = &r->o_ss;
+    r->remap.format = &r->work_format;
+
     if (am)
         r->i_cm = *am;
     else if (!pa_channel_map_init_auto(&r->i_cm, r->i_ss.channels, PA_CHANNEL_MAP_DEFAULT))
@@ -580,32 +583,41 @@ static int front_rear_side(pa_channel_position_t p) {
 
 static void calc_map_table(pa_resampler *r) {
     unsigned oc, ic;
+    unsigned n_oc, n_ic;
     pa_bool_t ic_connected[PA_CHANNELS_MAX];
     pa_bool_t remix;
     pa_strbuf *s;
     char *t;
+    pa_remap_t *m;
 
     pa_assert(r);
 
     if (!(r->map_required = (r->i_ss.channels != r->o_ss.channels || (!(r->flags & PA_RESAMPLER_NO_REMAP) && !pa_channel_map_equal(&r->i_cm, &r->o_cm)))))
         return;
 
-    memset(r->map_table, 0, sizeof(r->map_table));
+    m = &r->remap;
+
+    n_oc = r->o_ss.channels;
+    n_ic = r->i_ss.channels;
+
+    memset(m->map_table_f, 0, sizeof(m->map_table_f));
+    memset(m->map_table_i, 0, sizeof(m->map_table_i));
+
     memset(ic_connected, 0, sizeof(ic_connected));
     remix = (r->flags & (PA_RESAMPLER_NO_REMAP|PA_RESAMPLER_NO_REMIX)) == 0;
 
-    for (oc = 0; oc < r->o_ss.channels; oc++) {
+    for (oc = 0; oc < n_oc; oc++) {
         pa_bool_t oc_connected = FALSE;
         pa_channel_position_t b = r->o_cm.map[oc];
 
-        for (ic = 0; ic < r->i_ss.channels; ic++) {
+        for (ic = 0; ic < n_ic; ic++) {
             pa_channel_position_t a = r->i_cm.map[ic];
 
             if (r->flags & PA_RESAMPLER_NO_REMAP) {
                 /* We shall not do any remapping. Hence, just check by index */
 
                 if (ic == oc)
-                    r->map_table[oc][ic] = 1.0;
+                    m->map_table_f[oc][ic] = 1.0;
 
                 continue;
             }
@@ -614,7 +626,7 @@ static void calc_map_table(pa_resampler *r) {
                 /* We shall not do any remixing. Hence, just check by name */
 
                 if (a == b)
-                    r->map_table[oc][ic] = 1.0;
+                    m->map_table_f[oc][ic] = 1.0;
 
                 continue;
             }
@@ -689,7 +701,7 @@ static void calc_map_table(pa_resampler *r) {
              */
 
             if (a == b || a == PA_CHANNEL_POSITION_MONO || b == PA_CHANNEL_POSITION_MONO) {
-                r->map_table[oc][ic] = 1.0;
+                m->map_table_f[oc][ic] = 1.0;
 
                 oc_connected = TRUE;
                 ic_connected[ic] = TRUE;
@@ -707,14 +719,14 @@ static void calc_map_table(pa_resampler *r) {
                 /* We are not connected and on the left side, let's
                  * average all left side input channels. */
 
-                for (ic = 0; ic < r->i_ss.channels; ic++)
+                for (ic = 0; ic < n_ic; ic++)
                     if (on_left(r->i_cm.map[ic]))
                         n++;
 
                 if (n > 0)
-                    for (ic = 0; ic < r->i_ss.channels; ic++)
+                    for (ic = 0; ic < n_ic; ic++)
                         if (on_left(r->i_cm.map[ic])) {
-                            r->map_table[oc][ic] = 1.0f / (float) n;
+                            m->map_table_f[oc][ic] = 1.0f / (float) n;
                             ic_connected[ic] = TRUE;
                         }
 
@@ -728,14 +740,14 @@ static void calc_map_table(pa_resampler *r) {
                 /* We are not connected and on the right side, let's
                  * average all right side input channels. */
 
-                for (ic = 0; ic < r->i_ss.channels; ic++)
+                for (ic = 0; ic < n_ic; ic++)
                     if (on_right(r->i_cm.map[ic]))
                         n++;
 
                 if (n > 0)
-                    for (ic = 0; ic < r->i_ss.channels; ic++)
+                    for (ic = 0; ic < n_ic; ic++)
                         if (on_right(r->i_cm.map[ic])) {
-                            r->map_table[oc][ic] = 1.0f / (float) n;
+                            m->map_table_f[oc][ic] = 1.0f / (float) n;
                             ic_connected[ic] = TRUE;
                         }
 
@@ -749,14 +761,14 @@ static void calc_map_table(pa_resampler *r) {
                 /* We are not connected and at the center. Let's
                  * average all center input channels. */
 
-                for (ic = 0; ic < r->i_ss.channels; ic++)
+                for (ic = 0; ic < n_ic; ic++)
                     if (on_center(r->i_cm.map[ic]))
                         n++;
 
                 if (n > 0) {
-                    for (ic = 0; ic < r->i_ss.channels; ic++)
+                    for (ic = 0; ic < n_ic; ic++)
                         if (on_center(r->i_cm.map[ic])) {
-                            r->map_table[oc][ic] = 1.0f / (float) n;
+                            m->map_table_f[oc][ic] = 1.0f / (float) n;
                             ic_connected[ic] = TRUE;
                         }
                 } else {
@@ -766,14 +778,14 @@ static void calc_map_table(pa_resampler *r) {
 
                     n = 0;
 
-                    for (ic = 0; ic < r->i_ss.channels; ic++)
+                    for (ic = 0; ic < n_ic; ic++)
                         if (on_left(r->i_cm.map[ic]) || on_right(r->i_cm.map[ic]))
                             n++;
 
                     if (n > 0)
-                        for (ic = 0; ic < r->i_ss.channels; ic++)
+                        for (ic = 0; ic < n_ic; ic++)
                             if (on_left(r->i_cm.map[ic]) || on_right(r->i_cm.map[ic])) {
-                                r->map_table[oc][ic] = 1.0f / (float) n;
+                                m->map_table_f[oc][ic] = 1.0f / (float) n;
                                 ic_connected[ic] = TRUE;
                             }
 
@@ -787,12 +799,12 @@ static void calc_map_table(pa_resampler *r) {
                 /* We are not connected and an LFE. Let's average all
                  * channels for LFE. */
 
-                for (ic = 0; ic < r->i_ss.channels; ic++) {
+                for (ic = 0; ic < n_ic; ic++) {
 
                     if (!(r->flags & PA_RESAMPLER_NO_LFE))
-                        r->map_table[oc][ic] = 1.0f / (float) r->i_ss.channels;
+                        m->map_table_f[oc][ic] = 1.0f / (float) n_ic;
                     else
-                        r->map_table[oc][ic] = 0;
+                        m->map_table_f[oc][ic] = 0;
 
                     /* Please note that a channel connected to LFE
                      * doesn't really count as connected. */
@@ -808,7 +820,7 @@ static void calc_map_table(pa_resampler *r) {
             ic_unconnected_center = 0,
             ic_unconnected_lfe = 0;
 
-        for (ic = 0; ic < r->i_ss.channels; ic++) {
+        for (ic = 0; ic < n_ic; ic++) {
             pa_channel_position_t a = r->i_cm.map[ic];
 
             if (ic_connected[ic])
@@ -831,20 +843,20 @@ static void calc_map_table(pa_resampler *r) {
              * the left side by .9 and add in our averaged unconnected
              * channels multplied by .1 */
 
-            for (oc = 0; oc < r->o_ss.channels; oc++) {
+            for (oc = 0; oc < n_oc; oc++) {
 
                 if (!on_left(r->o_cm.map[oc]))
                     continue;
 
-                for (ic = 0; ic < r->i_ss.channels; ic++) {
+                for (ic = 0; ic < n_ic; ic++) {
 
                     if (ic_connected[ic]) {
-                        r->map_table[oc][ic] *= .9f;
+                        m->map_table_f[oc][ic] *= .9f;
                         continue;
                     }
 
                     if (on_left(r->i_cm.map[ic]))
-                        r->map_table[oc][ic] = .1f / (float) ic_unconnected_left;
+                        m->map_table_f[oc][ic] = .1f / (float) ic_unconnected_left;
                 }
             }
         }
@@ -856,20 +868,20 @@ static void calc_map_table(pa_resampler *r) {
              * the right side by .9 and add in our averaged unconnected
              * channels multplied by .1 */
 
-            for (oc = 0; oc < r->o_ss.channels; oc++) {
+            for (oc = 0; oc < n_oc; oc++) {
 
                 if (!on_right(r->o_cm.map[oc]))
                     continue;
 
-                for (ic = 0; ic < r->i_ss.channels; ic++) {
+                for (ic = 0; ic < n_ic; ic++) {
 
                     if (ic_connected[ic]) {
-                        r->map_table[oc][ic] *= .9f;
+                        m->map_table_f[oc][ic] *= .9f;
                         continue;
                     }
 
                     if (on_right(r->i_cm.map[ic]))
-                        r->map_table[oc][ic] = .1f / (float) ic_unconnected_right;
+                        m->map_table_f[oc][ic] = .1f / (float) ic_unconnected_right;
                 }
             }
         }
@@ -882,20 +894,20 @@ static void calc_map_table(pa_resampler *r) {
              * the center side by .9 and add in our averaged unconnected
              * channels multplied by .1 */
 
-            for (oc = 0; oc < r->o_ss.channels; oc++) {
+            for (oc = 0; oc < n_oc; oc++) {
 
                 if (!on_center(r->o_cm.map[oc]))
                     continue;
 
-                for (ic = 0; ic < r->i_ss.channels; ic++)  {
+                for (ic = 0; ic < n_ic; ic++)  {
 
                     if (ic_connected[ic]) {
-                        r->map_table[oc][ic] *= .9f;
+                        m->map_table_f[oc][ic] *= .9f;
                         continue;
                     }
 
                     if (on_center(r->i_cm.map[ic])) {
-                        r->map_table[oc][ic] = .1f / (float) ic_unconnected_center;
+                        m->map_table_f[oc][ic] = .1f / (float) ic_unconnected_center;
                         mixed_in = TRUE;
                     }
                 }
@@ -913,7 +925,7 @@ static void calc_map_table(pa_resampler *r) {
                    it into left and right. Using .375 and 0.75 as
                    factors. */
 
-                for (ic = 0; ic < r->i_ss.channels; ic++) {
+                for (ic = 0; ic < n_ic; ic++) {
 
                     if (ic_connected[ic])
                         continue;
@@ -921,7 +933,7 @@ static void calc_map_table(pa_resampler *r) {
                     if (!on_center(r->i_cm.map[ic]))
                         continue;
 
-                    for (oc = 0; oc < r->o_ss.channels; oc++) {
+                    for (oc = 0; oc < n_oc; oc++) {
 
                         if (!on_left(r->o_cm.map[oc]) && !on_right(r->o_cm.map[oc]))
                             continue;
@@ -932,7 +944,7 @@ static void calc_map_table(pa_resampler *r) {
                         }
                     }
 
-                    for (oc = 0; oc < r->o_ss.channels; oc++) {
+                    for (oc = 0; oc < n_oc; oc++) {
 
                         if (!on_left(r->o_cm.map[oc]) && !on_right(r->o_cm.map[oc]))
                             continue;
@@ -942,7 +954,7 @@ static void calc_map_table(pa_resampler *r) {
                     }
                 }
 
-                for (oc = 0; oc < r->o_ss.channels; oc++) {
+                for (oc = 0; oc < n_oc; oc++) {
 
                     if (!on_left(r->o_cm.map[oc]) && !on_right(r->o_cm.map[oc]))
                         continue;
@@ -950,10 +962,10 @@ static void calc_map_table(pa_resampler *r) {
                     if (ncenter[oc] <= 0)
                         continue;
 
-                    for (ic = 0; ic < r->i_ss.channels; ic++)  {
+                    for (ic = 0; ic < n_ic; ic++)  {
 
                         if (ic_connected[ic]) {
-                            r->map_table[oc][ic] *= .75f;
+                            m->map_table_f[oc][ic] *= .75f;
                             continue;
                         }
 
@@ -961,7 +973,7 @@ static void calc_map_table(pa_resampler *r) {
                             continue;
 
                         if (!found_frs[ic] || front_rear_side(r->i_cm.map[ic]) == front_rear_side(r->o_cm.map[oc]))
-                            r->map_table[oc][ic] = .375f / (float) ncenter[oc];
+                            m->map_table_f[oc][ic] = .375f / (float) ncenter[oc];
                     }
                 }
             }
@@ -972,40 +984,46 @@ static void calc_map_table(pa_resampler *r) {
             /* OK, so there is an unconnected LFE channel. Let's mix
              * it into all channels, with factor 0.375 */
 
-            for (ic = 0; ic < r->i_ss.channels; ic++)  {
+            for (ic = 0; ic < n_ic; ic++)  {
 
                 if (!on_lfe(r->i_cm.map[ic]))
                     continue;
 
-                for (oc = 0; oc < r->o_ss.channels; oc++)
-                    r->map_table[oc][ic] = 0.375f / (float) ic_unconnected_lfe;
+                for (oc = 0; oc < n_oc; oc++)
+                    m->map_table_f[oc][ic] = 0.375f / (float) ic_unconnected_lfe;
             }
         }
     }
-
+    /* make a 16:16 fixed point integer version of the matrix */
+    for (oc = 0; oc < n_oc; oc++)
+        for (ic = 0; ic < n_ic; ic++)
+            m->map_table_i[oc][ic] = (int32_t) (m->map_table_f[oc][ic] * 0x10000);
 
     s = pa_strbuf_new();
 
     pa_strbuf_printf(s, "     ");
-    for (ic = 0; ic < r->i_ss.channels; ic++)
+    for (ic = 0; ic < n_ic; ic++)
         pa_strbuf_printf(s, "  I%02u ", ic);
     pa_strbuf_puts(s, "\n    +");
 
-    for (ic = 0; ic < r->i_ss.channels; ic++)
+    for (ic = 0; ic < n_ic; ic++)
         pa_strbuf_printf(s, "------");
     pa_strbuf_puts(s, "\n");
 
-    for (oc = 0; oc < r->o_ss.channels; oc++) {
+    for (oc = 0; oc < n_oc; oc++) {
         pa_strbuf_printf(s, "O%02u |", oc);
 
-        for (ic = 0; ic < r->i_ss.channels; ic++)
-            pa_strbuf_printf(s, " %1.3f", r->map_table[oc][ic]);
+        for (ic = 0; ic < n_ic; ic++)
+            pa_strbuf_printf(s, " %1.3f", m->map_table_f[oc][ic]);
 
         pa_strbuf_puts(s, "\n");
     }
 
     pa_log_debug("Channel matrix:\n%s", t = pa_strbuf_tostring_free(s));
     pa_xfree(t);
+
+    /* initialize the remapping function */
+    pa_init_remap (m);
 }
 
 static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input) {
@@ -1045,41 +1063,10 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input)
     return &r->buf1;
 }
 
-static void vectoradd_s16_with_fraction(
-        int16_t *d, int dstr,
-        const int16_t *s1, int sstr1,
-        const int16_t *s2, int sstr2,
-        int n,
-        float s3, float s4) {
-
-    int32_t i3, i4;
-
-    i3 = (int32_t) (s3 * 0x10000);
-    i4 = (int32_t) (s4 * 0x10000);
-
-    for (; n > 0; n--) {
-        int32_t a, b;
-
-        a = *s1;
-        b = *s2;
-
-        a = (a * i3) / 0x10000;
-        b = (b * i4) / 0x10000;
-
-        *d = (int16_t) (a + b);
-
-        s1 = (const int16_t*) ((const uint8_t*) s1 + sstr1);
-        s2 = (const int16_t*) ((const uint8_t*) s2 + sstr2);
-        d = (int16_t*) ((uint8_t*) d + dstr);
-
-    }
-}
-
 static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {
     unsigned in_n_samples, out_n_samples, n_frames;
-    int i_skip, o_skip;
-    unsigned oc;
     void *src, *dst;
+    pa_remap_t *remap;
 
     pa_assert(r);
     pa_assert(input);
@@ -1108,76 +1095,14 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {
     src = ((uint8_t*) pa_memblock_acquire(input->memblock) + input->index);
     dst = pa_memblock_acquire(r->buf2.memblock);
 
-    memset(dst, 0, r->buf2.length);
-
-    o_skip = (int) (r->w_sz * r->o_ss.channels);
-    i_skip = (int) (r->w_sz * r->i_ss.channels);
-
-    switch (r->work_format) {
-        case PA_SAMPLE_FLOAT32NE:
-
-            for (oc = 0; oc < r->o_ss.channels; oc++) {
-                unsigned ic;
-                static const float one = 1.0;
-
-                for (ic = 0; ic < r->i_ss.channels; ic++) {
+    remap = &r->remap;
 
-                    if (r->map_table[oc][ic] <= 0.0)
-                        continue;
-
-                    oil_vectoradd_f32(
-                            (float*) dst + oc, o_skip,
-                            (float*) dst + oc, o_skip,
-                            (float*) src + ic, i_skip,
-                            (int) n_frames,
-                            &one, &r->map_table[oc][ic]);
-                }
-            }
-
-            break;
-
-        case PA_SAMPLE_S16NE:
-
-            for (oc = 0; oc < r->o_ss.channels; oc++) {
-                unsigned ic;
-
-                for (ic = 0; ic < r->i_ss.channels; ic++) {
-
-                    if (r->map_table[oc][ic] <= 0.0)
-                        continue;
-
-                    if (r->map_table[oc][ic] >= 1.0) {
-                        static const int16_t one = 1;
-
-                        oil_vectoradd_s16(
-                                (int16_t*) dst + oc, o_skip,
-                                (int16_t*) dst + oc, o_skip,
-                                (int16_t*) src + ic, i_skip,
-                                (int) n_frames,
-                                &one, &one);
-
-                    } else
-
-                        vectoradd_s16_with_fraction(
-                                (int16_t*) dst + oc, o_skip,
-                                (int16_t*) dst + oc, o_skip,
-                                (int16_t*) src + ic, i_skip,
-                                (int) n_frames,
-                                1.0f, r->map_table[oc][ic]);
-                }
-            }
-
-            break;
-
-        default:
-            pa_assert_not_reached();
-    }
+    pa_assert (remap->do_remap);
+    remap->do_remap (remap, dst, src, n_frames);
 
     pa_memblock_release(input->memblock);
     pa_memblock_release(r->buf2.memblock);
 
-    r->buf2.length = out_n_samples * r->w_sz;
-
     return &r->buf2;
 }
 
@@ -1469,7 +1394,7 @@ static void trivial_resample(pa_resampler *r, const pa_memchunk *input, unsigned
 
         pa_assert(o_index * fz < pa_memblock_get_length(output->memblock));
 
-        oil_memcpy((uint8_t*) dst + fz * o_index,
+        memcpy((uint8_t*) dst + fz * o_index,
                    (uint8_t*) src + fz * j, (int) fz);
     }
 
diff --git a/src/pulsecore/rtpoll.c b/src/pulsecore/rtpoll.c
index 42708a8a..666cbc98 100644
--- a/src/pulsecore/rtpoll.c
+++ b/src/pulsecore/rtpoll.c
@@ -63,6 +63,7 @@ struct pa_rtpoll {
     pa_bool_t running:1;
     pa_bool_t rebuild_needed:1;
     pa_bool_t quit:1;
+    pa_bool_t timer_elapsed:1;
 
 #ifdef DEBUG_TIMING
     pa_usec_t timestamp;
@@ -94,26 +95,14 @@ PA_STATIC_FLIST_DECLARE(items, 0, pa_xfree);
 pa_rtpoll *pa_rtpoll_new(void) {
     pa_rtpoll *p;
 
-    p = pa_xnew(pa_rtpoll, 1);
+    p = pa_xnew0(pa_rtpoll, 1);
 
     p->n_pollfd_alloc = 32;
     p->pollfd = pa_xnew(struct pollfd, p->n_pollfd_alloc);
     p->pollfd2 = pa_xnew(struct pollfd, p->n_pollfd_alloc);
-    p->n_pollfd_used = 0;
-
-    pa_zero(p->next_elapse);
-    p->timer_enabled = FALSE;
-
-    p->running = FALSE;
-    p->scan_for_dead = FALSE;
-    p->rebuild_needed = FALSE;
-    p->quit = FALSE;
-
-    PA_LLIST_HEAD_INIT(pa_rtpoll_item, p->items);
 
 #ifdef DEBUG_TIMING
     p->timestamp = pa_rtclock_now();
-    p->slept = p->awake = 0;
 #endif
 
     return p;
@@ -229,6 +218,7 @@ int pa_rtpoll_run(pa_rtpoll *p, pa_bool_t wait_op) {
     pa_assert(!p->running);
 
     p->running = TRUE;
+    p->timer_elapsed = FALSE;
 
     /* First, let's do some work */
     for (i = p->items; i && i->priority < PA_RTPOLL_NEVER; i = i->next) {
@@ -286,7 +276,7 @@ int pa_rtpoll_run(pa_rtpoll *p, pa_bool_t wait_op) {
     if (p->rebuild_needed)
         rtpoll_rebuild(p);
 
-    memset(&timeout, 0, sizeof(timeout));
+    pa_zero(timeout);
 
     /* Calculate timeout */
     if (wait_op && !p->quit && p->timer_enabled) {
@@ -314,9 +304,11 @@ int pa_rtpoll_run(pa_rtpoll *p, pa_bool_t wait_op) {
         r = ppoll(p->pollfd, p->n_pollfd_used, (!wait_op || p->quit || p->timer_enabled) ? &ts : NULL, NULL);
     }
 #else
-        r = poll(p->pollfd, p->n_pollfd_used, (!wait_op || p->quit || p->timer_enabled) ? (int) ((timeout.tv_sec*1000) + (timeout.tv_usec / 1000)) : -1);
+    r = poll(p->pollfd, p->n_pollfd_used, (!wait_op || p->quit || p->timer_enabled) ? (int) ((timeout.tv_sec*1000) + (timeout.tv_usec / 1000)) : -1);
 #endif
 
+    p->timer_elapsed = r == 0;
+
 #ifdef DEBUG_TIMING
     {
         pa_usec_t now = pa_rtclock_now();
@@ -628,3 +620,9 @@ void pa_rtpoll_quit(pa_rtpoll *p) {
 
     p->quit = TRUE;
 }
+
+pa_bool_t pa_rtpoll_timer_elapsed(pa_rtpoll *p) {
+    pa_assert(p);
+
+    return p->timer_elapsed;
+}
diff --git a/src/pulsecore/rtpoll.h b/src/pulsecore/rtpoll.h
index d2d69cad..b2a87fca 100644
--- a/src/pulsecore/rtpoll.h
+++ b/src/pulsecore/rtpoll.h
@@ -73,6 +73,10 @@ void pa_rtpoll_set_timer_absolute(pa_rtpoll *p, pa_usec_t usec);
 void pa_rtpoll_set_timer_relative(pa_rtpoll *p, pa_usec_t usec);
 void pa_rtpoll_set_timer_disabled(pa_rtpoll *p);
 
+/* Return TRUE when the last pa_rtpoll_run() invocation finished because
+ * its poll()/ppoll() call timed out, i.e. returned 0 */
+pa_bool_t pa_rtpoll_timer_elapsed(pa_rtpoll *p);
+
 /* A new fd wakeup item for pa_rtpoll */
 pa_rtpoll_item *pa_rtpoll_item_new(pa_rtpoll *p, pa_rtpoll_priority_t prio, unsigned n_fds);
 void pa_rtpoll_item_free(pa_rtpoll_item *i);
diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c
index 5b8ccf59..6e97e5a9 100644
--- a/src/pulsecore/sample-util.c
+++ b/src/pulsecore/sample-util.c
@@ -30,9 +30,6 @@
 #include <stdio.h>
 #include <errno.h>
 
-#include <liboil/liboilfuncs.h>
-#include <liboil/liboil.h>
-
 #include <pulse/timeval.h>
 
 #include <pulsecore/log.h>
@@ -106,24 +103,36 @@ void* pa_silence_memory(void *p, size_t length, const pa_sample_spec *spec) {
     return p;
 }
 
+#define VOLUME_PADDING 32
+
 static void calc_linear_integer_volume(int32_t linear[], const pa_cvolume *volume) {
-    unsigned channel;
+    unsigned channel, nchannels, padding;
 
     pa_assert(linear);
     pa_assert(volume);
 
-    for (channel = 0; channel < volume->channels; channel++)
+    nchannels = volume->channels;
+
+    for (channel = 0; channel < nchannels; channel++)
         linear[channel] = (int32_t) lrint(pa_sw_volume_to_linear(volume->values[channel]) * 0x10000);
+
+    for (padding = 0; padding < VOLUME_PADDING; padding++, channel++)
+        linear[channel] = linear[padding];
 }
 
 static void calc_linear_float_volume(float linear[], const pa_cvolume *volume) {
-    unsigned channel;
+    unsigned channel, nchannels, padding;
 
     pa_assert(linear);
     pa_assert(volume);
 
-    for (channel = 0; channel < volume->channels; channel++)
+    nchannels = volume->channels;
+
+    for (channel = 0; channel < nchannels; channel++)
         linear[channel] = (float) pa_sw_volume_to_linear(volume->values[channel]);
+
+    for (padding = 0; padding < VOLUME_PADDING; padding++, channel++)
+        linear[channel] = linear[padding];
 }
 
 static void calc_linear_integer_stream_volumes(pa_mix_info streams[], unsigned nstreams, const pa_cvolume *volume, const pa_sample_spec *spec) {
@@ -690,6 +699,28 @@ size_t pa_mix(
     return length;
 }
 
+typedef union {
+  float f;
+  uint32_t i;
+} volume_val;
+
+typedef void (*pa_calc_volume_func_t) (void *volumes, const pa_cvolume *volume);
+
+static const pa_calc_volume_func_t calc_volume_table[] = {
+  [PA_SAMPLE_U8]        = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_ALAW]      = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_ULAW]      = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S16LE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S16BE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_FLOAT32LE] = (pa_calc_volume_func_t) calc_linear_float_volume,
+  [PA_SAMPLE_FLOAT32BE] = (pa_calc_volume_func_t) calc_linear_float_volume,
+  [PA_SAMPLE_S32LE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S32BE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S24LE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S24BE]     = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S24_32LE]  = (pa_calc_volume_func_t) calc_linear_integer_volume,
+  [PA_SAMPLE_S24_32BE]  = (pa_calc_volume_func_t) calc_linear_integer_volume
+};
 
 void pa_volume_memchunk(
         pa_memchunk*c,
@@ -697,6 +728,8 @@ void pa_volume_memchunk(
         const pa_cvolume *volume) {
 
     void *ptr;
+    volume_val linear[PA_CHANNELS_MAX + VOLUME_PADDING];
+    pa_do_volume_func_t do_volume;
 
     pa_assert(c);
     pa_assert(spec);
@@ -714,337 +747,19 @@ void pa_volume_memchunk(
         return;
     }
 
-    ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
-
-    switch (spec->format) {
-
-        case PA_SAMPLE_S16NE: {
-            int16_t *d, *e;
-            unsigned channel;
-            int32_t linear[PA_CHANNELS_MAX];
-
-            calc_linear_integer_volume(linear, volume);
-
-            e = (int16_t*) ptr + c->length/sizeof(int16_t);
-
-            for (channel = 0, d = ptr; d < e; d++) {
-                int32_t t, hi, lo;
-
-                /* Multiplying the 32bit volume factor with the 16bit
-                 * sample might result in an 48bit value. We want to
-                 * do without 64 bit integers and hence do the
-                 * multiplication independantly for the HI and LO part
-                 * of the volume. */
-
-                hi = linear[channel] >> 16;
-                lo = linear[channel] & 0xFFFF;
-
-                t = (int32_t)(*d);
-                t = ((t * lo) >> 16) + (t * hi);
-                t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
-                *d = (int16_t) t;
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_S16RE: {
-            int16_t *d, *e;
-            unsigned channel;
-            int32_t linear[PA_CHANNELS_MAX];
-
-            calc_linear_integer_volume(linear, volume);
-
-            e = (int16_t*) ptr + c->length/sizeof(int16_t);
-
-            for (channel = 0, d = ptr; d < e; d++) {
-                int32_t t, hi, lo;
-
-                hi = linear[channel] >> 16;
-                lo = linear[channel] & 0xFFFF;
-
-                t = (int32_t) PA_INT16_SWAP(*d);
-                t = ((t * lo) >> 16) + (t * hi);
-                t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
-                *d = PA_INT16_SWAP((int16_t) t);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
-
-        case PA_SAMPLE_S32NE: {
-            int32_t *d, *e;
-            unsigned channel;
-            int32_t linear[PA_CHANNELS_MAX];
-
-            calc_linear_integer_volume(linear, volume);
-
-            e = (int32_t*) ptr + c->length/sizeof(int32_t);
-
-            for (channel = 0, d = ptr; d < e; d++) {
-                int64_t t;
-
-                t = (int64_t)(*d);
-                t = (t * linear[channel]) >> 16;
-                t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
-                *d = (int32_t) t;
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-            break;
-        }
-
-        case PA_SAMPLE_S32RE: {
-            int32_t *d, *e;
-            unsigned channel;
-            int32_t linear[PA_CHANNELS_MAX];
-
-            calc_linear_integer_volume(linear, volume);
-
-            e = (int32_t*) ptr + c->length/sizeof(int32_t);
-
-            for (channel = 0, d = ptr; d < e; d++) {
-                int64_t t;
-
-                t = (int64_t) PA_INT32_SWAP(*d);
-                t = (t * linear[channel]) >> 16;
-                t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
-                *d = PA_INT32_SWAP((int32_t) t);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-            break;
-        }
-
-        case PA_SAMPLE_S24NE: {
-            uint8_t *d, *e;
-            unsigned channel;
-            int32_t linear[PA_CHANNELS_MAX];
-
-            calc_linear_integer_volume(linear, volume);
-
-            e = (uint8_t*) ptr + c->length;
-
-            for (channel = 0, d = ptr; d < e; d += 3) {
-                int64_t t;
-
-                t = (int64_t)((int32_t) (PA_READ24NE(d) << 8));
-                t = (t * linear[channel]) >> 16;
-                t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
-                PA_WRITE24NE(d, ((uint32_t) (int32_t) t) >> 8);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-            break;
-        }
-
-        case PA_SAMPLE_S24RE: {
-            uint8_t *d, *e;
-            unsigned channel;
-            int32_t linear[PA_CHANNELS_MAX];
-
-            calc_linear_integer_volume(linear, volume);
-
-            e = (uint8_t*) ptr + c->length;
-
-            for (channel = 0, d = ptr; d < e; d += 3) {
-                int64_t t;
-
-                t = (int64_t)((int32_t) (PA_READ24RE(d) << 8));
-                t = (t * linear[channel]) >> 16;
-                t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
-                PA_WRITE24RE(d, ((uint32_t) (int32_t) t) >> 8);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-            break;
-        }
-
-        case PA_SAMPLE_S24_32NE: {
-            uint32_t *d, *e;
-            unsigned channel;
-            int32_t linear[PA_CHANNELS_MAX];
-
-            calc_linear_integer_volume(linear, volume);
-
-            e = (uint32_t*) ptr + c->length/sizeof(uint32_t);
-
-            for (channel = 0, d = ptr; d < e; d++) {
-                int64_t t;
-
-                t = (int64_t) ((int32_t) (*d << 8));
-                t = (t * linear[channel]) >> 16;
-                t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
-                *d = ((uint32_t) ((int32_t) t)) >> 8;
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-            break;
-        }
-
-        case PA_SAMPLE_S24_32RE: {
-            uint32_t *d, *e;
-            unsigned channel;
-            int32_t linear[PA_CHANNELS_MAX];
-
-            calc_linear_integer_volume(linear, volume);
-
-            e = (uint32_t*) ptr + c->length/sizeof(uint32_t);
-
-            for (channel = 0, d = ptr; d < e; d++) {
-                int64_t t;
-
-                t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*d) << 8));
-                t = (t * linear[channel]) >> 16;
-                t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
-                *d = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-            break;
-        }
-
-        case PA_SAMPLE_U8: {
-            uint8_t *d, *e;
-            unsigned channel;
-            int32_t linear[PA_CHANNELS_MAX];
-
-            calc_linear_integer_volume(linear, volume);
-
-            e = (uint8_t*) ptr + c->length;
-
-            for (channel = 0, d = ptr; d < e; d++) {
-                int32_t t, hi, lo;
-
-                hi = linear[channel] >> 16;
-                lo = linear[channel] & 0xFFFF;
-
-                t = (int32_t) *d - 0x80;
-                t = ((t * lo) >> 16) + (t * hi);
-                t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
-                *d = (uint8_t) (t + 0x80);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-            break;
-        }
-
-        case PA_SAMPLE_ULAW: {
-            uint8_t *d, *e;
-            unsigned channel;
-            int32_t linear[PA_CHANNELS_MAX];
-
-            calc_linear_integer_volume(linear, volume);
-
-            e = (uint8_t*) ptr + c->length;
-
-            for (channel = 0, d = ptr; d < e; d++) {
-                int32_t t, hi, lo;
-
-                hi = linear[channel] >> 16;
-                lo = linear[channel] & 0xFFFF;
-
-                t = (int32_t) st_ulaw2linear16(*d);
-                t = ((t * lo) >> 16) + (t * hi);
-                t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
-                *d = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-            break;
-        }
-
-        case PA_SAMPLE_ALAW: {
-            uint8_t *d, *e;
-            unsigned channel;
-            int32_t linear[PA_CHANNELS_MAX];
-
-            calc_linear_integer_volume(linear, volume);
-
-            e = (uint8_t*) ptr + c->length;
-
-            for (channel = 0, d = ptr; d < e; d++) {
-                int32_t t, hi, lo;
-
-                hi = linear[channel] >> 16;
-                lo = linear[channel] & 0xFFFF;
-
-                t = (int32_t) st_alaw2linear16(*d);
-                t = ((t * lo) >> 16) + (t * hi);
-                t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
-                *d = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
-
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-            break;
-        }
-
-        case PA_SAMPLE_FLOAT32NE: {
-            float *d;
-            int skip;
-            unsigned n;
-            unsigned channel;
-
-            d = ptr;
-            skip = (int) (spec->channels * sizeof(float));
-            n = (unsigned) (c->length/sizeof(float)/spec->channels);
-
-            for (channel = 0; channel < spec->channels; channel ++) {
-                float v, *t;
-
-                if (PA_UNLIKELY(volume->values[channel] == PA_VOLUME_NORM))
-                    continue;
-
-                v = (float) pa_sw_volume_to_linear(volume->values[channel]);
-                t = d + channel;
-                oil_scalarmult_f32(t, skip, t, skip, &v, (int) n);
-            }
-            break;
-        }
-
-        case PA_SAMPLE_FLOAT32RE: {
-            float *d, *e;
-            unsigned channel;
-            float linear[PA_CHANNELS_MAX];
-
-            calc_linear_float_volume(linear, volume);
-
-            e = (float*) ptr + c->length/sizeof(float);
-
-            for (channel = 0, d = ptr; d < e; d++) {
-                float t;
+    if (spec->format < 0 || spec->format >= PA_SAMPLE_MAX) { /* valid formats are 0 .. PA_SAMPLE_MAX-1 */
+        pa_log_warn(" Unable to change volume of format %s.", pa_sample_format_to_string(spec->format));
+        return;
+    }
 
-                t = PA_FLOAT32_SWAP(*d);
-                t *= linear[channel];
-                *d = PA_FLOAT32_SWAP(t);
+    do_volume = pa_get_volume_func (spec->format);
+    pa_assert(do_volume);
 
-                if (PA_UNLIKELY(++channel >= spec->channels))
-                    channel = 0;
-            }
-
-            break;
-        }
+    calc_volume_table[spec->format] ((void *)linear, volume);
 
+    ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index;
 
-        default:
-            pa_log_warn(" Unable to change volume of format %s.", pa_sample_format_to_string(spec->format));
-            /* If we cannot change the volume, we just don't do it */
-    }
+    do_volume (ptr, (void *)linear, spec->channels, c->length);
 
     pa_memblock_release(c->memblock);
 }
@@ -1090,7 +805,7 @@ void pa_interleave(const void *src[], unsigned channels, void *dst, size_t ss, u
         d = (uint8_t*) dst + c * ss;
 
         for (j = 0; j < n; j ++) {
-            oil_memcpy(d, s, (int) ss);
+            memcpy(d, s, (int) ss);
             s = (uint8_t*) s + ss;
             d = (uint8_t*) d + fs;
         }
@@ -1118,7 +833,7 @@ void pa_deinterleave(const void *src, void *dst[], unsigned channels, size_t ss,
         d = dst[c];
 
         for (j = 0; j < n; j ++) {
-            oil_memcpy(d, s, (int) ss);
+            memcpy(d, s, (int) ss);
             s = (uint8_t*) s + fs;
             d = (uint8_t*) d + ss;
         }
@@ -1227,10 +942,15 @@ void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const vo
     s = src; d = dst;
 
     if (format == PA_SAMPLE_FLOAT32NE) {
+        for (; n > 0; n--) {
+            float f;
 
-        float minus_one = -1.0, plus_one = 1.0;
-        oil_clip_f32(d, (int) dstr, s, (int) sstr, (int) n, &minus_one, &plus_one);
+            f = *s;
+            *d = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f);
 
+            s = (const float*) ((const uint8_t*) s + sstr);
+            d = (float*) ((uint8_t*) d + dstr);
+        }
     } else {
         pa_assert(format == PA_SAMPLE_FLOAT32RE);
 
diff --git a/src/pulsecore/sample-util.h b/src/pulsecore/sample-util.h
index 6a306c11..34df5cf3 100644
--- a/src/pulsecore/sample-util.h
+++ b/src/pulsecore/sample-util.h
@@ -86,6 +86,11 @@ void pa_memchunk_dump_to_file(pa_memchunk *c, const char *fn);
 
 void pa_memchunk_sine(pa_memchunk *c, pa_mempool *pool, unsigned rate, unsigned freq);
 
+typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned channels, unsigned length);
+
+pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f);
+void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func);
+
 #define PA_CHANNEL_POSITION_MASK_LEFT                                   \
     (PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_FRONT_LEFT)           \
      | PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_REAR_LEFT)          \
diff --git a/src/pulsecore/sconv-s16le.c b/src/pulsecore/sconv-s16le.c
index 43b8cb3e..0fefdf1c 100644
--- a/src/pulsecore/sconv-s16le.c
+++ b/src/pulsecore/sconv-s16le.c
@@ -28,8 +28,6 @@
 #include <inttypes.h>
 #include <stdio.h>
 
-#include <liboil/liboilfuncs.h>
-
 #include <pulsecore/sconv.h>
 #include <pulsecore/macro.h>
 #include <pulsecore/log.h>
@@ -86,17 +84,13 @@ void pa_sconv_s16le_to_float32ne(unsigned n, const int16_t *a, float *b) {
     pa_assert(b);
 
 #if SWAP_WORDS == 1
-
     for (; n > 0; n--) {
         int16_t s = *(a++);
         *(b++) = ((float) INT16_FROM(s))/(float) 0x7FFF;
     }
-
 #else
-{
-    static const double add = 0, factor = 1.0/0x7FFF;
-    oil_scaleconv_f32_s16(b, a, (int) n, &add, &factor);
-}
+    for (; n > 0; n--)
+        *(b++) = ((float) (*(a++)))/(float) 0x7FFF;
 #endif
 }
 
@@ -105,17 +99,13 @@ void pa_sconv_s32le_to_float32ne(unsigned n, const int32_t *a, float *b) {
     pa_assert(b);
 
 #if SWAP_WORDS == 1
-
     for (; n > 0; n--) {
         int32_t s = *(a++);
         *(b++) = (float) (((double) INT32_FROM(s))/0x7FFFFFFF);
     }
-
 #else
-{
-    static const double add = 0, factor = 1.0/0x7FFFFFFF;
-    oil_scaleconv_f32_s32(b, a, (int) n, &add, &factor);
-}
+    for (; n > 0; n--)
+        *(b++) = (float) (((double) (*(a++)))/0x7FFFFFFF);
 #endif
 }
 
@@ -124,7 +114,6 @@ void pa_sconv_s16le_from_float32ne(unsigned n, const float *a, int16_t *b) {
     pa_assert(b);
 
 #if SWAP_WORDS == 1
-
     for (; n > 0; n--) {
         int16_t s;
         float v = *(a++);
@@ -133,12 +122,13 @@ void pa_sconv_s16le_from_float32ne(unsigned n, const float *a, int16_t *b) {
         s = (int16_t) lrintf(v * 0x7FFF);
         *(b++) = INT16_TO(s);
     }
-
 #else
-{
-    static const double add = 0, factor = 0x7FFF;
-    oil_scaleconv_s16_f32(b, a, (int) n, &add, &factor);
-}
+    for (; n > 0; n--) {
+        float v = *(a++);
+
+        v = PA_CLAMP_UNLIKELY(v, -1.0f, 1.f);
+        *(b++) = (int16_t) lrintf(v * 0x7FFF);
+    }
 #endif
 }
 
@@ -147,7 +137,6 @@ void pa_sconv_s32le_from_float32ne(unsigned n, const float *a, int32_t *b) {
     pa_assert(b);
 
 #if SWAP_WORDS == 1
-
     for (; n > 0; n--) {
         int32_t s;
         float v = *(a++);
@@ -156,12 +145,13 @@ void pa_sconv_s32le_from_float32ne(unsigned n, const float *a, int32_t *b) {
         s = (int32_t) lrint((double) v * (double) 0x7FFFFFFF);
         *(b++) = INT32_TO(s);
     }
-
 #else
-{
-    static const double add = 0, factor = 0x7FFFFFFF;
-    oil_scaleconv_s32_f32(b, a, (int) n, &add, &factor);
-}
+    for (; n > 0; n--) {
+        float v = *(a++);
+
+        v = PA_CLAMP_UNLIKELY(v, -1.0f, 1.0f);
+        *(b++) = (int32_t) lrint((double) v * (double) 0x7FFFFFFF);
+    }
 #endif
 }
 
diff --git a/src/pulsecore/sconv.c b/src/pulsecore/sconv.c
index d89f4283..d06d6985 100644
--- a/src/pulsecore/sconv.c
+++ b/src/pulsecore/sconv.c
@@ -27,9 +27,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 
-#include <liboil/liboilfuncs.h>
-#include <liboil/liboil.h>
-
 #include <pulsecore/g711.h>
 #include <pulsecore/macro.h>
 
@@ -41,32 +38,31 @@
 
 /* u8 */
 static void u8_to_float32ne(unsigned n, const uint8_t *a, float *b) {
-    static const double add = -1, factor = 1.0/128.0;
-
     pa_assert(a);
     pa_assert(b);
 
-    oil_scaleconv_f32_u8(b, a, (int) n, &add, &factor);
+    for (; n > 0; n--, a++, b++)
+        *b = (*a * 1.0/128.0) - 1.0;
 }
 
 static void u8_from_float32ne(unsigned n, const float *a, uint8_t *b) {
-    static const double add = 128, factor = 127.0;
-
     pa_assert(a);
     pa_assert(b);
 
-    oil_scaleconv_u8_f32(b, a, (int) n, &add, &factor);
+    for (; n > 0; n--, a++, b++) {
+        /* Map -1.0..1.0 onto 1..255 around the 128 midpoint; out-of-range input saturates at 0/255 */
+        float v = (*a * 127.0) + 128.0;
+        v = PA_CLAMP_UNLIKELY (v, 0.0, 255.0);
+        *b = rint (v);
+    }
 }
 
 static void u8_to_s16ne(unsigned n, const uint8_t *a, int16_t *b) {
-    static const int16_t add = -0x80, factor = 0x100;
-
     pa_assert(a);
     pa_assert(b);
 
-    oil_conv_s16_u8(b, 2, a, 1, (int) n);
-    oil_scalaradd_s16(b, 2, b, 2, &add, (int) n);
-    oil_scalarmult_s16(b, 2, b, 2, &factor, (int) n);
+    for (; n > 0; n--, a++, b++) /* re-centre unsigned 8 bit around 0, then scale to 16 bit */
+        *b = (int16_t) ((((int16_t) *a) - 128) * 0x100); /* multiply, not <<: left-shifting a negative value is undefined */
 }
 
 static void u8_from_s16ne(unsigned n, const int16_t *a, uint8_t *b) {
@@ -84,7 +80,7 @@ static void float32ne_to_float32ne(unsigned n, const float *a, float *b) {
     pa_assert(a);
     pa_assert(b);
 
-    oil_memcpy(b, a, (int) (sizeof(float) * n));
+    memcpy(b, a, (int) (sizeof(float) * n));
 }
 
 static void float32re_to_float32ne(unsigned n, const float *a, float *b) {
@@ -101,7 +97,7 @@ static void s16ne_to_s16ne(unsigned n, const int16_t *a, int16_t *b) {
     pa_assert(a);
     pa_assert(b);
 
-    oil_memcpy(b, a, (int) (sizeof(int16_t) * n));
+    memcpy(b, a, (int) (sizeof(int16_t) * n));
 }
 
 static void s16re_to_s16ne(unsigned n, const int16_t *a, int16_t *b) {
@@ -188,98 +184,130 @@ static void alaw_from_s16ne(unsigned n, const int16_t *a, uint8_t *b) {
         *b = st_13linear2alaw(*a >> 3);
 }
 
+static pa_convert_func_t to_float32ne_table[] = {
+    [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_to_float32ne,
+    [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_to_float32ne,
+    [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_to_float32ne,
+    [PA_SAMPLE_S16LE]     = (pa_convert_func_t) pa_sconv_s16le_to_float32ne,
+    [PA_SAMPLE_S16BE]     = (pa_convert_func_t) pa_sconv_s16be_to_float32ne,
+    [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_to_float32ne,
+    [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_to_float32ne,
+    [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_to_float32ne,
+    [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_to_float32ne,
+    [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_to_float32ne,
+    [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_to_float32ne,
+    [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne,
+    [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne,
+};
+
 pa_convert_func_t pa_get_convert_to_float32ne_function(pa_sample_format_t f) {
 
-    static const pa_convert_func_t table[] = {
-        [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_to_float32ne,
-        [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_to_float32ne,
-        [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_to_float32ne,
-        [PA_SAMPLE_S16LE]     = (pa_convert_func_t) pa_sconv_s16le_to_float32ne,
-        [PA_SAMPLE_S16BE]     = (pa_convert_func_t) pa_sconv_s16be_to_float32ne,
-        [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_to_float32ne,
-        [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_to_float32ne,
-        [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_to_float32ne,
-        [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_to_float32ne,
-        [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_to_float32ne,
-        [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_to_float32ne,
-        [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne,
-        [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne,
-    };
+    pa_assert(f >= 0);
+    pa_assert(f < PA_SAMPLE_MAX);
+
+    return to_float32ne_table[f];
+}
+
+void pa_set_convert_to_float32ne_function(pa_sample_format_t f, pa_convert_func_t func) {
 
     pa_assert(f >= 0);
     pa_assert(f < PA_SAMPLE_MAX);
 
-    return table[f];
+    to_float32ne_table[f] = func;
 }
 
+static pa_convert_func_t from_float32ne_table[] = {
+    [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_from_float32ne,
+    [PA_SAMPLE_S16LE]     = (pa_convert_func_t) pa_sconv_s16le_from_float32ne,
+    [PA_SAMPLE_S16BE]     = (pa_convert_func_t) pa_sconv_s16be_from_float32ne,
+    [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_from_float32ne,
+    [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_from_float32ne,
+    [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_from_float32ne,
+    [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_from_float32ne,
+    [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_from_float32ne,
+    [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_from_float32ne,
+    [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne,
+    [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne,
+    [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_from_float32ne,
+    [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_from_float32ne
+};
+
 pa_convert_func_t pa_get_convert_from_float32ne_function(pa_sample_format_t f) {
 
-    static const pa_convert_func_t table[] = {
-        [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_from_float32ne,
-        [PA_SAMPLE_S16LE]     = (pa_convert_func_t) pa_sconv_s16le_from_float32ne,
-        [PA_SAMPLE_S16BE]     = (pa_convert_func_t) pa_sconv_s16be_from_float32ne,
-        [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_from_float32ne,
-        [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_from_float32ne,
-        [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_from_float32ne,
-        [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_from_float32ne,
-        [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_from_float32ne,
-        [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_from_float32ne,
-        [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne,
-        [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne,
-        [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_from_float32ne,
-        [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_from_float32ne
-    };
+    pa_assert(f >= 0);
+    pa_assert(f < PA_SAMPLE_MAX);
+
+    return from_float32ne_table[f];
+}
+
+void pa_set_convert_from_float32ne_function(pa_sample_format_t f, pa_convert_func_t func) {
 
     pa_assert(f >= 0);
     pa_assert(f < PA_SAMPLE_MAX);
 
-    return table[f];
+    from_float32ne_table[f] = func;
 }
 
+static pa_convert_func_t to_s16ne_table[] = {
+    [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_to_s16ne,
+    [PA_SAMPLE_S16NE]     = (pa_convert_func_t) s16ne_to_s16ne,
+    [PA_SAMPLE_S16RE]     = (pa_convert_func_t) s16re_to_s16ne,
+    [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_to_s16ne,
+    [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_to_s16ne,
+    [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_to_s16ne,
+    [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_to_s16ne,
+    [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_to_s16ne,
+    [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_to_s16ne,
+    [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_to_s16ne,
+    [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_to_s16ne,
+    [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_to_s16ne,
+    [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_to_s16ne
+};
+
 pa_convert_func_t pa_get_convert_to_s16ne_function(pa_sample_format_t f) {
 
-    static const pa_convert_func_t table[] = {
-        [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_to_s16ne,
-        [PA_SAMPLE_S16NE]     = (pa_convert_func_t) s16ne_to_s16ne,
-        [PA_SAMPLE_S16RE]     = (pa_convert_func_t) s16re_to_s16ne,
-        [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_to_s16ne,
-        [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_to_s16ne,
-        [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_to_s16ne,
-        [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_to_s16ne,
-        [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_to_s16ne,
-        [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_to_s16ne,
-        [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_to_s16ne,
-        [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_to_s16ne,
-        [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_to_s16ne,
-        [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_to_s16ne
-    };
+    pa_assert(f >= 0);
+    pa_assert(f < PA_SAMPLE_MAX);
+
+    return to_s16ne_table[f];
+}
+
+void pa_set_convert_to_s16ne_function(pa_sample_format_t f, pa_convert_func_t func) {
 
     pa_assert(f >= 0);
     pa_assert(f < PA_SAMPLE_MAX);
 
-    return table[f];
+    to_s16ne_table[f] = func;
 }
 
+static pa_convert_func_t from_s16ne_table[] = {
+    [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_from_s16ne,
+    [PA_SAMPLE_S16NE]     = (pa_convert_func_t) s16ne_to_s16ne,
+    [PA_SAMPLE_S16RE]     = (pa_convert_func_t) s16re_to_s16ne,
+    [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_from_s16ne,
+    [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_from_s16ne,
+    [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_from_s16ne,
+    [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_from_s16ne,
+    [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_from_s16ne,
+    [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_from_s16ne,
+    [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_from_s16ne,
+    [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_from_s16ne,
+    [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_from_s16ne,
+    [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_from_s16ne,
+};
+
 pa_convert_func_t pa_get_convert_from_s16ne_function(pa_sample_format_t f) {
 
-    static const pa_convert_func_t table[] = {
-        [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_from_s16ne,
-        [PA_SAMPLE_S16NE]     = (pa_convert_func_t) s16ne_to_s16ne,
-        [PA_SAMPLE_S16RE]     = (pa_convert_func_t) s16re_to_s16ne,
-        [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_from_s16ne,
-        [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_from_s16ne,
-        [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_from_s16ne,
-        [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_from_s16ne,
-        [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_from_s16ne,
-        [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_from_s16ne,
-        [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_from_s16ne,
-        [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_from_s16ne,
-        [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_from_s16ne,
-        [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_from_s16ne,
-    };
+    pa_assert(f >= 0);
+    pa_assert(f < PA_SAMPLE_MAX);
+
+    return from_s16ne_table[f];
+}
+
+void pa_set_convert_from_s16ne_function(pa_sample_format_t f, pa_convert_func_t func) {
 
     pa_assert(f >= 0);
     pa_assert(f < PA_SAMPLE_MAX);
 
-    return table[f];
+    from_s16ne_table[f] = func;
 }
diff --git a/src/pulsecore/sconv.h b/src/pulsecore/sconv.h
index b00a16a4..cd937559 100644
--- a/src/pulsecore/sconv.h
+++ b/src/pulsecore/sconv.h
@@ -33,4 +33,10 @@ pa_convert_func_t pa_get_convert_from_float32ne_function(pa_sample_format_t f) P
 pa_convert_func_t pa_get_convert_to_s16ne_function(pa_sample_format_t f) PA_GCC_PURE;
 pa_convert_func_t pa_get_convert_from_s16ne_function(pa_sample_format_t f) PA_GCC_PURE;
 
+void pa_set_convert_to_float32ne_function(pa_sample_format_t f, pa_convert_func_t func);
+void pa_set_convert_from_float32ne_function(pa_sample_format_t f, pa_convert_func_t func);
+
+void pa_set_convert_to_s16ne_function(pa_sample_format_t f, pa_convert_func_t func);
+void pa_set_convert_from_s16ne_function(pa_sample_format_t f, pa_convert_func_t func);
+
 #endif
diff --git a/src/pulsecore/sink-input.c b/src/pulsecore/sink-input.c
index a29334f9..0ad95e6f 100644
--- a/src/pulsecore/sink-input.c
+++ b/src/pulsecore/sink-input.c
@@ -44,7 +44,7 @@
 #define MEMBLOCKQ_MAXLENGTH (32*1024*1024)
 #define CONVERT_BUFFER_LENGTH (PA_PAGE_SIZE)
 
-static PA_DEFINE_CHECK_TYPE(pa_sink_input, pa_msgobject);
+PA_DEFINE_PUBLIC_CLASS(pa_sink_input, pa_msgobject);
 
 static void sink_input_free(pa_object *o);
 static void set_real_ratio(pa_sink_input *i, const pa_cvolume *v);
@@ -126,6 +126,8 @@ static void reset_callbacks(pa_sink_input *i) {
     i->state_change = NULL;
     i->may_move_to = NULL;
     i->send_event = NULL;
+    i->volume_changed = NULL;
+    i->mute_changed = NULL;
 }
 
 /* Called from main context */
@@ -485,7 +487,10 @@ static void sink_input_free(pa_object *o) {
 
     pa_log_info("Freeing input %u \"%s\"", i->index, pa_strnull(pa_proplist_gets(i->proplist, PA_PROP_MEDIA_NAME)));
 
-    pa_assert(!i->thread_info.attached);
+    /* Side note: this function must be able to destruct properly any
+     * kind of sink input in any state, even those which are
+     * "half-moved" or are connected to sinks that have no asyncmsgq
+     * and are hence half-destructed themselves! */
 
     if (i->thread_info.render_memblockq)
         pa_memblockq_free(i->thread_info.render_memblockq);
@@ -968,7 +973,10 @@ void pa_sink_input_set_volume(pa_sink_input *i, const pa_cvolume *volume, pa_boo
         pa_assert_se(pa_asyncmsgq_send(i->sink->asyncmsgq, PA_MSGOBJECT(i), PA_SINK_INPUT_MESSAGE_SET_SOFT_VOLUME, NULL, 0, NULL) == 0);
     }
 
-    /* The virtual volume changed, let's tell people so */
+    /* The volume changed, let's tell people so */
+    if (i->volume_changed)
+        i->volume_changed(i);
+
     pa_subscription_post(i->core, PA_SUBSCRIPTION_EVENT_SINK_INPUT|PA_SUBSCRIPTION_EVENT_CHANGE, i->index);
 }
 
@@ -999,6 +1007,11 @@ void pa_sink_input_set_mute(pa_sink_input *i, pa_bool_t mute, pa_bool_t save) {
     i->save_muted = save;
 
     pa_assert_se(pa_asyncmsgq_send(i->sink->asyncmsgq, PA_MSGOBJECT(i), PA_SINK_INPUT_MESSAGE_SET_SOFT_MUTE, NULL, 0, NULL) == 0);
+
+    /* The mute status changed, let's tell people so */
+    if (i->mute_changed)
+        i->mute_changed(i);
+
     pa_subscription_post(i->core, PA_SUBSCRIPTION_EVENT_SINK_INPUT|PA_SUBSCRIPTION_EVENT_CHANGE, i->index);
 }
 
@@ -1263,6 +1276,10 @@ int pa_sink_input_finish_move(pa_sink_input *i, pa_sink *dest, pa_bool_t save) {
 
     /* Notify everyone */
     pa_hook_fire(&i->core->hooks[PA_CORE_HOOK_SINK_INPUT_MOVE_FINISH], i);
+
+    if (i->volume_changed)
+        i->volume_changed(i);
+
     pa_subscription_post(i->core, PA_SUBSCRIPTION_EVENT_SINK_INPUT|PA_SUBSCRIPTION_EVENT_CHANGE, i->index);
 
     return 0;
diff --git a/src/pulsecore/sink-input.h b/src/pulsecore/sink-input.h
index ea0f8c0e..fe6cf75c 100644
--- a/src/pulsecore/sink-input.h
+++ b/src/pulsecore/sink-input.h
@@ -192,8 +192,16 @@ struct pa_sink_input {
     pa_bool_t (*may_move_to) (pa_sink_input *i, pa_sink *s); /* may be NULL */
 
     /* If non-NULL this function is used to dispatch asynchronous
-     * control events. */
-    void (*send_event)(pa_sink_input *i, const char *event, pa_proplist* data);
+     * control events. Called from main context. */
+    void (*send_event)(pa_sink_input *i, const char *event, pa_proplist* data); /* may be NULL */
+
+    /* If non-NULL this function is called whenever the sink input
+     * volume changes. Called from main context */
+    void (*volume_changed)(pa_sink_input *i); /* may be NULL */
+
+    /* If non-NULL this function is called whenever the sink input
+     * mute status changes. Called from main context */
+    void (*mute_changed)(pa_sink_input *i); /* may be NULL */
 
     struct {
         pa_sink_input_state_t state;
@@ -227,7 +235,7 @@ struct pa_sink_input {
     void *userdata;
 };
 
-PA_DECLARE_CLASS(pa_sink_input);
+PA_DECLARE_PUBLIC_CLASS(pa_sink_input);
 #define PA_SINK_INPUT(o) pa_sink_input_cast(o)
 
 enum {
diff --git a/src/pulsecore/sink.c b/src/pulsecore/sink.c
index 1cce8e6b..5cec7747 100644
--- a/src/pulsecore/sink.c
+++ b/src/pulsecore/sink.c
@@ -52,7 +52,7 @@
 #define ABSOLUTE_MAX_LATENCY (10*PA_USEC_PER_SEC)
 #define DEFAULT_FIXED_LATENCY (250*PA_USEC_PER_MSEC)
 
-static PA_DEFINE_CHECK_TYPE(pa_sink, pa_msgobject);
+PA_DEFINE_PUBLIC_CLASS(pa_sink, pa_msgobject);
 
 static void sink_free(pa_object *s);
 
@@ -1380,9 +1380,14 @@ static void propagate_reference_volume(pa_sink *s) {
         pa_cvolume_remap(&remapped, &s->channel_map, &i->channel_map);
         pa_sw_cvolume_multiply(&i->volume, &remapped, &i->reference_ratio);
 
-        /* The reference volume changed, let's tell people so */
-        if (!pa_cvolume_equal(&old_volume, &i->volume))
+        /* The volume changed, let's tell people so */
+        if (!pa_cvolume_equal(&old_volume, &i->volume)) {
+
+            if (i->volume_changed)
+                i->volume_changed(i);
+
             pa_subscription_post(i->core, PA_SUBSCRIPTION_EVENT_SINK_INPUT|PA_SUBSCRIPTION_EVENT_CHANGE, i->index);
+        }
     }
 }
 
@@ -1522,8 +1527,13 @@ static void propagate_real_volume(pa_sink *s, const pa_cvolume *old_real_volume)
             pa_sw_cvolume_multiply(&i->volume, &remapped, &i->reference_ratio);
 
             /* Notify if something changed */
-            if (!pa_cvolume_equal(&old_volume, &i->volume))
+            if (!pa_cvolume_equal(&old_volume, &i->volume)) {
+
+                if (i->volume_changed)
+                    i->volume_changed(i);
+
                 pa_subscription_post(i->core, PA_SUBSCRIPTION_EVENT_SINK_INPUT|PA_SUBSCRIPTION_EVENT_CHANGE, i->index);
+            }
         }
     }
 
diff --git a/src/pulsecore/sink.h b/src/pulsecore/sink.h
index 936d1c2a..b5284b71 100644
--- a/src/pulsecore/sink.h
+++ b/src/pulsecore/sink.h
@@ -191,7 +191,7 @@ struct pa_sink {
     void *userdata;
 };
 
-PA_DECLARE_CLASS(pa_sink);
+PA_DECLARE_PUBLIC_CLASS(pa_sink);
 #define PA_SINK(s) (pa_sink_cast(s))
 
 typedef enum pa_sink_message {
diff --git a/src/pulsecore/sound-file-stream.c b/src/pulsecore/sound-file-stream.c
index 502e5c69..f41c53f3 100644
--- a/src/pulsecore/sound-file-stream.c
+++ b/src/pulsecore/sound-file-stream.c
@@ -64,9 +64,8 @@ enum {
     FILE_STREAM_MESSAGE_UNLINK
 };
 
-PA_DECLARE_CLASS(file_stream);
+PA_DEFINE_PRIVATE_CLASS(file_stream, pa_msgobject);
 #define FILE_STREAM(o) (file_stream_cast(o))
-static PA_DEFINE_CHECK_TYPE(file_stream, pa_msgobject);
 
 /* Called from main context */
 static void file_stream_unlink(file_stream *u) {
diff --git a/src/pulsecore/source-output.c b/src/pulsecore/source-output.c
index 3803a6cc..43733400 100644
--- a/src/pulsecore/source-output.c
+++ b/src/pulsecore/source-output.c
@@ -41,7 +41,7 @@
 
 #define MEMBLOCKQ_MAXLENGTH (32*1024*1024)
 
-static PA_DEFINE_CHECK_TYPE(pa_source_output, pa_msgobject);
+PA_DEFINE_PUBLIC_CLASS(pa_source_output, pa_msgobject);
 
 static void source_output_free(pa_object* mo);
 
@@ -359,8 +359,6 @@ static void source_output_free(pa_object* mo) {
 
     pa_log_info("Freeing output %u \"%s\"", o->index, pa_strnull(pa_proplist_gets(o->proplist, PA_PROP_MEDIA_NAME)));
 
-    pa_assert(!o->thread_info.attached);
-
     if (o->thread_info.delay_memblockq)
         pa_memblockq_free(o->thread_info.delay_memblockq);
 
diff --git a/src/pulsecore/source-output.h b/src/pulsecore/source-output.h
index a70a3fdb..aca9ddf2 100644
--- a/src/pulsecore/source-output.h
+++ b/src/pulsecore/source-output.h
@@ -182,7 +182,7 @@ struct pa_source_output {
     void *userdata;
 };
 
-PA_DECLARE_CLASS(pa_source_output);
+PA_DECLARE_PUBLIC_CLASS(pa_source_output);
 #define PA_SOURCE_OUTPUT(o) pa_source_output_cast(o)
 
 enum {
diff --git a/src/pulsecore/source.c b/src/pulsecore/source.c
index 8aa07f5e..3026654e 100644
--- a/src/pulsecore/source.c
+++ b/src/pulsecore/source.c
@@ -46,7 +46,7 @@
 #define ABSOLUTE_MAX_LATENCY (10*PA_USEC_PER_SEC)
 #define DEFAULT_FIXED_LATENCY (250*PA_USEC_PER_MSEC)
 
-static PA_DEFINE_CHECK_TYPE(pa_source, pa_msgobject);
+PA_DEFINE_PUBLIC_CLASS(pa_source, pa_msgobject);
 
 static void source_free(pa_object *o);
 
diff --git a/src/pulsecore/source.h b/src/pulsecore/source.h
index 7b3e4953..df3f99df 100644
--- a/src/pulsecore/source.h
+++ b/src/pulsecore/source.h
@@ -158,7 +158,7 @@ struct pa_source {
     void *userdata;
 };
 
-PA_DECLARE_CLASS(pa_source);
+PA_DECLARE_PUBLIC_CLASS(pa_source);
 #define PA_SOURCE(s) pa_source_cast(s)
 
 typedef enum pa_source_message {
diff --git a/src/pulsecore/svolume_arm.c b/src/pulsecore/svolume_arm.c
new file mode 100644
index 00000000..5bd1448f
--- /dev/null
+++ b/src/pulsecore/svolume_arm.c
@@ -0,0 +1,195 @@
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <pulse/timeval.h>
+#include <pulsecore/random.h>
+#include <pulsecore/macro.h>
+#include <pulsecore/g711.h>
+#include <pulsecore/core-util.h>
+
+#include "cpu-arm.h"
+
+#include "sample-util.h"
+#include "endianmacros.h"
+
+#if defined (__arm__)
+
+#define MOD_INC() /* wrap the volume cursor r6 once it has reached ve (%2) */ \
+    " subs  r0, r6, %2              \n\t" /* r0 = r6 - ve; carry is set when r6 >= ve */ \
+    " addcs r0, %1                  \n\t" /* in that case r0 += volumes (%1) ... */ \
+    " movcs r6, r0                  \n\t" /* ... and r6 restarts at volumes + overshoot */
+
+/* Scale native-endian signed 16-bit samples in place by per-channel 16.16
+ * fixed-point volumes; length is in bytes. Up to four volumes are read per
+ * step and the cursor only wraps at volumes + max(channels, 4), so the
+ * volumes array presumably has to be padded by the caller -- TODO confirm. */
+#ifdef __ARMEB__ /* big endian: the lower-addressed sample is the top halfword */
+#define SMULW_1ST "smulwt"
+#define SMULW_2ND "smulwb"
+#define PACK_PAIR(d, s1, s2) " pkhbt " d ", " s2 ", " s1 ", LSL #16 \n\t"
+#else /* little endian: the lower-addressed sample is the bottom halfword */
+#define SMULW_1ST "smulwb"
+#define SMULW_2ND "smulwt"
+#define PACK_PAIR(d, s1, s2) " pkhbt " d ", " s1 ", " s2 ", LSL #16 \n\t"
+#endif
+static void
+pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    int32_t *ve;
+
+    channels = PA_MAX (4U, channels);
+    ve = volumes + channels;
+
+    __asm__ __volatile__ (
+        " mov r6, %1                      \n\t"
+        " mov %3, %3, LSR #1              \n\t" /* length /= sizeof (int16_t) */
+        " tst %3, #1                      \n\t" /* check for odd samples */
+        " beq  2f                         \n\t"
+
+        "1:                               \n\t"
+        " ldr  r0, [r6], #4               \n\t" /* odd samples volumes */
+        " ldrh r2, [%0]                   \n\t"
+        " smulwb r0, r0, r2               \n\t" /* (volume * sample) >> 16 */
+        " ssat r0, #16, r0                \n\t" /* saturate to signed 16 bit */
+        " strh r0, [%0], #2               \n\t"
+        MOD_INC()
+
+        "2:                               \n\t"
+        " mov %3, %3, LSR #1              \n\t"
+        " tst %3, #1                      \n\t" /* check for a leftover pair of samples */
+        " beq  4f                         \n\t"
+
+        "3:                               \n\t"
+        " ldrd r2, [r6], #8               \n\t" /* 2 samples at a time */
+        " ldr  r0, [%0]                   \n\t"
+        " " SMULW_1ST " r2, r2, r0               \n\t" /* first volume * first sample */
+        " " SMULW_2ND " r3, r3, r0               \n\t" /* second volume * second sample */
+        " ssat r2, #16, r2                \n\t"
+        " ssat r3, #16, r3                \n\t"
+        PACK_PAIR("r0", "r2", "r3")
+        " str  r0, [%0], #4               \n\t"
+        MOD_INC()
+
+        "4:                               \n\t"
+        " movs %3, %3, LSR #1             \n\t"
+        " beq  6f                         \n\t"
+
+        "5:                               \n\t"
+        " ldrd r2, [r6], #8               \n\t" /* 4 samples at a time */
+        " ldrd r4, [r6], #8               \n\t"
+        " ldrd r0, [%0]                   \n\t"
+        " " SMULW_1ST " r2, r2, r0               \n\t"
+        " " SMULW_2ND " r3, r3, r0               \n\t"
+        " " SMULW_1ST " r4, r4, r1               \n\t"
+        " " SMULW_2ND " r5, r5, r1               \n\t"
+        " ssat r2, #16, r2                \n\t"
+        " ssat r3, #16, r3                \n\t"
+        " ssat r4, #16, r4                \n\t"
+        " ssat r5, #16, r5                \n\t"
+        PACK_PAIR("r0", "r2", "r3")
+        PACK_PAIR("r1", "r4", "r5")
+        " strd  r0, [%0], #8              \n\t"
+        MOD_INC()
+        " subs %3, %3, #1                 \n\t"
+        " bne 5b                          \n\t"
+        "6:                               \n\t"
+        : "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length)
+        :
+        : "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc", "memory" /* the asm stores through samples */
+    );
+}
+
+#undef RUN_TEST
+
+#ifdef RUN_TEST
+#define CHANNELS 2
+#define SAMPLES 1023
+#define TIMES 1000
+#define PADDING 16
+
+static void run_test (void) {
+    int16_t samples[SAMPLES];
+    int16_t samples_ref[SAMPLES];
+    int16_t samples_orig[SAMPLES];
+    int32_t volumes[CHANNELS + PADDING];
+    int i, j, padding;
+    pa_do_volume_func_t func;
+    pa_usec_t start, stop;
+    /* Self-test: compare the ARM routine with the registered S16NE routine, then time both. */
+    func = pa_get_volume_func (PA_SAMPLE_S16NE);
+
+    printf ("checking ARM %zu\n", sizeof (samples)); /* sizeof yields a size_t, hence %zu */
+
+    pa_random (samples, sizeof (samples));
+    memcpy (samples_ref, samples, sizeof (samples));
+    memcpy (samples_orig, samples, sizeof (samples));
+    /* random per-channel volumes, repeated periodically through the padding entries */
+    for (i = 0; i < CHANNELS; i++)
+        volumes[i] = rand() >> 1;
+    for (padding = 0; padding < PADDING; padding++, i++)
+        volumes[i] = volumes[padding];
+    /* correctness: both routines must produce identical output; mismatches are printed */
+    func (samples_ref, volumes, CHANNELS, sizeof (samples));
+    pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
+    for (i = 0; i < SAMPLES; i++) {
+        if (samples[i] != samples_ref[i]) {
+            printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i],
+                  samples_orig[i], volumes[i % CHANNELS]);
+        }
+    }
+    /* timing: TIMES runs of the ARM routine on a fresh copy of the input */
+    start = pa_rtclock_now();
+    for (j = 0; j < TIMES; j++) {
+        memcpy (samples, samples_orig, sizeof (samples));
+        pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples));
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("ARM: %llu usec.", (long long unsigned int) (stop - start));
+    /* timing: the same number of runs of the reference routine */
+    start = pa_rtclock_now();
+    for (j = 0; j < TIMES; j++) {
+        memcpy (samples_ref, samples_orig, sizeof (samples));
+        func (samples_ref, volumes, CHANNELS, sizeof (samples));
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("ref: %llu usec.", (long long unsigned int) (stop - start));
+}
+#endif
+
+#endif /* defined (__arm__) */
+
+
+void pa_volume_func_init_arm (pa_cpu_arm_flag_t flags) {
+#if defined (__arm__)
+    pa_log_info("Initialising ARM optimized functions.");
+    /* NOTE(review): flags is not consulted here -- presumably the caller gates on CPU features; confirm. */
+#ifdef RUN_TEST
+    run_test ();
+#endif
+    /* Register the assembler routine as the S16NE volume function; without __arm__ this function does nothing. */
+    pa_set_volume_func (PA_SAMPLE_S16NE,     (pa_do_volume_func_t) pa_volume_s16ne_arm);
+#endif /* defined (__arm__) */
+}
diff --git a/src/pulsecore/svolume_c.c b/src/pulsecore/svolume_c.c
new file mode 100644
index 00000000..5fc052b8
--- /dev/null
+++ b/src/pulsecore/svolume_c.c
@@ -0,0 +1,335 @@
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2006 Pierre Ossman <ossman@cendio.se> for Cendio AB
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+
+#include <pulsecore/macro.h>
+#include <pulsecore/g711.h>
+#include <pulsecore/core-util.h>
+
+#include "sample-util.h"
+#include "endianmacros.h"
+
+static void
+pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    /* Scale unsigned 8-bit samples in place; volumes[] holds one 16.16 fixed-point factor per channel. */
+    for (channel = 0; length; length--) { /* one sample per iteration */
+        int32_t t, hi, lo;
+
+        hi = volumes[channel] >> 16;    /* integer part of the volume */
+        lo = volumes[channel] & 0xFFFF; /* fractional part, in 1/65536 units */
+
+        t = (int32_t) *samples - 0x80; /* remove the unsigned bias */
+        t = ((t * lo) >> 16) + (t * hi); /* sample * volume / 65536, split to stay within 32 bits */
+        t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
+        *samples++ = (uint8_t) (t + 0x80); /* restore the bias */
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+static void
+pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    /* Scale A-law bytes in place: decode, apply the channel's 16.16 fixed-point volume, clamp, re-encode. */
+    for (channel = 0; length; length--) { /* one sample per iteration */
+        int32_t t, hi, lo;
+
+        hi = volumes[channel] >> 16;    /* integer part of the volume */
+        lo = volumes[channel] & 0xFFFF; /* fractional part, in 1/65536 units */
+
+        t = (int32_t) st_alaw2linear16(*samples);
+        t = ((t * lo) >> 16) + (t * hi); /* sample * volume / 65536, split to stay within 32 bits */
+        t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+        *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3); /* encoder gets the value shifted down 3 bits (presumably a 13-bit input, per its name) */
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+static void
+pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    /* Scale u-law bytes in place: decode, apply the channel's 16.16 fixed-point volume, clamp, re-encode. */
+    for (channel = 0; length; length--) { /* one sample per iteration */
+        int32_t t, hi, lo;
+
+        hi = volumes[channel] >> 16;    /* integer part of the volume */
+        lo = volumes[channel] & 0xFFFF; /* fractional part, in 1/65536 units */
+
+        t = (int32_t) st_ulaw2linear16(*samples);
+        t = ((t * lo) >> 16) + (t * hi); /* sample * volume / 65536, split to stay within 32 bits */
+        t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+        *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); /* encoder gets the value shifted down 2 bits (presumably a 14-bit input, per its name) */
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+static void
+pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    /* Scale native-endian signed 16-bit samples in place; length arrives in bytes. */
+    length /= sizeof (int16_t);
+
+    for (channel = 0; length; length--) {
+        int32_t t, hi, lo;
+
+        /* Multiplying the 32bit volume factor with the 16bit
+         * sample might result in a 48bit value. We want to
+         * do without 64 bit integers and hence do the
+         * multiplication independently for the HI and LO part
+         * of the volume. */
+
+        hi = volumes[channel] >> 16;    /* integer part of the 16.16 volume */
+        lo = volumes[channel] & 0xFFFF; /* fractional part, in 1/65536 units */
+
+        t = (int32_t)(*samples);
+        t = ((t * lo) >> 16) + (t * hi);
+        t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+        *samples++ = (int16_t) t;
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+static void
+pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    /* Same scaling as the native-endian 16-bit routine, but each sample goes through PA_INT16_SWAP() on load and store. */
+    length /= sizeof (int16_t); /* length arrives in bytes */
+
+    for (channel = 0; length; length--) {
+        int32_t t, hi, lo;
+
+        hi = volumes[channel] >> 16;    /* integer part of the 16.16 volume */
+        lo = volumes[channel] & 0xFFFF; /* fractional part, in 1/65536 units */
+
+        t = (int32_t) PA_INT16_SWAP(*samples);
+        t = ((t * lo) >> 16) + (t * hi); /* sample * volume / 65536, split to stay within 32 bits */
+        t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
+        *samples++ = PA_INT16_SWAP((int16_t) t);
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+static void
+pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    /* Multiply native-endian float samples in place by per-channel float volumes; no clamping is applied. */
+    length /= sizeof (float); /* length arrives in bytes */
+
+    for (channel = 0; length; length--) {
+        *samples++ *= volumes[channel];
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+static void
+pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    /* Multiply reverse-endian float samples in place by per-channel float volumes; no clamping is applied. */
+    length /= sizeof (float); /* length arrives in bytes */
+
+    for (channel = 0; length; length--) {
+        float t;
+
+        t = PA_FLOAT32_SWAP(*samples); /* swap on load ... */
+        t *= volumes[channel];
+        *samples++ = PA_FLOAT32_SWAP(t); /* ... and swap back on store */
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+static void
+pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    /* Scale native-endian signed 32-bit samples in place by per-channel 16.16 fixed-point volumes. */
+    length /= sizeof (int32_t); /* length arrives in bytes */
+
+    for (channel = 0; length; length--) {
+        int64_t t; /* 64 bits hold the full sample * volume product */
+
+        t = (int64_t)(*samples);
+        t = (t * volumes[channel]) >> 16; /* drop the 16 fractional bits of the volume */
+        t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+        *samples++ = (int32_t) t;
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+static void
+pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    /* Same scaling as the native-endian 32-bit routine, but each sample goes through PA_INT32_SWAP() on load and store. */
+    length /= sizeof (int32_t); /* length arrives in bytes */
+
+    for (channel = 0; length; length--) {
+        int64_t t; /* 64 bits hold the full sample * volume product */
+
+        t = (int64_t) PA_INT32_SWAP(*samples);
+        t = (t * volumes[channel]) >> 16; /* drop the 16 fractional bits of the volume */
+        t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+        *samples++ = PA_INT32_SWAP((int32_t) t);
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+static void
+pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    uint8_t *e;
+    /* Scale packed 3-byte native-endian samples in place by per-channel 16.16 fixed-point volumes. */
+    e = samples + length; /* length is in bytes; e marks the end of the buffer */
+
+    for (channel = 0; samples < e; samples += 3) {
+        int64_t t;
+
+        t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8)); /* shift up 8 bits before the int32_t cast (presumably so the sample's sign bit lands in bit 31) */
+        t = (t * volumes[channel]) >> 16; /* drop the 16 fractional bits of the volume */
+        t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+        PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8); /* write back the upper 24 bits */
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+static void
+pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    uint8_t *e;
+    /* Scale packed 3-byte reverse-endian samples in place by per-channel 16.16 fixed-point volumes. */
+    e = samples + length; /* length is in bytes; e marks the end of the buffer */
+
+    for (channel = 0; samples < e; samples += 3) {
+        int64_t t;
+
+        t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8)); /* shift up 8 bits before the int32_t cast (presumably so the sample's sign bit lands in bit 31) */
+        t = (t * volumes[channel]) >> 16; /* drop the 16 fractional bits of the volume */
+        t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+        PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8); /* write back the upper 24 bits */
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+static void
+pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    /* Scale native-endian 32-bit words in place by per-channel 16.16 fixed-point volumes; only the low 24 bits of each word are used. */
+    length /= sizeof (uint32_t); /* length arrives in bytes */
+
+    for (channel = 0; length; length--) {
+        int64_t t;
+
+        t = (int64_t) ((int32_t) (*samples << 8)); /* discard the top byte; bit 23 becomes the sign bit */
+        t = (t * volumes[channel]) >> 16; /* drop the 16 fractional bits of the volume */
+        t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+        *samples++ = ((uint32_t) ((int32_t) t)) >> 8; /* unsigned shift: the stored top byte is zero */
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+static void
+pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length)
+{
+    unsigned channel;
+    /* Same scaling as the native-endian 24-in-32 routine, but each word goes through PA_UINT32_SWAP() on load and store. */
+    length /= sizeof (uint32_t); /* length arrives in bytes */
+
+    for (channel = 0; length; length--) {
+        int64_t t;
+
+        t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8)); /* discard the top byte; bit 23 becomes the sign bit */
+        t = (t * volumes[channel]) >> 16; /* drop the 16 fractional bits of the volume */
+        t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
+        *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); /* unsigned shift: top byte is zero before the swap */
+
+        if (PA_UNLIKELY(++channel >= channels))
+            channel = 0; /* samples are interleaved: wrap to the first channel */
+    }
+}
+
+/* Per-format volume routines; sized to PA_SAMPLE_MAX so every index the accessors assert on is in bounds. */
+static pa_do_volume_func_t do_volume_table[PA_SAMPLE_MAX] = {
+    [PA_SAMPLE_U8]        = (pa_do_volume_func_t) pa_volume_u8_c,
+    [PA_SAMPLE_ALAW]      = (pa_do_volume_func_t) pa_volume_alaw_c,
+    [PA_SAMPLE_ULAW]      = (pa_do_volume_func_t) pa_volume_ulaw_c,
+    [PA_SAMPLE_S16NE]     = (pa_do_volume_func_t) pa_volume_s16ne_c,
+    [PA_SAMPLE_S16RE]     = (pa_do_volume_func_t) pa_volume_s16re_c,
+    [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c,
+    [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c,
+    [PA_SAMPLE_S32NE]     = (pa_do_volume_func_t) pa_volume_s32ne_c,
+    [PA_SAMPLE_S32RE]     = (pa_do_volume_func_t) pa_volume_s32re_c,
+    [PA_SAMPLE_S24NE]     = (pa_do_volume_func_t) pa_volume_s24ne_c,
+    [PA_SAMPLE_S24RE]     = (pa_do_volume_func_t) pa_volume_s24re_c,
+    [PA_SAMPLE_S24_32NE]  = (pa_do_volume_func_t) pa_volume_s24_32ne_c,
+    [PA_SAMPLE_S24_32RE]  = (pa_do_volume_func_t) pa_volume_s24_32re_c
+};
+
+pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f) {
+    pa_assert(f >= 0);
+    pa_assert(f < PA_SAMPLE_MAX);
+    /* return whichever routine is currently stored in the table for format f */
+    return do_volume_table[f];
+}
+
+void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func) {
+    pa_assert(f >= 0);
+    pa_assert(f < PA_SAMPLE_MAX);
+    /* overwrite the table slot for format f; func itself is not checked here */
+    do_volume_table[f] = func;
+}
diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c
new file mode 100644
index 00000000..8510b0c4
--- /dev/null
+++ b/src/pulsecore/svolume_mmx.c
@@ -0,0 +1,313 @@
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <pulse/timeval.h>
+#include <pulsecore/random.h>
+#include <pulsecore/macro.h>
+#include <pulsecore/g711.h>
+#include <pulsecore/core-util.h>
+
+#include "cpu-x86.h"
+
+#include "sample-util.h"
+#include "endianmacros.h"
+
+#if defined (__i386__) || defined (__amd64__)
+/* in s: 2 int16_t samples
+ * in v: 2 int32_t volumes, fixed point 16:16
+ * out v: contains the scaled and clamped int16_t samples; s is clobbered.
+ *
+ * We calculate the high 32 bits of a 32x16 multiply which we then
+ * clamp to 16 bits. The calculation is:
+ *
+ *  vl = (v & 0xffff)
+ *  vh = (v >> 16)
+ *  s = ((s * vl) >> 16) + (s * vh);
+ *
+ * For the first multiply we have to do a sign correction as we need to
+ * multiply a signed int with an unsigned int. Hacker's Delight 8-3 gives a
+ * simple formula to correct the sign of the high word after the signed
+ * multiply. Uses %mm4/%mm5 as scratch and expects the constants set up by
+ * the callers in %mm6 (0xffff0000 per dword) and %mm7 (1 per dword). */
+#define VOLUME_32x16(s,v)                  /* .. |   vh  |   vl  | */                   \
+      " pxor  %%mm4, %%mm4           \n\t" /* .. |    0  |    0  | */                   \
+      " punpcklwd %%mm4, "#s"        \n\t" /* .. |    0  |   p0  | */                   \
+      " pcmpgtw "#v", %%mm4          \n\t" /* .. |    0  | s(vl) | */                   \
+      " pand "#s", %%mm4             \n\t" /* .. |    0  |  (p0) |  (vl >> 15) & p */   \
+      " movq %%mm6, %%mm5            \n\t" /* .. |  ffff |   0   | */                   \
+      " pand "#v", %%mm5             \n\t" /* .. |   vh  |   0   | */                   \
+      " por %%mm5, %%mm4             \n\t" /* .. |   vh  |  (p0) | */                   \
+      " pmulhw "#s", "#v"            \n\t" /* .. |    0  | vl*p0 | */                   \
+      " paddw %%mm4, "#v"            \n\t" /* .. |   vh  | vl*p0 | vh + sign correct */ \
+      " pslld $16, "#s"              \n\t" /* .. |   p0  |    0  | */                   \
+      " por %%mm7, "#s"              \n\t" /* .. |   p0  |    1  | */                   \
+      " pmaddwd "#s", "#v"           \n\t" /* .. |    p0 * v0    | */                   \
+      " packssdw "#v", "#v"          \n\t" /* .. | p1*v1 | p0*v0 | */
+
+/* advances the channel index %3 = (%3 + a) % b, using %4 as scratch. b is
+ * subtracted at most once, so this requires %3 < b on entry and a <= b. */
+#define MOD_ADD(a,b) \
+      " add "#a", %3                 \n\t" /* channel += a                     */ \
+      " mov %3, %4                   \n\t" \
+      " sub "#b", %4                 \n\t" /* tmp = channel - b                */ \
+      " cmovae %4, %3                \n\t" /* no borrow (channel >= b): use tmp */
+
+/* swap the two bytes of every 16-bit word in s; %mm4 is used as scratch */
+#define SWAP_16(s) \
+      " movq "#s", %%mm4             \n\t" /* .. |  h  l |  */ \
+      " psrlw $8, %%mm4              \n\t" /* .. |  0  h |  */ \
+      " psllw $8, "#s"               \n\t" /* .. |  l  0 |  */ \
+      " por %%mm4, "#s"              \n\t" /* .. |  l  h |  */
+
+/* byte-swap the 16-bit words of two registers, interleaved for better pairing; scratch: %mm4, %mm5 */
+#define SWAP_16_2(s1,s2) \
+      " movq "#s1", %%mm4            \n\t" /* .. |  h  l |  */ \
+      " movq "#s2", %%mm5            \n\t"                     \
+      " psrlw $8, %%mm4              \n\t" /* .. |  0  h |  */ \
+      " psrlw $8, %%mm5              \n\t"                     \
+      " psllw $8, "#s1"              \n\t" /* .. |  l  0 |  */ \
+      " psllw $8, "#s2"              \n\t"                     \
+      " por %%mm4, "#s1"             \n\t" /* .. |  l  h |  */ \
+      " por %%mm5, "#s2"             \n\t"
+
+/* Scale native-endian S16 samples in place by per-channel 16:16 volumes (MMX); length is in bytes. */
+static void pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) {
+    pa_reg_x86 channel, temp;
+    /* The wrap-around period must be at least 4 (the widest group) and a multiple of the real
+     * channel count (3 -> 6); the volume array is read past its end and needs cyclic padding. */
+    channels = (channels && channels < 4U) ? channels * ((channels + 3U) / channels) : PA_MAX (4U, channels);
+    __asm__ __volatile__ (
+        " xor %3, %3                    \n\t" /* channel = 0; written before %5 is read, hence "=&D" */
+        " sar $1, %2                    \n\t" /* length /= sizeof (int16_t) */
+        " pcmpeqw %%mm6, %%mm6          \n\t" /* .. |  ffff |  ffff | */
+        " pcmpeqw %%mm7, %%mm7          \n\t" /* .. |  ffff |  ffff | */
+        " pslld  $16, %%mm6             \n\t" /* .. |  ffff |     0 | */
+        " psrld  $31, %%mm7             \n\t" /* .. |     0 |     1 | */
+
+        " test $1, %2                   \n\t" /* check for odd samples */
+        " je 2f                         \n\t"
+
+        " movd (%1, %3, 4), %%mm0       \n\t" /* |  v0h  |  v0l  | */
+        " movw (%0), %w4                \n\t" /*     ..  |  p0   | */
+        " movd %4, %%mm1                \n\t"
+        VOLUME_32x16 (%%mm1, %%mm0)
+        " movd %%mm0, %4                \n\t" /*     ..  | p0*v0 | */
+        " movw %w4, (%0)                \n\t"
+        " add $2, %0                    \n\t"
+        MOD_ADD ($1, %5)
+
+        "2:                             \n\t"
+        " sar $1, %2                    \n\t" /* prepare for processing 2 samples at a time */
+        " test $1, %2                   \n\t" /* check for odd samples */
+        " je 4f                         \n\t"
+
+        "3:                             \n\t" /* do samples in groups of 2 */
+        " movq (%1, %3, 4), %%mm0       \n\t" /* |  v1h  |  v1l  |  v0h  |  v0l  | */
+        " movd (%0), %%mm1              \n\t" /*              .. |   p1  |  p0   | */
+        VOLUME_32x16 (%%mm1, %%mm0)
+        " movd %%mm0, (%0)              \n\t" /*              .. | p1*v1 | p0*v0 | */
+        " add $4, %0                    \n\t"
+        MOD_ADD ($2, %5)
+
+        "4:                             \n\t"
+        " sar $1, %2                    \n\t" /* prepare for processing 4 samples at a time */
+        " cmp $0, %2                    \n\t"
+        " je 6f                         \n\t"
+
+        "5:                             \n\t" /* do samples in groups of 4 */
+        " movq (%1, %3, 4), %%mm0       \n\t" /* |  v1h  |  v1l  |  v0h  |  v0l  | */
+        " movq 8(%1, %3, 4), %%mm2      \n\t" /* |  v3h  |  v3l  |  v2h  |  v2l  | */
+        " movd (%0), %%mm1              \n\t" /*              .. |   p1  |  p0   | */
+        " movd 4(%0), %%mm3             \n\t" /*              .. |   p3  |  p2   | */
+        VOLUME_32x16 (%%mm1, %%mm0)
+        VOLUME_32x16 (%%mm3, %%mm2)
+        " movd %%mm0, (%0)              \n\t" /*              .. | p1*v1 | p0*v0 | */
+        " movd %%mm2, 4(%0)             \n\t" /*              .. | p3*v3 | p2*v2 | */
+        " add $8, %0                    \n\t"
+        MOD_ADD ($4, %5)
+        " dec %2                        \n\t"
+        " jne 5b                        \n\t"
+
+        "6:                             \n\t"
+        " emms                          \n\t"
+        : "+r" (samples), "+r" (volumes), "+r" (length), "=&D" (channel), "=&r" (temp)
+#if defined (__i386__)
+        : "m" (channels) /* kept in memory: too few registers on i386 once %3 is early-clobber */
+#else
+        : "r" ((pa_reg_x86)channels)
+#endif
+        : "cc", "memory" /* the samples are rewritten in place behind the compiler's back */
+    );
+}
+
+/* Like pa_volume_s16ne_mmx but for reverse-endian S16: samples are byte-swapped, scaled, swapped back. */
+static void pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) {
+    pa_reg_x86 channel, temp;
+    /* The wrap-around period must be at least 4 (the widest group) and a multiple of the real
+     * channel count (3 -> 6); the volume array is read past its end and needs cyclic padding. */
+    channels = (channels && channels < 4U) ? channels * ((channels + 3U) / channels) : PA_MAX (4U, channels);
+    __asm__ __volatile__ (
+        " xor %3, %3                    \n\t" /* channel = 0; written before %5 is read, hence "=&D" */
+        " sar $1, %2                    \n\t" /* length /= sizeof (int16_t) */
+        " pcmpeqw %%mm6, %%mm6          \n\t" /* .. |  ffff |  ffff | */
+        " pcmpeqw %%mm7, %%mm7          \n\t" /* .. |  ffff |  ffff | */
+        " pslld  $16, %%mm6             \n\t" /* .. |  ffff |     0 | */
+        " psrld  $31, %%mm7             \n\t" /* .. |     0 |     1 | */
+
+        " test $1, %2                   \n\t" /* check for odd samples */
+        " je 2f                         \n\t"
+
+        " movd (%1, %3, 4), %%mm0       \n\t" /* |  v0h  |  v0l  | */
+        " movw (%0), %w4                \n\t" /*     ..  |  p0   | */
+        " rorw $8, %w4                  \n\t"
+        " movd %4, %%mm1                \n\t"
+        VOLUME_32x16 (%%mm1, %%mm0)
+        " movd %%mm0, %4                \n\t" /*     ..  | p0*v0 | */
+        " rorw $8, %w4                  \n\t"
+        " movw %w4, (%0)                \n\t"
+        " add $2, %0                    \n\t"
+        MOD_ADD ($1, %5)
+
+        "2:                             \n\t"
+        " sar $1, %2                    \n\t" /* prepare for processing 2 samples at a time */
+        " test $1, %2                   \n\t" /* check for odd samples */
+        " je 4f                         \n\t"
+
+        "3:                             \n\t" /* do samples in groups of 2 */
+        " movq (%1, %3, 4), %%mm0       \n\t" /* |  v1h  |  v1l  |  v0h  |  v0l  | */
+        " movd (%0), %%mm1              \n\t" /*              .. |   p1  |  p0   | */
+        SWAP_16 (%%mm1)
+        VOLUME_32x16 (%%mm1, %%mm0)
+        SWAP_16 (%%mm0)
+        " movd %%mm0, (%0)              \n\t" /*              .. | p1*v1 | p0*v0 | */
+        " add $4, %0                    \n\t"
+        MOD_ADD ($2, %5)
+
+        "4:                             \n\t"
+        " sar $1, %2                    \n\t" /* prepare for processing 4 samples at a time */
+        " cmp $0, %2                    \n\t"
+        " je 6f                         \n\t"
+
+        "5:                             \n\t" /* do samples in groups of 4 */
+        " movq (%1, %3, 4), %%mm0       \n\t" /* |  v1h  |  v1l  |  v0h  |  v0l  | */
+        " movq 8(%1, %3, 4), %%mm2      \n\t" /* |  v3h  |  v3l  |  v2h  |  v2l  | */
+        " movd (%0), %%mm1              \n\t" /*              .. |   p1  |  p0   | */
+        " movd 4(%0), %%mm3             \n\t" /*              .. |   p3  |  p2   | */
+        SWAP_16_2 (%%mm1, %%mm3)
+        VOLUME_32x16 (%%mm1, %%mm0)
+        VOLUME_32x16 (%%mm3, %%mm2)
+        SWAP_16_2 (%%mm0, %%mm2)
+        " movd %%mm0, (%0)              \n\t" /*              .. | p1*v1 | p0*v0 | */
+        " movd %%mm2, 4(%0)             \n\t" /*              .. | p3*v3 | p2*v2 | */
+        " add $8, %0                    \n\t"
+        MOD_ADD ($4, %5)
+        " dec %2                        \n\t"
+        " jne 5b                        \n\t"
+
+        "6:                             \n\t"
+        " emms                          \n\t"
+        : "+r" (samples), "+r" (volumes), "+r" (length), "=&D" (channel), "=&r" (temp)
+#if defined (__i386__)
+        : "m" (channels) /* kept in memory: too few registers on i386 once %3 is early-clobber */
+#else
+        : "r" ((pa_reg_x86)channels)
+#endif
+        : "cc", "memory" /* the samples are rewritten in place behind the compiler's back */
+    );
+}
+
+#undef RUN_TEST /* drop this line to run the self-test below from pa_volume_func_init_mmx() */
+
+#ifdef RUN_TEST
+#define CHANNELS 2
+#define SAMPLES 1021
+#define TIMES 1000
+#define PADDING 16
+
+static void run_test (void) {
+    int16_t samples[SAMPLES];
+    int16_t samples_ref[SAMPLES];
+    int16_t samples_orig[SAMPLES];
+    int32_t volumes[CHANNELS + PADDING];
+    int i, j, padding;
+    pa_do_volume_func_t func;
+    pa_usec_t start, stop;
+
+    func = pa_get_volume_func (PA_SAMPLE_S16NE);
+
+    printf ("checking MMX %zu\n", sizeof (samples));
+
+    pa_random (samples, sizeof (samples));
+    memcpy (samples_ref, samples, sizeof (samples));
+    memcpy (samples_orig, samples, sizeof (samples));
+
+    for (i = 0; i < CHANNELS; i++)
+        volumes[i] = rand() >> 1;
+    for (padding = 0; padding < PADDING; padding++, i++) /* cyclic padding for the overread */
+        volumes[i] = volumes[padding];
+
+    func (samples_ref, volumes, CHANNELS, sizeof (samples));
+    pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
+    for (i = 0; i < SAMPLES; i++) {
+        if (samples[i] != samples_ref[i]) {
+            printf ("%d: %04x != %04x (%04x * %04x)\n", i, (uint16_t) samples[i], (uint16_t) samples_ref[i],
+                  (uint16_t) samples_orig[i], (unsigned) volumes[i % CHANNELS]);
+        }
+    }
+
+    start = pa_rtclock_now();
+    for (j = 0; j < TIMES; j++) {
+        memcpy (samples, samples_orig, sizeof (samples));
+        pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples));
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("MMX: %llu usec.", (long long unsigned int)(stop - start));
+
+    start = pa_rtclock_now();
+    for (j = 0; j < TIMES; j++) {
+        memcpy (samples_ref, samples_orig, sizeof (samples));
+        func (samples_ref, volumes, CHANNELS, sizeof (samples));
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+}
+#endif
+
+#endif /* defined (__i386__) || defined (__amd64__) */
+
+
+void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) {
+#if defined (__i386__) || defined (__amd64__)
+    pa_log_info("Initialising MMX optimized functions.");
+    /* NOTE(review): flags is not inspected here; presumably the caller already checked for MMX -- confirm. */
+#ifdef RUN_TEST
+    run_test ();
+#endif
+    /* Replace the registered S16 volume routines (both byte orders) with the MMX versions. */
+    pa_set_volume_func (PA_SAMPLE_S16NE,     (pa_do_volume_func_t) pa_volume_s16ne_mmx);
+    pa_set_volume_func (PA_SAMPLE_S16RE,     (pa_do_volume_func_t) pa_volume_s16re_mmx);
+#endif /* defined (__i386__) || defined (__amd64__) */
+}
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
new file mode 100644
index 00000000..54af4a57
--- /dev/null
+++ b/src/pulsecore/svolume_sse.c
@@ -0,0 +1,314 @@
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2004-2006 Lennart Poettering
+  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk>
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as published
+  by the Free Software Foundation; either version 2.1 of the License,
+  or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <pulse/timeval.h>
+#include <pulsecore/random.h>
+#include <pulsecore/macro.h>
+#include <pulsecore/g711.h>
+#include <pulsecore/core-util.h>
+
+#include "cpu-x86.h"
+
+#include "sample-util.h"
+#include "endianmacros.h"
+
+#if defined (__i386__) || defined (__amd64__)
+
+#define VOLUME_32x16(s,v)                  /* .. |   vh  |   vl  | */                   \
+      " pxor %%xmm4, %%xmm4          \n\t" /* .. |    0  |    0  | */                   \
+      " punpcklwd %%xmm4, "#s"       \n\t" /* .. |    0  |   p0  | */                   \
+      " pcmpgtw "#s", %%xmm4         \n\t" /* .. |    0  | s(p0) | */                   \
+      " pand "#v", %%xmm4            \n\t" /* .. |    0  |  (vl) | */                   \
+      " movdqa "#s", %%xmm5          \n\t" /* keep | 0 | p0 | for the vh multiply */    \
+      " pmulhuw "#v", "#s"           \n\t" /* .. |    0  | vl*p0 | */                   \
+      " psubd %%xmm4, "#s"           \n\t" /* .. |    0  | vl*p0 | + sign correct */    \
+      " psrld $16, "#v"              \n\t" /* .. |    0  |   vh  | */                   \
+      " pmaddwd %%xmm5, "#v"         \n\t" /* .. |    p0 * vh    | */                   \
+      " paddd "#s", "#v"             \n\t" /* .. |    p0 * v0    | */                   \
+      " packssdw "#v", "#v"          \n\t" /* .. | p1*v1 | p0*v0 | */
+
+#define MOD_ADD(a,b) /* %3 = (%3 + a) % b with %4 as scratch; needs %3 < b and a <= b */ \
+      " add "#a", %3                 \n\t" /* channel += inc           */ \
+      " mov %3, %4                   \n\t"                                \
+      " sub "#b", %4                 \n\t" /* tmp = channel - channels */ \
+      " cmovae %4, %3                \n\t" /* if (channel >= channels) channel = tmp */
+
+/* swap the two bytes of every 16-bit word in s; %xmm4 is used as scratch */
+#define SWAP_16(s) \
+      " movdqa "#s", %%xmm4          \n\t" /* .. |  h  l |  */ \
+      " psrlw $8, %%xmm4             \n\t" /* .. |  0  h |  */ \
+      " psllw $8, "#s"               \n\t" /* .. |  l  0 |  */ \
+      " por %%xmm4, "#s"             \n\t" /* .. |  l  h |  */
+
+/* byte-swap the 16-bit words of two registers, interleaved for better pairing; scratch: %xmm4, %xmm5 */
+#define SWAP_16_2(s1,s2) \
+      " movdqa "#s1", %%xmm4         \n\t" /* .. |  h  l |  */ \
+      " movdqa "#s2", %%xmm5         \n\t"                     \
+      " psrlw $8, %%xmm4             \n\t" /* .. |  0  h |  */ \
+      " psrlw $8, %%xmm5             \n\t"                     \
+      " psllw $8, "#s1"              \n\t" /* .. |  l  0 |  */ \
+      " psllw $8, "#s2"              \n\t"                     \
+      " por %%xmm4, "#s1"            \n\t" /* .. |  l  h |  */ \
+      " por %%xmm5, "#s2"            \n\t"
+
+/* Scale native-endian S16 samples in place by per-channel 16:16 volumes (XMM); length is in bytes. */
+static void pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) {
+    pa_reg_x86 channel, temp;
+    /* The wrap-around period must be at least 8 (the widest group) and a multiple of the real
+     * channel count (3 -> 9, 5 -> 10, ...); the volume array is overread and needs cyclic padding. */
+    channels = (channels && channels < 8U) ? channels * ((channels + 7U) / channels) : PA_MAX (8U, channels);
+    __asm__ __volatile__ (
+        " xor %3, %3                    \n\t" /* channel = 0; written before %5 is read, hence "=&D" */
+        " sar $1, %2                    \n\t" /* length /= sizeof (int16_t) */
+
+        " test $1, %2                   \n\t" /* check for odd samples */
+        " je 2f                         \n\t"
+
+        " movd (%1, %3, 4), %%xmm0      \n\t" /* |  v0h  |  v0l  | */
+        " movw (%0), %w4                \n\t" /*     ..  |   p0  | */
+        " movd %4, %%xmm1               \n\t"
+        VOLUME_32x16 (%%xmm1, %%xmm0)
+        " movd %%xmm0, %4               \n\t" /*     ..  | p0*v0 | */
+        " movw %w4, (%0)                \n\t"
+        " add $2, %0                    \n\t"
+        MOD_ADD ($1, %5)
+
+        "2:                             \n\t"
+        " sar $1, %2                    \n\t" /* prepare for processing 2 samples at a time */
+        " test $1, %2                   \n\t"
+        " je 4f                         \n\t"
+
+        "3:                             \n\t" /* do samples in groups of 2 */
+        " movq (%1, %3, 4), %%xmm0      \n\t" /* |  v1h  |  v1l  |  v0h  |  v0l  | */
+        " movd (%0), %%xmm1             \n\t" /*              .. |   p1  |  p0   | */
+        VOLUME_32x16 (%%xmm1, %%xmm0)
+        " movd %%xmm0, (%0)             \n\t" /*              .. | p1*v1 | p0*v0 | */
+        " add $4, %0                    \n\t"
+        MOD_ADD ($2, %5)
+
+        "4:                             \n\t"
+        " sar $1, %2                    \n\t" /* prepare for processing 4 samples at a time */
+        " test $1, %2                   \n\t"
+        " je 6f                         \n\t"
+
+        /* FIXME, we can do aligned access of the volume values if we can guarantee
+         * that the array is 16 bytes aligned, we probably have to do the odd values
+         * after this then. */
+        "5:                             \n\t" /* do samples in groups of 4 */
+        " movdqu (%1, %3, 4), %%xmm0    \n\t" /* |  v3h  |  v3l  ..  v0h  |  v0l  | */
+        " movq (%0), %%xmm1             \n\t" /*              .. |   p3  ..  p0   | */
+        VOLUME_32x16 (%%xmm1, %%xmm0)
+        " movq %%xmm0, (%0)             \n\t" /*              .. | p3*v3 .. p0*v0 | */
+        " add $8, %0                    \n\t"
+        MOD_ADD ($4, %5)
+
+        "6:                             \n\t"
+        " sar $1, %2                    \n\t" /* prepare for processing 8 samples at a time */
+        " cmp $0, %2                    \n\t"
+        " je 8f                         \n\t"
+
+        "7:                             \n\t" /* do samples in groups of 8 */
+        " movdqu (%1, %3, 4), %%xmm0    \n\t" /* |  v3h  |  v3l  ..  v0h  |  v0l  | */
+        " movdqu 16(%1, %3, 4), %%xmm2  \n\t" /* |  v7h  |  v7l  ..  v4h  |  v4l  | */
+        " movq (%0), %%xmm1             \n\t" /*              .. |   p3  ..  p0   | */
+        " movq 8(%0), %%xmm3            \n\t" /*              .. |   p7  ..  p4   | */
+        VOLUME_32x16 (%%xmm1, %%xmm0)
+        VOLUME_32x16 (%%xmm3, %%xmm2)
+        " movq %%xmm0, (%0)             \n\t" /*              .. | p3*v3 .. p0*v0 | */
+        " movq %%xmm2, 8(%0)            \n\t" /*              .. | p7*v7 .. p4*v4 | */
+        " add $16, %0                   \n\t"
+        MOD_ADD ($8, %5)
+        " dec %2                        \n\t"
+        " jne 7b                        \n\t"
+        "8:                             \n\t"
+        : "+r" (samples), "+r" (volumes), "+r" (length), "=&D" (channel), "=&r" (temp)
+#if defined (__i386__)
+        : "m" (channels) /* kept in memory: too few registers on i386 once %3 is early-clobber */
+#else
+        : "r" ((pa_reg_x86)channels)
+#endif
+        : "cc", "memory" /* the samples are rewritten in place behind the compiler's back */
+    );
+}
+
+/* Like pa_volume_s16ne_sse but for reverse-endian S16: samples are byte-swapped, scaled, swapped back. */
+static void pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) {
+    pa_reg_x86 channel, temp;
+    /* The wrap-around period must be at least 8 (the widest group) and a multiple of the real
+     * channel count (3 -> 9, 5 -> 10, ...); the volume array is overread and needs cyclic padding. */
+    channels = (channels && channels < 8U) ? channels * ((channels + 7U) / channels) : PA_MAX (8U, channels);
+    __asm__ __volatile__ (
+        " xor %3, %3                    \n\t" /* channel = 0; written before %5 is read, hence "=&D" */
+        " sar $1, %2                    \n\t" /* length /= sizeof (int16_t) */
+
+        " test $1, %2                   \n\t" /* check for odd samples */
+        " je 2f                         \n\t"
+
+        " movd (%1, %3, 4), %%xmm0      \n\t" /* |  v0h  |  v0l  | */
+        " movw (%0), %w4                \n\t" /*     ..  |   p0  | */
+        " rorw $8, %w4                  \n\t"
+        " movd %4, %%xmm1               \n\t"
+        VOLUME_32x16 (%%xmm1, %%xmm0)
+        " movd %%xmm0, %4               \n\t" /*     ..  | p0*v0 | */
+        " rorw $8, %w4                  \n\t"
+        " movw %w4, (%0)                \n\t"
+        " add $2, %0                    \n\t"
+        MOD_ADD ($1, %5)
+
+        "2:                             \n\t"
+        " sar $1, %2                    \n\t" /* prepare for processing 2 samples at a time */
+        " test $1, %2                   \n\t"
+        " je 4f                         \n\t"
+
+        "3:                             \n\t" /* do samples in groups of 2 */
+        " movq (%1, %3, 4), %%xmm0      \n\t" /* |  v1h  |  v1l  |  v0h  |  v0l  | */
+        " movd (%0), %%xmm1             \n\t" /*              .. |   p1  |  p0   | */
+        SWAP_16 (%%xmm1)
+        VOLUME_32x16 (%%xmm1, %%xmm0)
+        SWAP_16 (%%xmm0)
+        " movd %%xmm0, (%0)             \n\t" /*              .. | p1*v1 | p0*v0 | */
+        " add $4, %0                    \n\t"
+        MOD_ADD ($2, %5)
+
+        "4:                             \n\t"
+        " sar $1, %2                    \n\t" /* prepare for processing 4 samples at a time */
+        " test $1, %2                   \n\t"
+        " je 6f                         \n\t"
+
+        /* FIXME, we can do aligned access of the volume values if we can guarantee
+         * that the array is 16 bytes aligned, we probably have to do the odd values
+         * after this then. */
+        "5:                             \n\t" /* do samples in groups of 4 */
+        " movdqu (%1, %3, 4), %%xmm0    \n\t" /* |  v3h  |  v3l  ..  v0h  |  v0l  | */
+        " movq (%0), %%xmm1             \n\t" /*              .. |   p3  ..  p0   | */
+        SWAP_16 (%%xmm1)
+        VOLUME_32x16 (%%xmm1, %%xmm0)
+        SWAP_16 (%%xmm0)
+        " movq %%xmm0, (%0)             \n\t" /*              .. | p3*v3 .. p0*v0 | */
+        " add $8, %0                    \n\t"
+        MOD_ADD ($4, %5)
+
+        "6:                             \n\t"
+        " sar $1, %2                    \n\t" /* prepare for processing 8 samples at a time */
+        " cmp $0, %2                    \n\t"
+        " je 8f                         \n\t"
+
+        "7:                             \n\t" /* do samples in groups of 8 */
+        " movdqu (%1, %3, 4), %%xmm0    \n\t" /* |  v3h  |  v3l  ..  v0h  |  v0l  | */
+        " movdqu 16(%1, %3, 4), %%xmm2  \n\t" /* |  v7h  |  v7l  ..  v4h  |  v4l  | */
+        " movq (%0), %%xmm1             \n\t" /*              .. |   p3  ..  p0   | */
+        " movq 8(%0), %%xmm3            \n\t" /*              .. |   p7  ..  p4   | */
+        SWAP_16_2 (%%xmm1, %%xmm3)
+        VOLUME_32x16 (%%xmm1, %%xmm0)
+        VOLUME_32x16 (%%xmm3, %%xmm2)
+        SWAP_16_2 (%%xmm0, %%xmm2)
+        " movq %%xmm0, (%0)             \n\t" /*              .. | p3*v3 .. p0*v0 | */
+        " movq %%xmm2, 8(%0)            \n\t" /*              .. | p7*v7 .. p4*v4 | */
+        " add $16, %0                   \n\t"
+        MOD_ADD ($8, %5)
+        " dec %2                        \n\t"
+        " jne 7b                        \n\t"
+        "8:                             \n\t"
+        : "+r" (samples), "+r" (volumes), "+r" (length), "=&D" (channel), "=&r" (temp)
+#if defined (__i386__)
+        : "m" (channels) /* kept in memory: too few registers on i386 once %3 is early-clobber */
+#else
+        : "r" ((pa_reg_x86)channels)
+#endif
+        : "cc", "memory" /* the samples are rewritten in place behind the compiler's back */
+    );
+}
+
+#undef RUN_TEST /* drop this line to run the self-test below from pa_volume_func_init_sse() */
+
+#ifdef RUN_TEST
+#define CHANNELS 2
+#define SAMPLES 1021
+#define TIMES 1000
+#define PADDING 16
+
+static void run_test (void) {
+    int16_t samples[SAMPLES];
+    int16_t samples_ref[SAMPLES];
+    int16_t samples_orig[SAMPLES];
+    int32_t volumes[CHANNELS + PADDING];
+    int i, j, padding;
+    pa_do_volume_func_t func;
+    pa_usec_t start, stop;
+
+    func = pa_get_volume_func (PA_SAMPLE_S16NE);
+
+    printf ("checking SSE %zu\n", sizeof (samples));
+
+    pa_random (samples, sizeof (samples));
+    memcpy (samples_ref, samples, sizeof (samples));
+    memcpy (samples_orig, samples, sizeof (samples));
+
+    for (i = 0; i < CHANNELS; i++)
+        volumes[i] = rand() >> 1;
+    for (padding = 0; padding < PADDING; padding++, i++) /* cyclic padding for the overread */
+        volumes[i] = volumes[padding];
+
+    func (samples_ref, volumes, CHANNELS, sizeof (samples));
+    pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
+    for (i = 0; i < SAMPLES; i++) {
+        if (samples[i] != samples_ref[i]) {
+            printf ("%d: %04x != %04x (%04x * %04x)\n", i, (uint16_t) samples[i], (uint16_t) samples_ref[i],
+                      (uint16_t) samples_orig[i], (unsigned) volumes[i % CHANNELS]);
+        }
+    }
+
+    start = pa_rtclock_now();
+    for (j = 0; j < TIMES; j++) {
+        memcpy (samples, samples_orig, sizeof (samples));
+        pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples));
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("SSE: %llu usec.", (long long unsigned int)(stop - start));
+
+    start = pa_rtclock_now();
+    for (j = 0; j < TIMES; j++) {
+        memcpy (samples_ref, samples_orig, sizeof (samples));
+        func (samples_ref, volumes, CHANNELS, sizeof (samples));
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+}
+#endif
+#endif /* defined (__i386__) || defined (__amd64__) */
+
+void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) {
+#if defined (__i386__) || defined (__amd64__)
+    pa_log_info("Initialising SSE optimized functions.");
+    /* NOTE(review): flags is not inspected here; presumably the caller already checked CPU support -- confirm. */
+#ifdef RUN_TEST
+    run_test ();
+#endif
+    /* Replace the registered S16 volume routines (both byte orders) with the XMM versions. */
+    pa_set_volume_func (PA_SAMPLE_S16NE,     (pa_do_volume_func_t) pa_volume_s16ne_sse);
+    pa_set_volume_func (PA_SAMPLE_S16RE,     (pa_do_volume_func_t) pa_volume_s16re_sse);
+#endif /* defined (__i386__) || defined (__amd64__) */
+}
diff --git a/src/pulsecore/usergroup.c b/src/pulsecore/usergroup.c
new file mode 100644
index 00000000..71b13bca
--- /dev/null
+++ b/src/pulsecore/usergroup.c
@@ -0,0 +1,372 @@
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2009 Ted Percival
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as
+  published by the Free Software Foundation; either version 2.1 of the
+  License, or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <sys/types.h>
+#include <errno.h>
+
+#ifdef HAVE_PWD_H
+#include <pwd.h>
+#endif
+
+#ifdef HAVE_GRP_H
+#include <grp.h>
+#endif
+
+#include <pulse/xmalloc.h>
+#include <pulsecore/macro.h>
+
+#include "usergroup.h"
+
+#ifdef HAVE_GRP_H
+
+/* Computes the initial allocation size for the getgr*_r() wrappers: the
+   system's hinted scratch buffer size (512 bytes when no usable hint
+   exists) with room for one struct group in front of it. */
+static size_t starting_getgr_buflen(void) {
+    long hint = -1;
+    size_t scratch, total;
+
+#ifdef _SC_GETGR_R_SIZE_MAX
+    hint = sysconf(_SC_GETGR_R_SIZE_MAX);
+#endif
+
+    /* a non-positive hint means no usable limit was reported */
+    scratch = hint > 0 ? (size_t) hint : 512;
+    total = scratch + sizeof(struct group);
+
+    /* If the addition wrapped, fall back to the bare scratch size. */
+    if (total < scratch)
+        return scratch;
+
+    return total;
+}
+
+/* Computes the initial allocation size for the getpw*_r() wrappers: the
+   system's hinted scratch buffer size (512 bytes when no usable hint
+   exists) with room for one struct passwd in front of it.
+   NOTE(review): sits inside the HAVE_GRP_H section but is used under HAVE_PWD_H. */
+static size_t starting_getpw_buflen(void) {
+    long hint = -1;
+    size_t scratch, total;
+
+#ifdef _SC_GETPW_R_SIZE_MAX
+    hint = sysconf(_SC_GETPW_R_SIZE_MAX);
+#endif
+
+    /* a non-positive hint means no usable limit was reported */
+    scratch = hint > 0 ? (size_t) hint : 512;
+    total = scratch + sizeof(struct passwd);
+
+    /* If the addition wrapped, fall back to the bare scratch size. */
+    if (total < scratch)
+        return scratch;
+
+    return total;
+}
+
+/* Replaces the allocation at *bufptr (currently *buflenptr bytes long) with
+   a fresh one of twice the size and stores the new pointer and length back
+   through the arguments. The memory must come from the pa_*alloc family
+   and is eventually released with pa_xfree().
+
+   The old contents are discarded rather than copied, which is what sets
+   this apart from realloc().
+
+   Returns 0 on success and -1 on failure with errno set: EINVAL for a NULL
+   argument or NULL buffer, EOVERFLOW when doubling would wrap size_t.
+ */
+static int expand_buffer_trashcontents(void **bufptr, size_t *buflenptr) {
+    size_t oldlen, doubled;
+
+    if (bufptr == NULL || *bufptr == NULL || buflenptr == NULL) {
+        errno = EINVAL;
+        return -1;
+    }
+
+    oldlen = *buflenptr;
+    doubled = oldlen + oldlen;
+
+    if (doubled < oldlen) {
+        errno = EOVERFLOW;
+        return -1;
+    }
+
+    /* Nothing in the old block is worth keeping, so drop it first. */
+    pa_xfree(*bufptr);
+    *buflenptr = doubled;
+    *bufptr = pa_xmalloc(doubled);
+
+    return 0;
+}
+
+#ifdef HAVE_GETGRGID_R
+/* Thread-safe getgrgid() replacement built on getgrgid_r(); the buffer is
+   doubled and the lookup retried for as long as the call reports ERANGE.
+
+   Returns the group entry for gid, or NULL if the lookup failed or no such
+   entry exists. On failure errno holds the error code; for a missing entry
+   this wrapper does not set errno, so zero it before calling.
+   Release the result with pa_getgrgid_free(). */
+struct group *pa_getgrgid_malloc(gid_t gid) {
+    size_t buflen, getgr_buflen;
+    int err;
+    void *buf, *getgr_buf;
+    struct group *result = NULL;
+
+    buflen = starting_getgr_buflen();
+    buf = pa_xmalloc(buflen);
+
+    getgr_buflen = buflen - sizeof(struct group);
+    getgr_buf = (char *)buf + sizeof(struct group); /* string storage follows the struct */
+
+    while ((err = getgrgid_r(gid, (struct group *)buf, getgr_buf,
+                    getgr_buflen, &result)) == ERANGE)
+    {
+        if (expand_buffer_trashcontents(&buf, &buflen)) {
+            err = errno; /* keep the reason growing failed instead of ERANGE */
+            break;
+        }
+        getgr_buflen = buflen - sizeof(struct group);
+        getgr_buf = (char *)buf + sizeof(struct group);
+    }
+
+    if (err || !result) {
+        result = NULL;
+        pa_xfree(buf);
+        buf = NULL;
+        if (err)
+            errno = err; /* the *_r call returns its error code; errno is not guaranteed to be set */
+    }
+
+    pa_assert(result == buf || result == NULL);
+
+    return result;
+}
+
+void pa_getgrgid_free(struct group *grp) {
+    pa_xfree(grp); /* the entry and its string storage share one allocation */
+}
+
+#else /* !HAVE_GETGRGID_R */
+
+struct group *pa_getgrgid_malloc(gid_t gid) {
+    return getgrgid(gid); /* plain getgrgid() result, passed through without copying; nothing is allocated */
+}
+
+void pa_getgrgid_free(struct group *grp) {
+    /* nothing to release: this build's pa_getgrgid_malloc() hands back getgrgid()'s own pointer */
+    return;
+}
+
+#endif /* !HAVE_GETGRGID_R */
+
+#ifdef HAVE_GETGRNAM_R
+/* Thread-safe getgrnam() replacement built on getgrnam_r(); the buffer is
+   doubled and the lookup retried for as long as the call reports ERANGE.
+
+   Returns the group entry for name, or NULL if the lookup failed or no such
+   entry exists. On failure errno holds the error code; for a missing entry
+   this wrapper does not set errno, so zero it before calling.
+   Release the result with pa_getgrnam_free(). */
+struct group *pa_getgrnam_malloc(const char *name) {
+    size_t buflen, getgr_buflen;
+    int err;
+    void *buf, *getgr_buf;
+    struct group *result = NULL;
+
+    buflen = starting_getgr_buflen();
+    buf = pa_xmalloc(buflen);
+
+    getgr_buflen = buflen - sizeof(struct group);
+    getgr_buf = (char *)buf + sizeof(struct group); /* string storage follows the struct */
+
+    while ((err = getgrnam_r(name, (struct group *)buf, getgr_buf,
+                    getgr_buflen, &result)) == ERANGE)
+    {
+        if (expand_buffer_trashcontents(&buf, &buflen)) {
+            err = errno; /* keep the reason growing failed instead of ERANGE */
+            break;
+        }
+        getgr_buflen = buflen - sizeof(struct group);
+        getgr_buf = (char *)buf + sizeof(struct group);
+    }
+
+    if (err || !result) {
+        result = NULL;
+        pa_xfree(buf);
+        buf = NULL;
+        if (err)
+            errno = err; /* the *_r call returns its error code; errno is not guaranteed to be set */
+    }
+
+    pa_assert(result == buf || result == NULL);
+
+    return result;
+}
+
+void pa_getgrnam_free(struct group *group) {
+    pa_xfree(group); /* the entry and its string storage share one allocation */
+}
+
+#else /* !HAVE_GETGRNAM_R */
+
+struct group *pa_getgrnam_malloc(const char *name) {
+    return getgrnam(name); /* plain getgrnam() result, passed through without copying; nothing is allocated */
+}
+
+void pa_getgrnam_free(struct group *group) {
+    /* nothing to release: this build's pa_getgrnam_malloc() hands back getgrnam()'s own pointer */
+    return;
+}
+
+#endif /* !HAVE_GETGRNAM_R */
+
+#endif /* HAVE_GRP_H */
+
+#ifdef HAVE_PWD_H
+
+#ifdef HAVE_GETPWNAM_R
+/* Thread-safe getpwnam() replacement built on getpwnam_r(); the buffer is
+   doubled and the lookup retried for as long as the call reports ERANGE.
+
+   Returns the passwd entry for name, or NULL if the lookup failed or no
+   such entry exists. On failure errno holds the error code; for a missing
+   entry this wrapper does not set errno, so zero it before calling.
+   Release the result with pa_getpwnam_free(). */
+struct passwd *pa_getpwnam_malloc(const char *name) {
+    size_t buflen, getpw_buflen;
+    int err;
+    void *buf, *getpw_buf;
+    struct passwd *result = NULL;
+
+    buflen = starting_getpw_buflen();
+    buf = pa_xmalloc(buflen);
+
+    getpw_buflen = buflen - sizeof(struct passwd);
+    getpw_buf = (char *)buf + sizeof(struct passwd); /* string storage follows the struct */
+
+    while ((err = getpwnam_r(name, (struct passwd *)buf, getpw_buf,
+                    getpw_buflen, &result)) == ERANGE)
+    {
+        if (expand_buffer_trashcontents(&buf, &buflen)) {
+            err = errno; /* keep the reason growing failed instead of ERANGE */
+            break;
+        }
+        getpw_buflen = buflen - sizeof(struct passwd);
+        getpw_buf = (char *)buf + sizeof(struct passwd);
+    }
+
+    if (err || !result) {
+        result = NULL;
+        pa_xfree(buf);
+        buf = NULL;
+        if (err)
+            errno = err; /* the *_r call returns its error code; errno is not guaranteed to be set */
+    }
+
+    pa_assert(result == buf || result == NULL);
+
+    return result;
+}
+
+void pa_getpwnam_free(struct passwd *passwd) {
+    pa_xfree(passwd); /* the entry and its string storage share one allocation */
+}
+
+#else /* !HAVE_GETPWNAM_R */
+
+struct passwd *pa_getpwnam_malloc(const char *name) {
+    return getpwnam(name); /* plain getpwnam() result, passed through without copying; nothing is allocated */
+}
+
+void pa_getpwnam_free(struct passwd *passwd) {
+    /* nothing to release: this build's pa_getpwnam_malloc() hands back getpwnam()'s own pointer */
+    return;
+}
+
+#endif /* !HAVE_GETPWNAM_R */
+
+#ifdef HAVE_GETPWUID_R
+/* Thread-safe getpwuid() replacement built on getpwuid_r(); the buffer is
+   doubled and the lookup retried for as long as the call reports ERANGE.
+
+   Returns the passwd entry for uid, or NULL if the lookup failed or no
+   such entry exists. On failure errno holds the error code; for a missing
+   entry this wrapper does not set errno, so zero it before calling.
+   Release the result with pa_getpwuid_free(). */
+struct passwd *pa_getpwuid_malloc(uid_t uid) {
+    size_t buflen, getpw_buflen;
+    int err;
+    void *buf, *getpw_buf;
+    struct passwd *result = NULL;
+
+    buflen = starting_getpw_buflen();
+    buf = pa_xmalloc(buflen);
+
+    getpw_buflen = buflen - sizeof(struct passwd);
+    getpw_buf = (char *)buf + sizeof(struct passwd); /* string storage follows the struct */
+
+    while ((err = getpwuid_r(uid, (struct passwd *)buf, getpw_buf,
+                    getpw_buflen, &result)) == ERANGE)
+    {
+        if (expand_buffer_trashcontents(&buf, &buflen)) {
+            err = errno; /* keep the reason growing failed instead of ERANGE */
+            break;
+        }
+        getpw_buflen = buflen - sizeof(struct passwd);
+        getpw_buf = (char *)buf + sizeof(struct passwd);
+    }
+
+    if (err || !result) {
+        result = NULL;
+        pa_xfree(buf);
+        buf = NULL;
+        if (err)
+            errno = err; /* the *_r call returns its error code; errno is not guaranteed to be set */
+    }
+
+    pa_assert(result == buf || result == NULL);
+
+    return result;
+}
+
+void pa_getpwuid_free(struct passwd *passwd) {
+    pa_xfree(passwd); /* the entry and its string storage share one allocation */
+}
+
+#else /* !HAVE_GETPWUID_R */
+
+struct passwd *pa_getpwuid_malloc(uid_t uid) {
+    return getpwuid(uid); /* plain getpwuid() result, passed through without copying; nothing is allocated */
+}
+
+void pa_getpwuid_free(struct passwd *passwd) {
+    /* nothing to release: this build's pa_getpwuid_malloc() hands back getpwuid()'s own pointer */
+    return;
+}
+
+#endif /* !HAVE_GETPWUID_R */
+
+#endif /* HAVE_PWD_H */
diff --git a/src/pulsecore/usergroup.h b/src/pulsecore/usergroup.h
new file mode 100644
index 00000000..1c091638
--- /dev/null
+++ b/src/pulsecore/usergroup.h
@@ -0,0 +1,51 @@
+#ifndef foousergrouphfoo
+#define foousergrouphfoo
+
+/***
+  This file is part of PulseAudio.
+
+  Copyright 2009 Ted Percival
+
+  PulseAudio is free software; you can redistribute it and/or modify
+  it under the terms of the GNU Lesser General Public License as
+  published by the Free Software Foundation; either version 2.1 of the
+  License, or (at your option) any later version.
+
+  PulseAudio is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public
+  License along with PulseAudio; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+  USA.
+***/
+
+#include <sys/types.h>
+
+#ifndef PACKAGE
+#error "Please include config.h before including this file!"
+#endif
+
+#ifdef HAVE_GRP_H
+
+struct group *pa_getgrgid_malloc(gid_t gid); /* NULL on failure; pair with pa_getgrgid_free() */
+void pa_getgrgid_free(struct group *grp);
+
+struct group *pa_getgrnam_malloc(const char *name); /* NULL on failure; pair with pa_getgrnam_free() */
+void pa_getgrnam_free(struct group *group);
+
+#endif /* HAVE_GRP_H */
+
+#ifdef HAVE_PWD_H
+
+struct passwd *pa_getpwuid_malloc(uid_t uid); /* NULL on failure; pair with pa_getpwuid_free() */
+void pa_getpwuid_free(struct passwd *passwd);
+
+struct passwd *pa_getpwnam_malloc(const char *name); /* NULL on failure; pair with pa_getpwnam_free() */
+void pa_getpwnam_free(struct passwd *passwd);
+
+#endif /* HAVE_PWD_H */
+
+#endif /* foousergrouphfoo */