diff options
| author | Lennart Poettering <lennart@poettering.net> | 2009-08-23 00:06:35 +0200 | 
|---|---|---|
| committer | Lennart Poettering <lennart@poettering.net> | 2009-08-23 00:06:35 +0200 | 
| commit | ab5ac06ac76c3afbbd99bce2840329dd74756a73 (patch) | |
| tree | 5735f1e62502d0706a0ed7631cb365cab281303a | |
| parent | d6fb8d10819bebc1cee203de7330cceeafde9fed (diff) | |
| parent | 6076cef2092391d8b46aa84f86857cffebce4583 (diff) | |
Merge commit 'wtay/optimize'
| -rw-r--r-- | configure.ac | 6 | ||||
| -rw-r--r-- | src/Makefile.am | 5 | ||||
| -rw-r--r-- | src/daemon/main.c | 9 | ||||
| -rw-r--r-- | src/modules/alsa/alsa-sink.c | 26 | ||||
| -rw-r--r-- | src/pulse/sample.c | 49 | ||||
| -rw-r--r-- | src/pulsecore/cpu-arm.c | 140 | ||||
| -rw-r--r-- | src/pulsecore/cpu-arm.h | 42 | ||||
| -rw-r--r-- | src/pulsecore/cpu-x86.c | 122 | ||||
| -rw-r--r-- | src/pulsecore/cpu-x86.h | 68 | ||||
| -rw-r--r-- | src/pulsecore/remap.c | 204 | ||||
| -rw-r--r-- | src/pulsecore/remap.h | 48 | ||||
| -rw-r--r-- | src/pulsecore/remap_mmx.c | 148 | ||||
| -rw-r--r-- | src/pulsecore/resampler.c | 229 | ||||
| -rw-r--r-- | src/pulsecore/sample-util.c | 396 | ||||
| -rw-r--r-- | src/pulsecore/sample-util.h | 5 | ||||
| -rw-r--r-- | src/pulsecore/sconv-s16le.c | 42 | ||||
| -rw-r--r-- | src/pulsecore/sconv.c | 188 | ||||
| -rw-r--r-- | src/pulsecore/sconv.h | 6 | ||||
| -rw-r--r-- | src/pulsecore/svolume_arm.c | 195 | ||||
| -rw-r--r-- | src/pulsecore/svolume_c.c | 335 | ||||
| -rw-r--r-- | src/pulsecore/svolume_mmx.c | 313 | ||||
| -rw-r--r-- | src/pulsecore/svolume_sse.c | 314 | ||||
| -rw-r--r-- | src/tests/envelope-test.c | 3 | ||||
| -rw-r--r-- | src/tests/mix-test.c | 3 | ||||
| -rw-r--r-- | src/tests/remix-test.c | 3 | ||||
| -rw-r--r-- | src/tests/resampler-test.c | 3 | 
26 files changed, 2255 insertions, 647 deletions
diff --git a/configure.ac b/configure.ac index 05312d39..40455e10 100644 --- a/configure.ac +++ b/configure.ac @@ -941,12 +941,6 @@ AC_SUBST(AVAHI_LIBS)  AC_SUBST(HAVE_AVAHI)  AM_CONDITIONAL([HAVE_AVAHI], [test "x$HAVE_AVAHI" = x1]) -### LIBOIL #### - -PKG_CHECK_MODULES(LIBOIL, [ liboil-0.3 >= 0.3.0 ]) -AC_SUBST(LIBOIL_CFLAGS) -AC_SUBST(LIBOIL_LIBS) -  ### JACK (optional) ####  AC_ARG_ENABLE([jack], diff --git a/src/Makefile.am b/src/Makefile.am index 73c0db5b..2fd9a734 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -831,9 +831,14 @@ libpulsecore_@PA_MAJORMINORMICRO@_la_SOURCES = \  		pulsecore/object.c pulsecore/object.h \  		pulsecore/play-memblockq.c pulsecore/play-memblockq.h \  		pulsecore/play-memchunk.c pulsecore/play-memchunk.h \ +		pulsecore/remap.c pulsecore/remap.h \ +		pulsecore/remap_mmx.c \  		pulsecore/resampler.c pulsecore/resampler.h \  		pulsecore/rtpoll.c pulsecore/rtpoll.h \  		pulsecore/sample-util.c pulsecore/sample-util.h \ +		pulsecore/cpu-arm.c pulsecore/cpu-x86.c \ +		pulsecore/svolume_c.c pulsecore/svolume_arm.c\ +		pulsecore/svolume_mmx.c pulsecore/svolume_sse.c \  		pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \  		pulsecore/sconv-s16le.c pulsecore/sconv-s16le.h \  		pulsecore/sconv.c pulsecore/sconv.h \ diff --git a/src/daemon/main.c b/src/daemon/main.c index 72984590..b1d1109a 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -39,8 +39,6 @@  #include <sys/types.h>  #include <sys/stat.h> -#include <liboil/liboil.h> -  #ifdef HAVE_SYS_MMAN_H  #include <sys/mman.h>  #endif @@ -95,6 +93,8 @@  #ifdef HAVE_DBUS  #include <pulsecore/dbus-shared.h>  #endif +#include <pulsecore/cpu-arm.h> +#include <pulsecore/cpu-x86.h>  #include "cmdline.h"  #include "cpulimit.h" @@ -823,6 +823,9 @@ int main(int argc, char *argv[]) {      pa_memtrap_install(); +    pa_cpu_init_x86(); +    pa_cpu_init_arm(); +      pa_assert_se(mainloop = pa_mainloop_new());      if (!(c = pa_core_new(pa_mainloop_get_api(mainloop), !conf->disable_shm, 
conf->shm_size))) { @@ -862,8 +865,6 @@ int main(int argc, char *argv[]) {      win32_timer = pa_mainloop_get_api(mainloop)->rtclock_time_new(pa_mainloop_get_api(mainloop), pa_gettimeofday(&win32_tv), message_cb, NULL);  #endif -    oil_init(); -      if (!conf->no_cpu_limit)          pa_assert_se(pa_cpu_limit_init(pa_mainloop_get_api(mainloop)) == 0); diff --git a/src/modules/alsa/alsa-sink.c b/src/modules/alsa/alsa-sink.c index e3707ae7..c3694729 100644 --- a/src/modules/alsa/alsa-sink.c +++ b/src/modules/alsa/alsa-sink.c @@ -68,6 +68,9 @@  #define TSCHED_MIN_SLEEP_USEC (10*PA_USEC_PER_MSEC)               /* 10ms -- Sleep at least 10ms on each iteration */  #define TSCHED_MIN_WAKEUP_USEC (4*PA_USEC_PER_MSEC)               /* 4ms  -- Wakeup at least this long before the buffer runs empty*/ +#define SMOOTHER_MIN_INTERVAL (2*PA_USEC_PER_MSEC)                /* 2ms -- min smoother update interval */ +#define SMOOTHER_MAX_INTERVAL (200*PA_USEC_PER_MSEC)              /* 200ms -- max smoother update interval */ +  #define VOLUME_ACCURACY (PA_VOLUME_NORM/100)  /* don't require volume adjustments to be perfectly correct. 
don't necessarily extend granularity in software unless the differences get greater than this level */  struct userdata { @@ -115,6 +118,8 @@ struct userdata {      pa_smoother *smoother;      uint64_t write_count;      uint64_t since_start; +    pa_usec_t smoother_interval; +    pa_usec_t last_smoother_update;      pa_reserve_wrapper *reserve;      pa_hook_slot *reserve_slot; @@ -723,17 +728,27 @@ static void update_smoother(struct userdata *u) {          now1 = pa_timespec_load(&htstamp);      } +    /* Hmm, if the timestamp is 0, then it wasn't set and we take the current time */ +    if (now1 <= 0) +        now1 = pa_rtclock_now(); + +    /* check if the time since the last update is bigger than the interval */ +    if (u->last_smoother_update > 0) { +        if (u->last_smoother_update + u->smoother_interval > now1) +            return; +    } +      position = (int64_t) u->write_count - ((int64_t) delay * (int64_t) u->frame_size);      if (PA_UNLIKELY(position < 0))          position = 0; -    /* Hmm, if the timestamp is 0, then it wasn't set and we take the current time */ -    if (now1 <= 0) -        now1 = pa_rtclock_now(); -      now2 = pa_bytes_to_usec((uint64_t) position, &u->sink->sample_spec); +    u->last_smoother_update = now1; +    /* exponentially increase the update interval up to the MAX limit */ +    u->smoother_interval = PA_MIN (u->smoother_interval * 2, SMOOTHER_MAX_INTERVAL); +      pa_smoother_put(u->smoother, now1, now2);  } @@ -906,6 +921,8 @@ static int unsuspend(struct userdata *u) {      u->write_count = 0;      pa_smoother_reset(u->smoother, pa_rtclock_now(), TRUE); +    u->smoother_interval = SMOOTHER_MIN_INTERVAL; +    u->last_smoother_update = 0;      u->first = TRUE;      u->since_start = 0; @@ -1622,6 +1639,7 @@ pa_sink *pa_alsa_sink_new(pa_module *m, pa_modargs *ma, const char*driver, pa_ca              5,              pa_rtclock_now(),              TRUE); +    u->smoother_interval = SMOOTHER_MIN_INTERVAL;      dev_id = 
pa_modargs_get_value(              ma, "device_id", diff --git a/src/pulse/sample.c b/src/pulse/sample.c index d5d38eda..9698d8a5 100644 --- a/src/pulse/sample.c +++ b/src/pulse/sample.c @@ -36,28 +36,27 @@  #include "sample.h" -size_t pa_sample_size_of_format(pa_sample_format_t f) { - -    static const size_t table[] = { -        [PA_SAMPLE_U8] = 1, -        [PA_SAMPLE_ULAW] = 1, -        [PA_SAMPLE_ALAW] = 1, -        [PA_SAMPLE_S16LE] = 2, -        [PA_SAMPLE_S16BE] = 2, -        [PA_SAMPLE_FLOAT32LE] = 4, -        [PA_SAMPLE_FLOAT32BE] = 4, -        [PA_SAMPLE_S32LE] = 4, -        [PA_SAMPLE_S32BE] = 4, -        [PA_SAMPLE_S24LE] = 3, -        [PA_SAMPLE_S24BE] = 3, -        [PA_SAMPLE_S24_32LE] = 4, -        [PA_SAMPLE_S24_32BE] = 4 -    }; +static const size_t size_table[] = { +    [PA_SAMPLE_U8] = 1, +    [PA_SAMPLE_ULAW] = 1, +    [PA_SAMPLE_ALAW] = 1, +    [PA_SAMPLE_S16LE] = 2, +    [PA_SAMPLE_S16BE] = 2, +    [PA_SAMPLE_FLOAT32LE] = 4, +    [PA_SAMPLE_FLOAT32BE] = 4, +    [PA_SAMPLE_S32LE] = 4, +    [PA_SAMPLE_S32BE] = 4, +    [PA_SAMPLE_S24LE] = 3, +    [PA_SAMPLE_S24BE] = 3, +    [PA_SAMPLE_S24_32LE] = 4, +    [PA_SAMPLE_S24_32BE] = 4 +}; +size_t pa_sample_size_of_format(pa_sample_format_t f) {      pa_assert(f >= 0);      pa_assert(f < PA_SAMPLE_MAX); -    return table[f]; +    return size_table[f];  }  size_t pa_sample_size(const pa_sample_spec *spec) { @@ -65,35 +64,35 @@ size_t pa_sample_size(const pa_sample_spec *spec) {      pa_assert(spec);      pa_return_val_if_fail(pa_sample_spec_valid(spec), 0); -    return pa_sample_size_of_format(spec->format); +    return size_table[spec->format];  }  size_t pa_frame_size(const pa_sample_spec *spec) {      pa_assert(spec);      pa_return_val_if_fail(pa_sample_spec_valid(spec), 0); -    return pa_sample_size(spec) * spec->channels; +    return size_table[spec->format] * spec->channels;  }  size_t pa_bytes_per_second(const pa_sample_spec *spec) {      pa_assert(spec);      
pa_return_val_if_fail(pa_sample_spec_valid(spec), 0); -    return spec->rate*pa_frame_size(spec); +    return spec->rate * size_table[spec->format] * spec->channels;  }  pa_usec_t pa_bytes_to_usec(uint64_t length, const pa_sample_spec *spec) {      pa_assert(spec);      pa_return_val_if_fail(pa_sample_spec_valid(spec), 0); -    return (((pa_usec_t) (length / pa_frame_size(spec)) * PA_USEC_PER_SEC) / spec->rate); +    return (((pa_usec_t) (length / (size_table[spec->format] * spec->channels)) * PA_USEC_PER_SEC) / spec->rate);  }  size_t pa_usec_to_bytes(pa_usec_t t, const pa_sample_spec *spec) {      pa_assert(spec);      pa_return_val_if_fail(pa_sample_spec_valid(spec), 0); -    return (size_t) (((t * spec->rate) / PA_USEC_PER_SEC)) * pa_frame_size(spec); +    return (size_t) (((t * spec->rate) / PA_USEC_PER_SEC)) * (size_table[spec->format] * spec->channels);  }  pa_sample_spec* pa_sample_spec_init(pa_sample_spec *spec) { @@ -109,12 +108,12 @@ pa_sample_spec* pa_sample_spec_init(pa_sample_spec *spec) {  int pa_sample_spec_valid(const pa_sample_spec *spec) {      pa_assert(spec); -    if (spec->rate <= 0 || +    if (PA_UNLIKELY (spec->rate <= 0 ||          spec->rate > PA_RATE_MAX ||          spec->channels <= 0 ||          spec->channels > PA_CHANNELS_MAX ||          spec->format >= PA_SAMPLE_MAX || -        spec->format < 0) +        spec->format < 0))          return 0;      return 1; diff --git a/src/pulsecore/cpu-arm.c b/src/pulsecore/cpu-arm.c new file mode 100644 index 00000000..5a994b71 --- /dev/null +++ b/src/pulsecore/cpu-arm.c @@ -0,0 +1,140 @@ +/*** +  This file is part of PulseAudio. + +  Copyright 2004-2006 Lennart Poettering +  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk>  + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. 
+ +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. +***/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include <pulse/xmalloc.h> +#include <pulsecore/log.h> + +#include "cpu-arm.h" + +#if defined (__arm__) && defined (__linux__) + +#define MAX_BUFFER  4096 +static char * +get_cpuinfo_line (char *cpuinfo, const char *tag) { +    char *line, *end, *colon; + +    if (!(line = strstr (cpuinfo, tag))) +        return NULL; + +    if (!(end = strchr (line, '\n'))) +        return NULL; + +    if (!(colon = strchr (line, ':'))) +        return NULL; + +    if (++colon >= end) +        return NULL; + +    return pa_xstrndup (colon, end - colon); +} + +static char *get_cpuinfo(void) { +    char *cpuinfo; +    int n, fd; + +    if (!(cpuinfo = malloc(MAX_BUFFER))) +         return NULL; + +    if ((fd = open("/proc/cpuinfo", O_RDONLY)) < 0) { +        free (cpuinfo); +        return NULL; +    } + +    if ((n = read(fd, cpuinfo, MAX_BUFFER-1)) < 0) { +        free (cpuinfo); +        close (fd); +        return NULL; +    } +    cpuinfo[n] = 0; +    close (fd); + +    return cpuinfo; +} +#endif /* defined (__arm__) && defined (__linux__) */ + +void pa_cpu_init_arm (void) { +#if defined (__arm__) +#if defined (__linux__) +    char *cpuinfo, *line; +    int arch; +    pa_cpu_arm_flag_t flags = 0; + +    /* We need to read the CPU flags from /proc/cpuinfo because there is no user +     * space support to get the CPU features. This only works on linux AFAIK. 
*/ +    if (!(cpuinfo = get_cpuinfo ())) { +        pa_log ("Can't read cpuinfo"); +        return; +    } + +    /* get the CPU architecture */ +    if ((line = get_cpuinfo_line (cpuinfo, "CPU architecture"))) { +        arch = strtoul (line, NULL, 0); +        if (arch >= 6) +            flags |= PA_CPU_ARM_V6; +        if (arch >= 7) +            flags |= PA_CPU_ARM_V7; + +        free (line); +    } +    /* get the CPU features */ +    if ((line = get_cpuinfo_line (cpuinfo, "Features"))) { +        char *state = NULL, *current; + +        while ((current = pa_split_spaces (line, &state))) { +            if (!strcmp (current, "vfp")) +                flags |= PA_CPU_ARM_VFP; +            else if (!strcmp (current, "edsp")) +                flags |= PA_CPU_ARM_EDSP; +            else if (!strcmp (current, "neon")) +                flags |= PA_CPU_ARM_NEON; +            else if (!strcmp (current, "vfpv3")) +                flags |= PA_CPU_ARM_VFPV3; + +            free (current); +        } +    } +    free (cpuinfo); + +    pa_log_info ("CPU flags: %s%s%s%s%s%s", +          (flags & PA_CPU_ARM_V6) ? "V6 " : "", +          (flags & PA_CPU_ARM_V7) ? "V7 " : "", +          (flags & PA_CPU_ARM_VFP) ? "VFP " : "", +          (flags & PA_CPU_ARM_EDSP) ? "EDSP " : "", +          (flags & PA_CPU_ARM_NEON) ? "NEON " : "", +          (flags & PA_CPU_ARM_VFPV3) ? "VFPV3 " : ""); +#else /* defined (__linux__) */ +    pa_log ("ARM cpu features not yet supported on this OS"); +#endif /* defined (__linux__) */ + +    if (flags & PA_CPU_ARM_V6) +        pa_volume_func_init_arm (flags); +#endif /* defined (__arm__) */ +} diff --git a/src/pulsecore/cpu-arm.h b/src/pulsecore/cpu-arm.h new file mode 100644 index 00000000..3ccd0708 --- /dev/null +++ b/src/pulsecore/cpu-arm.h @@ -0,0 +1,42 @@ +#ifndef foocpuarmhfoo +#define foocpuarmhfoo + +/*** +  This file is part of PulseAudio. 
+ +  Copyright 2004-2006 Lennart Poettering +  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk>  + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. + +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. +***/ + +#include <stdint.h> + +typedef enum pa_cpu_arm_flag { +    PA_CPU_ARM_V6       = (1 << 0), +    PA_CPU_ARM_V7       = (1 << 1), +    PA_CPU_ARM_VFP      = (1 << 2), +    PA_CPU_ARM_EDSP     = (1 << 3), +    PA_CPU_ARM_NEON     = (1 << 4), +    PA_CPU_ARM_VFPV3    = (1 << 5) +} pa_cpu_arm_flag_t; + +void pa_cpu_init_arm (void); + +/* some optimized functions */ +void pa_volume_func_init_arm(pa_cpu_arm_flag_t flags); + +#endif /* foocpuarmhfoo */ diff --git a/src/pulsecore/cpu-x86.c b/src/pulsecore/cpu-x86.c new file mode 100644 index 00000000..bc093ec0 --- /dev/null +++ b/src/pulsecore/cpu-x86.c @@ -0,0 +1,122 @@ +/*** +  This file is part of PulseAudio. + +  Copyright 2004-2006 Lennart Poettering +  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk> + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. 
+ +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. +***/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdint.h> + +#include <pulsecore/log.h> + +#include "cpu-x86.h" + +#if defined (__i386__) || defined (__amd64__) +static void +get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) +{ +    __asm__ __volatile__ ( +        "  push %%"PA_REG_b"   \n\t" +        "  cpuid               \n\t" +        "  mov %%ebx, %%esi    \n\t" +        "  pop %%"PA_REG_b"    \n\t" + +        : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d) +        : "0" (op) +    ); +} +#endif + +void pa_cpu_init_x86 (void) { +#if defined (__i386__) || defined (__amd64__) +    uint32_t eax, ebx, ecx, edx; +    uint32_t level; +    pa_cpu_x86_flag_t flags = 0; + +    /* get standard level */ +    get_cpuid (0x00000000, &level, &ebx, &ecx, &edx); +    if (level >= 1) { +        get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx); + +        if (edx & (1<<23)) +          flags |= PA_CPU_X86_MMX; + +        if (edx & (1<<25)) +          flags |= PA_CPU_X86_SSE; + +        if (edx & (1<<26)) +          flags |= PA_CPU_X86_SSE2; + +        if (ecx & (1<<0)) +          flags |= PA_CPU_X86_SSE3; + +        if (ecx & (1<<9)) +          flags |= PA_CPU_X86_SSSE3; + +        if (ecx & (1<<19)) +          flags |= PA_CPU_X86_SSE4_1; + +        if (ecx & (1<<20)) +          flags |= PA_CPU_X86_SSE4_2; +    } + +    /* get extended level */ +    get_cpuid (0x80000000, &level, &ebx, &ecx, &edx); +    if (level >= 0x80000001) { +        get_cpuid (0x80000001, &eax, &ebx, &ecx, 
&edx); + +        if (edx & (1<<22)) +          flags |= PA_CPU_X86_MMXEXT; + +        if (edx & (1<<23)) +          flags |= PA_CPU_X86_MMX; + +        if (edx & (1<<30)) +          flags |= PA_CPU_X86_3DNOWEXT; + +        if (edx & (1<<31)) +          flags |= PA_CPU_X86_3DNOW; +    } + +    pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s", +    (flags & PA_CPU_X86_MMX) ? "MMX " : "", +    (flags & PA_CPU_X86_SSE) ? "SSE " : "", +    (flags & PA_CPU_X86_SSE2) ? "SSE2 " : "", +    (flags & PA_CPU_X86_SSE3) ? "SSE3 " : "", +    (flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "", +    (flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "", +    (flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "", +    (flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "", +    (flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "", +    (flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : ""); + +    /* activate various optimisations */ +    if (flags & PA_CPU_X86_MMX) { +        pa_volume_func_init_mmx (flags); +        pa_remap_func_init_mmx (flags); +    } + +    if (flags & PA_CPU_X86_SSE) +        pa_volume_func_init_sse (flags); + +#endif /* defined (__i386__) || defined (__amd64__) */ +} diff --git a/src/pulsecore/cpu-x86.h b/src/pulsecore/cpu-x86.h new file mode 100644 index 00000000..b11ef6ea --- /dev/null +++ b/src/pulsecore/cpu-x86.h @@ -0,0 +1,68 @@ +#ifndef foocpux86hfoo +#define foocpux86hfoo + +/*** +  This file is part of PulseAudio. + +  Copyright 2004-2006 Lennart Poettering +  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk>  + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. + +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. +***/ + +#include <stdint.h> + +typedef enum pa_cpu_x86_flag { +    PA_CPU_X86_MMX       = (1 << 0), +    PA_CPU_X86_MMXEXT    = (1 << 1), +    PA_CPU_X86_SSE       = (1 << 2), +    PA_CPU_X86_SSE2      = (1 << 3), +    PA_CPU_X86_SSE3      = (1 << 4), +    PA_CPU_X86_SSSE3     = (1 << 5), +    PA_CPU_X86_SSE4_1    = (1 << 6), +    PA_CPU_X86_SSE4_2    = (1 << 7), +    PA_CPU_X86_3DNOW     = (1 << 8), +    PA_CPU_X86_3DNOWEXT  = (1 << 9) +} pa_cpu_x86_flag_t; + +void pa_cpu_init_x86 (void); + + +#if defined (__i386__) +typedef int32_t pa_reg_x86; +#define PA_REG_a "eax" +#define PA_REG_b "ebx" +#define PA_REG_c "ecx" +#define PA_REG_d "edx" +#define PA_REG_D "edi" +#define PA_REG_S "esi" +#elif defined (__amd64__) +typedef int64_t pa_reg_x86; +#define PA_REG_a "rax" +#define PA_REG_b "rbx" +#define PA_REG_c "rcx" +#define PA_REG_d "rdx" +#define PA_REG_D "rdi" +#define PA_REG_S "rsi" +#endif + +/* some optimized functions */ +void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags); +void pa_volume_func_init_sse(pa_cpu_x86_flag_t flags); + +void pa_remap_func_init_mmx(pa_cpu_x86_flag_t flags); + +#endif /* foocpux86hfoo */ diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c new file mode 100644 index 00000000..a0fc85b9 --- /dev/null +++ b/src/pulsecore/remap.c @@ -0,0 +1,204 @@ +/*** +  This file is part of PulseAudio. + +  Copyright 2004-2006 Lennart Poettering +  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk> + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. 
+ +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. +***/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <string.h> + +#include <pulse/sample.h> +#include <pulsecore/log.h> +#include <pulsecore/macro.h> + +#include "remap.h" + +static void remap_mono_to_stereo_c (pa_remap_t *m, void *dst, const void *src, unsigned n) { +    unsigned i; + +    switch (*m->format) { +        case PA_SAMPLE_FLOAT32NE: +        { +            float *d, *s; + +            d = (float *) dst; +            s = (float *) src; + +            for (i = n >> 2; i; i--) { +                d[0] = d[1] = s[0]; +                d[2] = d[3] = s[1]; +                d[4] = d[5] = s[2]; +                d[6] = d[7] = s[3]; +                s += 4; +                d += 8; +            } +            for (i = n & 3; i; i--) { +                d[0] = d[1] = s[0]; +                s++; +                d += 2; +            } +            break; +        } +        case PA_SAMPLE_S16NE: +        { +            int16_t *d, *s; + +            d = (int16_t *) dst; +            s = (int16_t *) src; + +            for (i = n >> 2; i; i--) { +                d[0] = d[1] = s[0]; +                d[2] = d[3] = s[1]; +                d[4] = d[5] = s[2]; +                d[6] = d[7] = s[3]; +                s += 4; +                d += 8; +            } +            for (i = n & 3; i; i--) { +                d[0] = d[1] = s[0]; +                s++; +                d += 2; +            } +            break; +        } +        default: +            pa_assert_not_reached(); +    } +} + +static 
void remap_channels_matrix_c (pa_remap_t *m, void *dst, const void *src, unsigned n) { +    unsigned oc, ic, i; +    unsigned n_ic, n_oc; + +    n_ic = m->i_ss->channels; +    n_oc = m->o_ss->channels; + +    switch (*m->format) { +        case PA_SAMPLE_FLOAT32NE: +        { +            float *d, *s; + +            memset(dst, 0, n * sizeof (float) * n_oc); + +            for (oc = 0; oc < n_oc; oc++) { + +                for (ic = 0; ic < n_ic; ic++) { +                    float vol; + +                    vol = m->map_table_f[oc][ic]; + +                    if (vol <= 0.0) +                        continue; + +                    d = (float *)dst + oc; +                    s = (float *)src + ic; + +                    if (vol >= 1.0) { +                        for (i = n; i > 0; i--, s += n_ic, d += n_oc) +                            *d += *s; +                    } else { +                        for (i = n; i > 0; i--, s += n_ic, d += n_oc) +                            *d += *s * vol; +                    } +                } +            } + +            break; +        } +        case PA_SAMPLE_S16NE: +        { +            int16_t *d, *s; + +            memset(dst, 0, n * sizeof (int16_t) * n_oc); + +            for (oc = 0; oc < n_oc; oc++) { + +                for (ic = 0; ic < n_ic; ic++) { +                    int32_t vol; + +                    vol = m->map_table_i[oc][ic]; + +                    if (vol <= 0) +                        continue; + +                    d = (int16_t *)dst + oc; +                    s = (int16_t *)src + ic; + +                    if (vol >= 0x10000) { +                        for (i = n; i > 0; i--, s += n_ic, d += n_oc) +                            *d += *s; +                    } else { +                        for (i = n; i > 0; i--, s += n_ic, d += n_oc) +                            *d += (int16_t) (((int32_t)*s * vol) >> 16); +                    } +                } +            } +            break; +        } +   
     default: +            pa_assert_not_reached(); +    } +} + +/* set the function that will execute the remapping based on the matrices */ +static void init_remap_c (pa_remap_t *m) { +    unsigned n_oc, n_ic; + +    n_oc = m->o_ss->channels; +    n_ic = m->i_ss->channels; + +    /* find some common channel remappings, fall back to full matrix operation. */ +    if (n_ic == 1 && n_oc == 2 && +            m->map_table_f[0][0] >= 1.0 && m->map_table_f[1][0] >= 1.0) { +        m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_c; +        pa_log_info("Using mono to stereo remapping"); +    } else { +        m->do_remap = (pa_do_remap_func_t) remap_channels_matrix_c; +        pa_log_info("Using generic matrix remapping"); +    } +} + + +/* default C implementation */ +static pa_init_remap_func_t remap_func = init_remap_c; + +void pa_init_remap (pa_remap_t *m) { +    pa_assert (remap_func); + +    m->do_remap = NULL; + +    /* call the installed remap init function */ +    remap_func (m); + +    if (m->do_remap == NULL) { +        /* nothing was installed, fallback to C version */ +        init_remap_c (m); +    } +} + +pa_init_remap_func_t pa_get_init_remap_func(void) { +    return remap_func; +} + +void pa_set_init_remap_func(pa_init_remap_func_t func) { +    remap_func = func; +} diff --git a/src/pulsecore/remap.h b/src/pulsecore/remap.h new file mode 100644 index 00000000..32a67cdd --- /dev/null +++ b/src/pulsecore/remap.h @@ -0,0 +1,48 @@ +#ifndef fooremapfoo +#define fooremapfoo + +/*** +  This file is part of PulseAudio. + +  Copyright 2004-2006 Lennart Poettering +  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk.com> + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. 
+ +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. +***/ + +#include <pulse/sample.h> + +typedef struct pa_remap pa_remap_t; + +typedef void (*pa_do_remap_func_t) (pa_remap_t *m, void *d, const void *s, unsigned n); + +struct pa_remap { +    pa_sample_format_t *format; +    pa_sample_spec *i_ss, *o_ss; +    float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; +    int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; +    pa_do_remap_func_t do_remap; +}; + +void pa_init_remap (pa_remap_t *m); + +/* custom installation of init functions */ +typedef void (*pa_init_remap_func_t) (pa_remap_t *m); + +pa_init_remap_func_t pa_get_init_remap_func(void); +void pa_set_init_remap_func(pa_init_remap_func_t func); + +#endif /* fooremapfoo */ diff --git a/src/pulsecore/remap_mmx.c b/src/pulsecore/remap_mmx.c new file mode 100644 index 00000000..bfcae6c5 --- /dev/null +++ b/src/pulsecore/remap_mmx.c @@ -0,0 +1,148 @@ +/*** +  This file is part of PulseAudio. + +  Copyright 2004-2006 Lennart Poettering +  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk.com> + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. + +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. 
+ +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. +***/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <string.h> + +#include <pulse/sample.h> +#include <pulsecore/log.h> +#include <pulsecore/macro.h> + +#include "cpu-x86.h" +#include "remap.h" + +#define LOAD_SAMPLES                                   \ +                " movq (%1), %%mm0              \n\t"  \ +                " movq 8(%1), %%mm2             \n\t"  \ +                " movq 16(%1), %%mm4            \n\t"  \ +                " movq 24(%1), %%mm6            \n\t"  \ +                " movq %%mm0, %%mm1             \n\t"  \ +                " movq %%mm2, %%mm3             \n\t"  \ +                " movq %%mm4, %%mm5             \n\t"  \ +                " movq %%mm6, %%mm7             \n\t" + +#define UNPACK_SAMPLES(s)                              \ +                " punpckl"#s" %%mm0, %%mm0      \n\t"  \ +                " punpckh"#s" %%mm1, %%mm1      \n\t"  \ +                " punpckl"#s" %%mm2, %%mm2      \n\t"  \ +                " punpckh"#s" %%mm3, %%mm3      \n\t"  \ +                " punpckl"#s" %%mm4, %%mm4      \n\t"  \ +                " punpckh"#s" %%mm5, %%mm5      \n\t"  \ +                " punpckl"#s" %%mm6, %%mm6      \n\t"  \ +                " punpckh"#s" %%mm7, %%mm7      \n\t"  \ + +#define STORE_SAMPLES                                  \ +                " movq %%mm0, (%0)              \n\t"  \ +                " movq %%mm1, 8(%0)             \n\t"  \ +                " movq %%mm2, 16(%0)            \n\t"  \ +                " movq %%mm3, 24(%0)            \n\t"  \ +                " movq %%mm4, 32(%0)            \n\t"  \ +                " movq %%mm5, 40(%0)            \n\t"  \ +                " movq %%mm6, 48(%0)            \n\t"  \ +                " movq %%mm7, 56(%0)            \n\t"  \ 
+                " add $32, %1                   \n\t"  \ +                " add $64, %0                   \n\t" + +#define HANDLE_SINGLE(s)                               \ +                " movd (%1), %%mm0              \n\t"  \ +                " movq %%mm0, %%mm1             \n\t"  \ +                " punpckl"#s" %%mm0, %%mm0      \n\t"  \ +                " movq %%mm0, (%0)              \n\t"  \ +                " add $4, %1                    \n\t"  \ +                " add $8, %0                    \n\t" + +#define MONO_TO_STEREO(s)                               \ +                " mov %3, %2                    \n\t"   \ +                " sar $3, %2                    \n\t"   \ +                " cmp $0, %2                    \n\t"   \ +                " je 2f                         \n\t"   \ +                "1:                             \n\t"   \ +                LOAD_SAMPLES                            \ +                UNPACK_SAMPLES(s)                       \ +                STORE_SAMPLES                           \ +                " dec %2                        \n\t"   \ +                " jne 1b                        \n\t"   \ +                "2:                             \n\t"   \ +                " mov %3, %2                    \n\t"   \ +                " and $7, %2                    \n\t"   \ +                " je 4f                         \n\t"   \ +                "3:                             \n\t"   \ +                HANDLE_SINGLE(s)                        \ +                " dec %2                        \n\t"   \ +                " jne 3b                        \n\t"   \ +                "4:                             \n\t"   \ +                " emms                          \n\t" + +static void remap_mono_to_stereo_mmx (pa_remap_t *m, void *dst, const void *src, unsigned n) { +    pa_reg_x86 temp; + +    switch (*m->format) { +        case PA_SAMPLE_FLOAT32NE: +        { +            __asm__ __volatile__ ( +             
   MONO_TO_STEREO(dq) /* do doubles to quads */ +                : "+r" (dst), "+r" (src), "=&r" (temp) +                : "r" ((pa_reg_x86)n) +                : "cc" +            ); +            break; +        } +        case PA_SAMPLE_S16NE: +        { +            __asm__ __volatile__ ( +                MONO_TO_STEREO(wd) /* do words to doubles */ +                : "+r" (dst), "+r" (src), "=&r" (temp) +                : "r" ((pa_reg_x86)n) +                : "cc" +            ); +            break; +        } +        default: +            pa_assert_not_reached(); +    } +} + +/* set the function that will execute the remapping based on the matrices */ +static void init_remap_mmx (pa_remap_t *m) { +    unsigned n_oc, n_ic; + +    n_oc = m->o_ss->channels; +    n_ic = m->i_ss->channels; + +    /* find some common channel remappings, fall back to full matrix operation. */ +    if (n_ic == 1 && n_oc == 2 && +            m->map_table_f[0][0] >= 1.0 && m->map_table_f[1][0] >= 1.0) { +        m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_mmx; +        pa_log_info("Using MMX mono to stereo remapping"); +    } +} + +void pa_remap_func_init_mmx (pa_cpu_x86_flag_t flags) { +#if defined (__i386__) || defined (__amd64__) +    pa_log_info("Initialising MMX optimized remappers."); + +    pa_set_init_remap_func ((pa_init_remap_func_t) init_remap_mmx); +#endif /* defined (__i386__) || defined (__amd64__) */ +} diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c index 59e0a0c1..f1bfa156 100644 --- a/src/pulsecore/resampler.c +++ b/src/pulsecore/resampler.c @@ -31,9 +31,6 @@  #include <speex/speex_resampler.h> -#include <liboil/liboilfuncs.h> -#include <liboil/liboil.h> -  #include <pulse/xmalloc.h>  #include <pulsecore/sconv.h>  #include <pulsecore/log.h> @@ -43,6 +40,7 @@  #include "ffmpeg/avcodec.h"  #include "resampler.h" +#include "remap.h"  /* Number of samples of extra space we allow the resamplers to return */  #define EXTRA_FRAMES 128 @@ -64,7 
+62,7 @@ struct pa_resampler {      pa_convert_func_t to_work_format_func;      pa_convert_func_t from_work_format_func; -    float map_table[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; +    pa_remap_t remap;      pa_bool_t map_required;      void (*impl_free)(pa_resampler *r); @@ -214,6 +212,11 @@ pa_resampler* pa_resampler_new(      r->i_ss = *a;      r->o_ss = *b; +    /* set up the remap structure */ +    r->remap.i_ss = &r->i_ss; +    r->remap.o_ss = &r->o_ss; +    r->remap.format = &r->work_format; +      if (am)          r->i_cm = *am;      else if (!pa_channel_map_init_auto(&r->i_cm, r->i_ss.channels, PA_CHANNEL_MAP_DEFAULT)) @@ -580,32 +583,41 @@ static int front_rear_side(pa_channel_position_t p) {  static void calc_map_table(pa_resampler *r) {      unsigned oc, ic; +    unsigned n_oc, n_ic;      pa_bool_t ic_connected[PA_CHANNELS_MAX];      pa_bool_t remix;      pa_strbuf *s;      char *t; +    pa_remap_t *m;      pa_assert(r);      if (!(r->map_required = (r->i_ss.channels != r->o_ss.channels || (!(r->flags & PA_RESAMPLER_NO_REMAP) && !pa_channel_map_equal(&r->i_cm, &r->o_cm)))))          return; -    memset(r->map_table, 0, sizeof(r->map_table)); +    m = &r->remap; + +    n_oc = r->o_ss.channels; +    n_ic = r->i_ss.channels; + +    memset(m->map_table_f, 0, sizeof(m->map_table_f)); +    memset(m->map_table_i, 0, sizeof(m->map_table_i)); +      memset(ic_connected, 0, sizeof(ic_connected));      remix = (r->flags & (PA_RESAMPLER_NO_REMAP|PA_RESAMPLER_NO_REMIX)) == 0; -    for (oc = 0; oc < r->o_ss.channels; oc++) { +    for (oc = 0; oc < n_oc; oc++) {          pa_bool_t oc_connected = FALSE;          pa_channel_position_t b = r->o_cm.map[oc]; -        for (ic = 0; ic < r->i_ss.channels; ic++) { +        for (ic = 0; ic < n_ic; ic++) {              pa_channel_position_t a = r->i_cm.map[ic];              if (r->flags & PA_RESAMPLER_NO_REMAP) {                  /* We shall not do any remapping. 
Hence, just check by index */                  if (ic == oc) -                    r->map_table[oc][ic] = 1.0; +                    m->map_table_f[oc][ic] = 1.0;                  continue;              } @@ -614,7 +626,7 @@ static void calc_map_table(pa_resampler *r) {                  /* We shall not do any remixing. Hence, just check by name */                  if (a == b) -                    r->map_table[oc][ic] = 1.0; +                    m->map_table_f[oc][ic] = 1.0;                  continue;              } @@ -689,7 +701,7 @@ static void calc_map_table(pa_resampler *r) {               */              if (a == b || a == PA_CHANNEL_POSITION_MONO || b == PA_CHANNEL_POSITION_MONO) { -                r->map_table[oc][ic] = 1.0; +                m->map_table_f[oc][ic] = 1.0;                  oc_connected = TRUE;                  ic_connected[ic] = TRUE; @@ -707,14 +719,14 @@ static void calc_map_table(pa_resampler *r) {                  /* We are not connected and on the left side, let's                   * average all left side input channels. */ -                for (ic = 0; ic < r->i_ss.channels; ic++) +                for (ic = 0; ic < n_ic; ic++)                      if (on_left(r->i_cm.map[ic]))                          n++;                  if (n > 0) -                    for (ic = 0; ic < r->i_ss.channels; ic++) +                    for (ic = 0; ic < n_ic; ic++)                          if (on_left(r->i_cm.map[ic])) { -                            r->map_table[oc][ic] = 1.0f / (float) n; +                            m->map_table_f[oc][ic] = 1.0f / (float) n;                              ic_connected[ic] = TRUE;                          } @@ -728,14 +740,14 @@ static void calc_map_table(pa_resampler *r) {                  /* We are not connected and on the right side, let's                   * average all right side input channels. 
*/ -                for (ic = 0; ic < r->i_ss.channels; ic++) +                for (ic = 0; ic < n_ic; ic++)                      if (on_right(r->i_cm.map[ic]))                          n++;                  if (n > 0) -                    for (ic = 0; ic < r->i_ss.channels; ic++) +                    for (ic = 0; ic < n_ic; ic++)                          if (on_right(r->i_cm.map[ic])) { -                            r->map_table[oc][ic] = 1.0f / (float) n; +                            m->map_table_f[oc][ic] = 1.0f / (float) n;                              ic_connected[ic] = TRUE;                          } @@ -749,14 +761,14 @@ static void calc_map_table(pa_resampler *r) {                  /* We are not connected and at the center. Let's                   * average all center input channels. */ -                for (ic = 0; ic < r->i_ss.channels; ic++) +                for (ic = 0; ic < n_ic; ic++)                      if (on_center(r->i_cm.map[ic]))                          n++;                  if (n > 0) { -                    for (ic = 0; ic < r->i_ss.channels; ic++) +                    for (ic = 0; ic < n_ic; ic++)                          if (on_center(r->i_cm.map[ic])) { -                            r->map_table[oc][ic] = 1.0f / (float) n; +                            m->map_table_f[oc][ic] = 1.0f / (float) n;                              ic_connected[ic] = TRUE;                          }                  } else { @@ -766,14 +778,14 @@ static void calc_map_table(pa_resampler *r) {                      n = 0; -                    for (ic = 0; ic < r->i_ss.channels; ic++) +                    for (ic = 0; ic < n_ic; ic++)                          if (on_left(r->i_cm.map[ic]) || on_right(r->i_cm.map[ic]))                              n++;                      if (n > 0) -                        for (ic = 0; ic < r->i_ss.channels; ic++) +                        for (ic = 0; ic < n_ic; ic++)                              if (on_left(r->i_cm.map[ic]) || 
on_right(r->i_cm.map[ic])) { -                                r->map_table[oc][ic] = 1.0f / (float) n; +                                m->map_table_f[oc][ic] = 1.0f / (float) n;                                  ic_connected[ic] = TRUE;                              } @@ -787,12 +799,12 @@ static void calc_map_table(pa_resampler *r) {                  /* We are not connected and an LFE. Let's average all                   * channels for LFE. */ -                for (ic = 0; ic < r->i_ss.channels; ic++) { +                for (ic = 0; ic < n_ic; ic++) {                      if (!(r->flags & PA_RESAMPLER_NO_LFE)) -                        r->map_table[oc][ic] = 1.0f / (float) r->i_ss.channels; +                        m->map_table_f[oc][ic] = 1.0f / (float) n_ic;                      else -                        r->map_table[oc][ic] = 0; +                        m->map_table_f[oc][ic] = 0;                      /* Please note that a channel connected to LFE                       * doesn't really count as connected. 
*/ @@ -808,7 +820,7 @@ static void calc_map_table(pa_resampler *r) {              ic_unconnected_center = 0,              ic_unconnected_lfe = 0; -        for (ic = 0; ic < r->i_ss.channels; ic++) { +        for (ic = 0; ic < n_ic; ic++) {              pa_channel_position_t a = r->i_cm.map[ic];              if (ic_connected[ic]) @@ -831,20 +843,20 @@ static void calc_map_table(pa_resampler *r) {               * the left side by .9 and add in our averaged unconnected               * channels multplied by .1 */ -            for (oc = 0; oc < r->o_ss.channels; oc++) { +            for (oc = 0; oc < n_oc; oc++) {                  if (!on_left(r->o_cm.map[oc]))                      continue; -                for (ic = 0; ic < r->i_ss.channels; ic++) { +                for (ic = 0; ic < n_ic; ic++) {                      if (ic_connected[ic]) { -                        r->map_table[oc][ic] *= .9f; +                        m->map_table_f[oc][ic] *= .9f;                          continue;                      }                      if (on_left(r->i_cm.map[ic])) -                        r->map_table[oc][ic] = .1f / (float) ic_unconnected_left; +                        m->map_table_f[oc][ic] = .1f / (float) ic_unconnected_left;                  }              }          } @@ -856,20 +868,20 @@ static void calc_map_table(pa_resampler *r) {               * the right side by .9 and add in our averaged unconnected               * channels multplied by .1 */ -            for (oc = 0; oc < r->o_ss.channels; oc++) { +            for (oc = 0; oc < n_oc; oc++) {                  if (!on_right(r->o_cm.map[oc]))                      continue; -                for (ic = 0; ic < r->i_ss.channels; ic++) { +                for (ic = 0; ic < n_ic; ic++) {                      if (ic_connected[ic]) { -                        r->map_table[oc][ic] *= .9f; +                        m->map_table_f[oc][ic] *= .9f;                          continue;                      }                      if 
(on_right(r->i_cm.map[ic])) -                        r->map_table[oc][ic] = .1f / (float) ic_unconnected_right; +                        m->map_table_f[oc][ic] = .1f / (float) ic_unconnected_right;                  }              }          } @@ -882,20 +894,20 @@ static void calc_map_table(pa_resampler *r) {               * the center side by .9 and add in our averaged unconnected               * channels multplied by .1 */ -            for (oc = 0; oc < r->o_ss.channels; oc++) { +            for (oc = 0; oc < n_oc; oc++) {                  if (!on_center(r->o_cm.map[oc]))                      continue; -                for (ic = 0; ic < r->i_ss.channels; ic++)  { +                for (ic = 0; ic < n_ic; ic++)  {                      if (ic_connected[ic]) { -                        r->map_table[oc][ic] *= .9f; +                        m->map_table_f[oc][ic] *= .9f;                          continue;                      }                      if (on_center(r->i_cm.map[ic])) { -                        r->map_table[oc][ic] = .1f / (float) ic_unconnected_center; +                        m->map_table_f[oc][ic] = .1f / (float) ic_unconnected_center;                          mixed_in = TRUE;                      }                  } @@ -913,7 +925,7 @@ static void calc_map_table(pa_resampler *r) {                     it into left and right. Using .375 and 0.75 as                     factors. 
*/ -                for (ic = 0; ic < r->i_ss.channels; ic++) { +                for (ic = 0; ic < n_ic; ic++) {                      if (ic_connected[ic])                          continue; @@ -921,7 +933,7 @@ static void calc_map_table(pa_resampler *r) {                      if (!on_center(r->i_cm.map[ic]))                          continue; -                    for (oc = 0; oc < r->o_ss.channels; oc++) { +                    for (oc = 0; oc < n_oc; oc++) {                          if (!on_left(r->o_cm.map[oc]) && !on_right(r->o_cm.map[oc]))                              continue; @@ -932,7 +944,7 @@ static void calc_map_table(pa_resampler *r) {                          }                      } -                    for (oc = 0; oc < r->o_ss.channels; oc++) { +                    for (oc = 0; oc < n_oc; oc++) {                          if (!on_left(r->o_cm.map[oc]) && !on_right(r->o_cm.map[oc]))                              continue; @@ -942,7 +954,7 @@ static void calc_map_table(pa_resampler *r) {                      }                  } -                for (oc = 0; oc < r->o_ss.channels; oc++) { +                for (oc = 0; oc < n_oc; oc++) {                      if (!on_left(r->o_cm.map[oc]) && !on_right(r->o_cm.map[oc]))                          continue; @@ -950,10 +962,10 @@ static void calc_map_table(pa_resampler *r) {                      if (ncenter[oc] <= 0)                          continue; -                    for (ic = 0; ic < r->i_ss.channels; ic++)  { +                    for (ic = 0; ic < n_ic; ic++)  {                          if (ic_connected[ic]) { -                            r->map_table[oc][ic] *= .75f; +                            m->map_table_f[oc][ic] *= .75f;                              continue;                          } @@ -961,7 +973,7 @@ static void calc_map_table(pa_resampler *r) {                              continue;                          if (!found_frs[ic] || front_rear_side(r->i_cm.map[ic]) == 
front_rear_side(r->o_cm.map[oc])) -                            r->map_table[oc][ic] = .375f / (float) ncenter[oc]; +                            m->map_table_f[oc][ic] = .375f / (float) ncenter[oc];                      }                  }              } @@ -972,40 +984,46 @@ static void calc_map_table(pa_resampler *r) {              /* OK, so there is an unconnected LFE channel. Let's mix               * it into all channels, with factor 0.375 */ -            for (ic = 0; ic < r->i_ss.channels; ic++)  { +            for (ic = 0; ic < n_ic; ic++)  {                  if (!on_lfe(r->i_cm.map[ic]))                      continue; -                for (oc = 0; oc < r->o_ss.channels; oc++) -                    r->map_table[oc][ic] = 0.375f / (float) ic_unconnected_lfe; +                for (oc = 0; oc < n_oc; oc++) +                    m->map_table_f[oc][ic] = 0.375f / (float) ic_unconnected_lfe;              }          }      } - +    /* make an 16:16 int version of the matrix */ +    for (oc = 0; oc < n_oc; oc++) +        for (ic = 0; ic < n_ic; ic++) +            m->map_table_i[oc][ic] = (int32_t) (m->map_table_f[oc][ic] * 0x10000);      s = pa_strbuf_new();      pa_strbuf_printf(s, "     "); -    for (ic = 0; ic < r->i_ss.channels; ic++) +    for (ic = 0; ic < n_ic; ic++)          pa_strbuf_printf(s, "  I%02u ", ic);      pa_strbuf_puts(s, "\n    +"); -    for (ic = 0; ic < r->i_ss.channels; ic++) +    for (ic = 0; ic < n_ic; ic++)          pa_strbuf_printf(s, "------");      pa_strbuf_puts(s, "\n"); -    for (oc = 0; oc < r->o_ss.channels; oc++) { +    for (oc = 0; oc < n_oc; oc++) {          pa_strbuf_printf(s, "O%02u |", oc); -        for (ic = 0; ic < r->i_ss.channels; ic++) -            pa_strbuf_printf(s, " %1.3f", r->map_table[oc][ic]); +        for (ic = 0; ic < n_ic; ic++) +            pa_strbuf_printf(s, " %1.3f", m->map_table_f[oc][ic]);          pa_strbuf_puts(s, "\n");      }      pa_log_debug("Channel matrix:\n%s", t = pa_strbuf_tostring_free(s));      
pa_xfree(t); + +    /* initialize the remapping function */ +    pa_init_remap (m);  }  static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input) { @@ -1045,41 +1063,10 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input)      return &r->buf1;  } -static void vectoradd_s16_with_fraction( -        int16_t *d, int dstr, -        const int16_t *s1, int sstr1, -        const int16_t *s2, int sstr2, -        int n, -        float s3, float s4) { - -    int32_t i3, i4; - -    i3 = (int32_t) (s3 * 0x10000); -    i4 = (int32_t) (s4 * 0x10000); - -    for (; n > 0; n--) { -        int32_t a, b; - -        a = *s1; -        b = *s2; - -        a = (a * i3) / 0x10000; -        b = (b * i4) / 0x10000; - -        *d = (int16_t) (a + b); - -        s1 = (const int16_t*) ((const uint8_t*) s1 + sstr1); -        s2 = (const int16_t*) ((const uint8_t*) s2 + sstr2); -        d = (int16_t*) ((uint8_t*) d + dstr); - -    } -} -  static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {      unsigned in_n_samples, out_n_samples, n_frames; -    int i_skip, o_skip; -    unsigned oc;      void *src, *dst; +    pa_remap_t *remap;      pa_assert(r);      pa_assert(input); @@ -1108,76 +1095,14 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) {      src = ((uint8_t*) pa_memblock_acquire(input->memblock) + input->index);      dst = pa_memblock_acquire(r->buf2.memblock); -    memset(dst, 0, r->buf2.length); - -    o_skip = (int) (r->w_sz * r->o_ss.channels); -    i_skip = (int) (r->w_sz * r->i_ss.channels); - -    switch (r->work_format) { -        case PA_SAMPLE_FLOAT32NE: - -            for (oc = 0; oc < r->o_ss.channels; oc++) { -                unsigned ic; -                static const float one = 1.0; - -                for (ic = 0; ic < r->i_ss.channels; ic++) { +    remap = &r->remap; -                    if (r->map_table[oc][ic] <= 0.0) -                        continue; - -                    
oil_vectoradd_f32( -                            (float*) dst + oc, o_skip, -                            (float*) dst + oc, o_skip, -                            (float*) src + ic, i_skip, -                            (int) n_frames, -                            &one, &r->map_table[oc][ic]); -                } -            } - -            break; - -        case PA_SAMPLE_S16NE: - -            for (oc = 0; oc < r->o_ss.channels; oc++) { -                unsigned ic; - -                for (ic = 0; ic < r->i_ss.channels; ic++) { - -                    if (r->map_table[oc][ic] <= 0.0) -                        continue; - -                    if (r->map_table[oc][ic] >= 1.0) { -                        static const int16_t one = 1; - -                        oil_vectoradd_s16( -                                (int16_t*) dst + oc, o_skip, -                                (int16_t*) dst + oc, o_skip, -                                (int16_t*) src + ic, i_skip, -                                (int) n_frames, -                                &one, &one); - -                    } else - -                        vectoradd_s16_with_fraction( -                                (int16_t*) dst + oc, o_skip, -                                (int16_t*) dst + oc, o_skip, -                                (int16_t*) src + ic, i_skip, -                                (int) n_frames, -                                1.0f, r->map_table[oc][ic]); -                } -            } - -            break; - -        default: -            pa_assert_not_reached(); -    } +    pa_assert (remap->do_remap); +    remap->do_remap (remap, dst, src, n_frames);      pa_memblock_release(input->memblock);      pa_memblock_release(r->buf2.memblock); -    r->buf2.length = out_n_samples * r->w_sz; -      return &r->buf2;  } @@ -1469,7 +1394,7 @@ static void trivial_resample(pa_resampler *r, const pa_memchunk *input, unsigned          pa_assert(o_index * fz < pa_memblock_get_length(output->memblock)); -       
 oil_memcpy((uint8_t*) dst + fz * o_index, +        memcpy((uint8_t*) dst + fz * o_index,                     (uint8_t*) src + fz * j, (int) fz);      } diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c index 5b8ccf59..6e97e5a9 100644 --- a/src/pulsecore/sample-util.c +++ b/src/pulsecore/sample-util.c @@ -30,9 +30,6 @@  #include <stdio.h>  #include <errno.h> -#include <liboil/liboilfuncs.h> -#include <liboil/liboil.h> -  #include <pulse/timeval.h>  #include <pulsecore/log.h> @@ -106,24 +103,36 @@ void* pa_silence_memory(void *p, size_t length, const pa_sample_spec *spec) {      return p;  } +#define VOLUME_PADDING 32 +  static void calc_linear_integer_volume(int32_t linear[], const pa_cvolume *volume) { -    unsigned channel; +    unsigned channel, nchannels, padding;      pa_assert(linear);      pa_assert(volume); -    for (channel = 0; channel < volume->channels; channel++) +    nchannels = volume->channels; + +    for (channel = 0; channel < nchannels; channel++)          linear[channel] = (int32_t) lrint(pa_sw_volume_to_linear(volume->values[channel]) * 0x10000); + +    for (padding = 0; padding < VOLUME_PADDING; padding++, channel++) +        linear[channel] = linear[padding];  }  static void calc_linear_float_volume(float linear[], const pa_cvolume *volume) { -    unsigned channel; +    unsigned channel, nchannels, padding;      pa_assert(linear);      pa_assert(volume); -    for (channel = 0; channel < volume->channels; channel++) +    nchannels = volume->channels; + +    for (channel = 0; channel < nchannels; channel++)          linear[channel] = (float) pa_sw_volume_to_linear(volume->values[channel]); + +    for (padding = 0; padding < VOLUME_PADDING; padding++, channel++) +        linear[channel] = linear[padding];  }  static void calc_linear_integer_stream_volumes(pa_mix_info streams[], unsigned nstreams, const pa_cvolume *volume, const pa_sample_spec *spec) { @@ -690,6 +699,28 @@ size_t pa_mix(      return length;  } +typedef union 
{ +  float f; +  uint32_t i; +} volume_val; + +typedef void (*pa_calc_volume_func_t) (void *volumes, const pa_cvolume *volume); + +static const pa_calc_volume_func_t calc_volume_table[] = { +  [PA_SAMPLE_U8]        = (pa_calc_volume_func_t) calc_linear_integer_volume, +  [PA_SAMPLE_ALAW]      = (pa_calc_volume_func_t) calc_linear_integer_volume, +  [PA_SAMPLE_ULAW]      = (pa_calc_volume_func_t) calc_linear_integer_volume, +  [PA_SAMPLE_S16LE]     = (pa_calc_volume_func_t) calc_linear_integer_volume, +  [PA_SAMPLE_S16BE]     = (pa_calc_volume_func_t) calc_linear_integer_volume, +  [PA_SAMPLE_FLOAT32LE] = (pa_calc_volume_func_t) calc_linear_float_volume, +  [PA_SAMPLE_FLOAT32BE] = (pa_calc_volume_func_t) calc_linear_float_volume, +  [PA_SAMPLE_S32LE]     = (pa_calc_volume_func_t) calc_linear_integer_volume, +  [PA_SAMPLE_S32BE]     = (pa_calc_volume_func_t) calc_linear_integer_volume, +  [PA_SAMPLE_S24LE]     = (pa_calc_volume_func_t) calc_linear_integer_volume, +  [PA_SAMPLE_S24BE]     = (pa_calc_volume_func_t) calc_linear_integer_volume, +  [PA_SAMPLE_S24_32LE]  = (pa_calc_volume_func_t) calc_linear_integer_volume, +  [PA_SAMPLE_S24_32BE]  = (pa_calc_volume_func_t) calc_linear_integer_volume +};  void pa_volume_memchunk(          pa_memchunk*c, @@ -697,6 +728,8 @@ void pa_volume_memchunk(          const pa_cvolume *volume) {      void *ptr; +    volume_val linear[PA_CHANNELS_MAX + VOLUME_PADDING]; +    pa_do_volume_func_t do_volume;      pa_assert(c);      pa_assert(spec); @@ -714,337 +747,19 @@ void pa_volume_memchunk(          return;      } -    ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index; - -    switch (spec->format) { - -        case PA_SAMPLE_S16NE: { -            int16_t *d, *e; -            unsigned channel; -            int32_t linear[PA_CHANNELS_MAX]; - -            calc_linear_integer_volume(linear, volume); - -            e = (int16_t*) ptr + c->length/sizeof(int16_t); - -            for (channel = 0, d = ptr; d < e; d++) { -           
     int32_t t, hi, lo; - -                /* Multiplying the 32bit volume factor with the 16bit -                 * sample might result in an 48bit value. We want to -                 * do without 64 bit integers and hence do the -                 * multiplication independantly for the HI and LO part -                 * of the volume. */ - -                hi = linear[channel] >> 16; -                lo = linear[channel] & 0xFFFF; - -                t = (int32_t)(*d); -                t = ((t * lo) >> 16) + (t * hi); -                t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); -                *d = (int16_t) t; - -                if (PA_UNLIKELY(++channel >= spec->channels)) -                    channel = 0; -            } - -            break; -        } - -        case PA_SAMPLE_S16RE: { -            int16_t *d, *e; -            unsigned channel; -            int32_t linear[PA_CHANNELS_MAX]; - -            calc_linear_integer_volume(linear, volume); - -            e = (int16_t*) ptr + c->length/sizeof(int16_t); - -            for (channel = 0, d = ptr; d < e; d++) { -                int32_t t, hi, lo; - -                hi = linear[channel] >> 16; -                lo = linear[channel] & 0xFFFF; - -                t = (int32_t) PA_INT16_SWAP(*d); -                t = ((t * lo) >> 16) + (t * hi); -                t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); -                *d = PA_INT16_SWAP((int16_t) t); - -                if (PA_UNLIKELY(++channel >= spec->channels)) -                    channel = 0; -            } - -            break; -        } - -        case PA_SAMPLE_S32NE: { -            int32_t *d, *e; -            unsigned channel; -            int32_t linear[PA_CHANNELS_MAX]; - -            calc_linear_integer_volume(linear, volume); - -            e = (int32_t*) ptr + c->length/sizeof(int32_t); - -            for (channel = 0, d = ptr; d < e; d++) { -                int64_t t; - -                t = (int64_t)(*d); -                t = (t * 
linear[channel]) >> 16; -                t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); -                *d = (int32_t) t; - -                if (PA_UNLIKELY(++channel >= spec->channels)) -                    channel = 0; -            } -            break; -        } - -        case PA_SAMPLE_S32RE: { -            int32_t *d, *e; -            unsigned channel; -            int32_t linear[PA_CHANNELS_MAX]; - -            calc_linear_integer_volume(linear, volume); - -            e = (int32_t*) ptr + c->length/sizeof(int32_t); - -            for (channel = 0, d = ptr; d < e; d++) { -                int64_t t; - -                t = (int64_t) PA_INT32_SWAP(*d); -                t = (t * linear[channel]) >> 16; -                t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); -                *d = PA_INT32_SWAP((int32_t) t); - -                if (PA_UNLIKELY(++channel >= spec->channels)) -                    channel = 0; -            } -            break; -        } - -        case PA_SAMPLE_S24NE: { -            uint8_t *d, *e; -            unsigned channel; -            int32_t linear[PA_CHANNELS_MAX]; - -            calc_linear_integer_volume(linear, volume); - -            e = (uint8_t*) ptr + c->length; - -            for (channel = 0, d = ptr; d < e; d += 3) { -                int64_t t; - -                t = (int64_t)((int32_t) (PA_READ24NE(d) << 8)); -                t = (t * linear[channel]) >> 16; -                t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); -                PA_WRITE24NE(d, ((uint32_t) (int32_t) t) >> 8); - -                if (PA_UNLIKELY(++channel >= spec->channels)) -                    channel = 0; -            } -            break; -        } - -        case PA_SAMPLE_S24RE: { -            uint8_t *d, *e; -            unsigned channel; -            int32_t linear[PA_CHANNELS_MAX]; - -            calc_linear_integer_volume(linear, volume); - -            e = (uint8_t*) ptr + c->length; - -            for (channel 
= 0, d = ptr; d < e; d += 3) { -                int64_t t; - -                t = (int64_t)((int32_t) (PA_READ24RE(d) << 8)); -                t = (t * linear[channel]) >> 16; -                t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); -                PA_WRITE24RE(d, ((uint32_t) (int32_t) t) >> 8); - -                if (PA_UNLIKELY(++channel >= spec->channels)) -                    channel = 0; -            } -            break; -        } - -        case PA_SAMPLE_S24_32NE: { -            uint32_t *d, *e; -            unsigned channel; -            int32_t linear[PA_CHANNELS_MAX]; - -            calc_linear_integer_volume(linear, volume); - -            e = (uint32_t*) ptr + c->length/sizeof(uint32_t); - -            for (channel = 0, d = ptr; d < e; d++) { -                int64_t t; - -                t = (int64_t) ((int32_t) (*d << 8)); -                t = (t * linear[channel]) >> 16; -                t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); -                *d = ((uint32_t) ((int32_t) t)) >> 8; - -                if (PA_UNLIKELY(++channel >= spec->channels)) -                    channel = 0; -            } -            break; -        } - -        case PA_SAMPLE_S24_32RE: { -            uint32_t *d, *e; -            unsigned channel; -            int32_t linear[PA_CHANNELS_MAX]; - -            calc_linear_integer_volume(linear, volume); - -            e = (uint32_t*) ptr + c->length/sizeof(uint32_t); - -            for (channel = 0, d = ptr; d < e; d++) { -                int64_t t; - -                t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*d) << 8)); -                t = (t * linear[channel]) >> 16; -                t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); -                *d = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); - -                if (PA_UNLIKELY(++channel >= spec->channels)) -                    channel = 0; -            } -            break; -        } - -        case PA_SAMPLE_U8: { -            
uint8_t *d, *e; -            unsigned channel; -            int32_t linear[PA_CHANNELS_MAX]; - -            calc_linear_integer_volume(linear, volume); - -            e = (uint8_t*) ptr + c->length; - -            for (channel = 0, d = ptr; d < e; d++) { -                int32_t t, hi, lo; - -                hi = linear[channel] >> 16; -                lo = linear[channel] & 0xFFFF; - -                t = (int32_t) *d - 0x80; -                t = ((t * lo) >> 16) + (t * hi); -                t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); -                *d = (uint8_t) (t + 0x80); - -                if (PA_UNLIKELY(++channel >= spec->channels)) -                    channel = 0; -            } -            break; -        } - -        case PA_SAMPLE_ULAW: { -            uint8_t *d, *e; -            unsigned channel; -            int32_t linear[PA_CHANNELS_MAX]; - -            calc_linear_integer_volume(linear, volume); - -            e = (uint8_t*) ptr + c->length; - -            for (channel = 0, d = ptr; d < e; d++) { -                int32_t t, hi, lo; - -                hi = linear[channel] >> 16; -                lo = linear[channel] & 0xFFFF; - -                t = (int32_t) st_ulaw2linear16(*d); -                t = ((t * lo) >> 16) + (t * hi); -                t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); -                *d = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); - -                if (PA_UNLIKELY(++channel >= spec->channels)) -                    channel = 0; -            } -            break; -        } - -        case PA_SAMPLE_ALAW: { -            uint8_t *d, *e; -            unsigned channel; -            int32_t linear[PA_CHANNELS_MAX]; - -            calc_linear_integer_volume(linear, volume); - -            e = (uint8_t*) ptr + c->length; - -            for (channel = 0, d = ptr; d < e; d++) { -                int32_t t, hi, lo; - -                hi = linear[channel] >> 16; -                lo = linear[channel] & 0xFFFF; - -                t = 
(int32_t) st_alaw2linear16(*d); -                t = ((t * lo) >> 16) + (t * hi); -                t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); -                *d = (uint8_t) st_13linear2alaw((int16_t) t >> 3); - -                if (PA_UNLIKELY(++channel >= spec->channels)) -                    channel = 0; -            } -            break; -        } - -        case PA_SAMPLE_FLOAT32NE: { -            float *d; -            int skip; -            unsigned n; -            unsigned channel; - -            d = ptr; -            skip = (int) (spec->channels * sizeof(float)); -            n = (unsigned) (c->length/sizeof(float)/spec->channels); - -            for (channel = 0; channel < spec->channels; channel ++) { -                float v, *t; - -                if (PA_UNLIKELY(volume->values[channel] == PA_VOLUME_NORM)) -                    continue; - -                v = (float) pa_sw_volume_to_linear(volume->values[channel]); -                t = d + channel; -                oil_scalarmult_f32(t, skip, t, skip, &v, (int) n); -            } -            break; -        } - -        case PA_SAMPLE_FLOAT32RE: { -            float *d, *e; -            unsigned channel; -            float linear[PA_CHANNELS_MAX]; - -            calc_linear_float_volume(linear, volume); - -            e = (float*) ptr + c->length/sizeof(float); - -            for (channel = 0, d = ptr; d < e; d++) { -                float t; +    if (spec->format < 0 || spec->format > PA_SAMPLE_MAX) { +      pa_log_warn(" Unable to change volume of format %s.", pa_sample_format_to_string(spec->format)); +      return; +    } -                t = PA_FLOAT32_SWAP(*d); -                t *= linear[channel]; -                *d = PA_FLOAT32_SWAP(t); +    do_volume = pa_get_volume_func (spec->format); +    pa_assert(do_volume); -                if (PA_UNLIKELY(++channel >= spec->channels)) -                    channel = 0; -            } - -            break; -        } +    
calc_volume_table[spec->format] ((void *)linear, volume); +    ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index; -        default: -            pa_log_warn(" Unable to change volume of format %s.", pa_sample_format_to_string(spec->format)); -            /* If we cannot change the volume, we just don't do it */ -    } +    do_volume (ptr, (void *)linear, spec->channels, c->length);      pa_memblock_release(c->memblock);  } @@ -1090,7 +805,7 @@ void pa_interleave(const void *src[], unsigned channels, void *dst, size_t ss, u          d = (uint8_t*) dst + c * ss;          for (j = 0; j < n; j ++) { -            oil_memcpy(d, s, (int) ss); +            memcpy(d, s, (int) ss);              s = (uint8_t*) s + ss;              d = (uint8_t*) d + fs;          } @@ -1118,7 +833,7 @@ void pa_deinterleave(const void *src, void *dst[], unsigned channels, size_t ss,          d = dst[c];          for (j = 0; j < n; j ++) { -            oil_memcpy(d, s, (int) ss); +            memcpy(d, s, (int) ss);              s = (uint8_t*) s + fs;              d = (uint8_t*) d + ss;          } @@ -1227,10 +942,15 @@ void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const vo      s = src; d = dst;      if (format == PA_SAMPLE_FLOAT32NE) { +        for (; n > 0; n--) { +            float f; -        float minus_one = -1.0, plus_one = 1.0; -        oil_clip_f32(d, (int) dstr, s, (int) sstr, (int) n, &minus_one, &plus_one); +            f = *s; +            *d = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f); +            s = (const float*) ((const uint8_t*) s + sstr); +            d = (float*) ((uint8_t*) d + dstr); +        }      } else {          pa_assert(format == PA_SAMPLE_FLOAT32RE); diff --git a/src/pulsecore/sample-util.h b/src/pulsecore/sample-util.h index 6a306c11..34df5cf3 100644 --- a/src/pulsecore/sample-util.h +++ b/src/pulsecore/sample-util.h @@ -86,6 +86,11 @@ void pa_memchunk_dump_to_file(pa_memchunk *c, const char *fn);  void pa_memchunk_sine(pa_memchunk 
*c, pa_mempool *pool, unsigned rate, unsigned freq); +typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned channels, unsigned length); + +pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f); +void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func); +  #define PA_CHANNEL_POSITION_MASK_LEFT                                   \      (PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_FRONT_LEFT)           \       | PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_REAR_LEFT)          \ diff --git a/src/pulsecore/sconv-s16le.c b/src/pulsecore/sconv-s16le.c index 43b8cb3e..0fefdf1c 100644 --- a/src/pulsecore/sconv-s16le.c +++ b/src/pulsecore/sconv-s16le.c @@ -28,8 +28,6 @@  #include <inttypes.h>  #include <stdio.h> -#include <liboil/liboilfuncs.h> -  #include <pulsecore/sconv.h>  #include <pulsecore/macro.h>  #include <pulsecore/log.h> @@ -86,17 +84,13 @@ void pa_sconv_s16le_to_float32ne(unsigned n, const int16_t *a, float *b) {      pa_assert(b);  #if SWAP_WORDS == 1 -      for (; n > 0; n--) {          int16_t s = *(a++);          *(b++) = ((float) INT16_FROM(s))/(float) 0x7FFF;      } -  #else -{ -    static const double add = 0, factor = 1.0/0x7FFF; -    oil_scaleconv_f32_s16(b, a, (int) n, &add, &factor); -} +    for (; n > 0; n--) +        *(b++) = ((float) (*(a++)))/(float) 0x7FFF;  #endif  } @@ -105,17 +99,13 @@ void pa_sconv_s32le_to_float32ne(unsigned n, const int32_t *a, float *b) {      pa_assert(b);  #if SWAP_WORDS == 1 -      for (; n > 0; n--) {          int32_t s = *(a++);          *(b++) = (float) (((double) INT32_FROM(s))/0x7FFFFFFF);      } -  #else -{ -    static const double add = 0, factor = 1.0/0x7FFFFFFF; -    oil_scaleconv_f32_s32(b, a, (int) n, &add, &factor); -} +    for (; n > 0; n--) +        *(b++) = (float) (((double) (*(a++)))/0x7FFFFFFF);  #endif  } @@ -124,7 +114,6 @@ void pa_sconv_s16le_from_float32ne(unsigned n, const float *a, int16_t *b) {      pa_assert(b);  #if SWAP_WORDS == 1 -      for (; n > 
0; n--) {          int16_t s;          float v = *(a++); @@ -133,12 +122,13 @@ void pa_sconv_s16le_from_float32ne(unsigned n, const float *a, int16_t *b) {          s = (int16_t) lrintf(v * 0x7FFF);          *(b++) = INT16_TO(s);      } -  #else -{ -    static const double add = 0, factor = 0x7FFF; -    oil_scaleconv_s16_f32(b, a, (int) n, &add, &factor); -} +    for (; n > 0; n--) { +        float v = *(a++); + +        v = PA_CLAMP_UNLIKELY(v, -1.0f, 1.f); +        *(b++) = (int16_t) lrintf(v * 0x7FFF); +    }  #endif  } @@ -147,7 +137,6 @@ void pa_sconv_s32le_from_float32ne(unsigned n, const float *a, int32_t *b) {      pa_assert(b);  #if SWAP_WORDS == 1 -      for (; n > 0; n--) {          int32_t s;          float v = *(a++); @@ -156,12 +145,13 @@ void pa_sconv_s32le_from_float32ne(unsigned n, const float *a, int32_t *b) {          s = (int32_t) lrint((double) v * (double) 0x7FFFFFFF);          *(b++) = INT32_TO(s);      } -  #else -{ -    static const double add = 0, factor = 0x7FFFFFFF; -    oil_scaleconv_s32_f32(b, a, (int) n, &add, &factor); -} +    for (; n > 0; n--) { +        float v = *(a++); + +        v = PA_CLAMP_UNLIKELY(v, -1.0f, 1.0f); +        *(b++) = (int32_t) lrint((double) v * (double) 0x7FFFFFFF); +    }  #endif  } diff --git a/src/pulsecore/sconv.c b/src/pulsecore/sconv.c index d89f4283..d06d6985 100644 --- a/src/pulsecore/sconv.c +++ b/src/pulsecore/sconv.c @@ -27,9 +27,6 @@  #include <stdio.h>  #include <stdlib.h> -#include <liboil/liboilfuncs.h> -#include <liboil/liboil.h> -  #include <pulsecore/g711.h>  #include <pulsecore/macro.h> @@ -41,32 +38,31 @@  /* u8 */  static void u8_to_float32ne(unsigned n, const uint8_t *a, float *b) { -    static const double add = -1, factor = 1.0/128.0; -      pa_assert(a);      pa_assert(b); -    oil_scaleconv_f32_u8(b, a, (int) n, &add, &factor); +    for (; n > 0; n--, a++, b++) +        *b = (*a * 1.0/128.0) - 1.0;  }  static void u8_from_float32ne(unsigned n, const float *a, uint8_t *b) { -    
static const double add = 128, factor = 127.0; -      pa_assert(a);      pa_assert(b); -    oil_scaleconv_u8_f32(b, a, (int) n, &add, &factor); +    for (; n > 0; n--, a++, b++) { +        float v; +        v = (*a * 127.0) + 128.0; +	v = PA_CLAMP_UNLIKELY (v, 0.0, 255.0); +	*b = rint (v); +    }  }  static void u8_to_s16ne(unsigned n, const uint8_t *a, int16_t *b) { -    static const int16_t add = -0x80, factor = 0x100; -      pa_assert(a);      pa_assert(b); -    oil_conv_s16_u8(b, 2, a, 1, (int) n); -    oil_scalaradd_s16(b, 2, b, 2, &add, (int) n); -    oil_scalarmult_s16(b, 2, b, 2, &factor, (int) n); +    for (; n > 0; n--, a++, b++) +        *b = (((int16_t)*a) - 128) << 8;  }  static void u8_from_s16ne(unsigned n, const int16_t *a, uint8_t *b) { @@ -84,7 +80,7 @@ static void float32ne_to_float32ne(unsigned n, const float *a, float *b) {      pa_assert(a);      pa_assert(b); -    oil_memcpy(b, a, (int) (sizeof(float) * n)); +    memcpy(b, a, (int) (sizeof(float) * n));  }  static void float32re_to_float32ne(unsigned n, const float *a, float *b) { @@ -101,7 +97,7 @@ static void s16ne_to_s16ne(unsigned n, const int16_t *a, int16_t *b) {      pa_assert(a);      pa_assert(b); -    oil_memcpy(b, a, (int) (sizeof(int16_t) * n)); +    memcpy(b, a, (int) (sizeof(int16_t) * n));  }  static void s16re_to_s16ne(unsigned n, const int16_t *a, int16_t *b) { @@ -188,98 +184,130 @@ static void alaw_from_s16ne(unsigned n, const int16_t *a, uint8_t *b) {          *b = st_13linear2alaw(*a >> 3);  } +static pa_convert_func_t to_float32ne_table[] = { +    [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_to_float32ne, +    [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_to_float32ne, +    [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_to_float32ne, +    [PA_SAMPLE_S16LE]     = (pa_convert_func_t) pa_sconv_s16le_to_float32ne, +    [PA_SAMPLE_S16BE]     = (pa_convert_func_t) pa_sconv_s16be_to_float32ne, +    [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_to_float32ne, 
+    [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_to_float32ne, +    [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_to_float32ne, +    [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_to_float32ne, +    [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_to_float32ne, +    [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_to_float32ne, +    [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne, +    [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne, +}; +  pa_convert_func_t pa_get_convert_to_float32ne_function(pa_sample_format_t f) { -    static const pa_convert_func_t table[] = { -        [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_to_float32ne, -        [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_to_float32ne, -        [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_to_float32ne, -        [PA_SAMPLE_S16LE]     = (pa_convert_func_t) pa_sconv_s16le_to_float32ne, -        [PA_SAMPLE_S16BE]     = (pa_convert_func_t) pa_sconv_s16be_to_float32ne, -        [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_to_float32ne, -        [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_to_float32ne, -        [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_to_float32ne, -        [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_to_float32ne, -        [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_to_float32ne, -        [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_to_float32ne, -        [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne, -        [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne, -    }; +    pa_assert(f >= 0); +    pa_assert(f < PA_SAMPLE_MAX); + +    return to_float32ne_table[f]; +} + +void pa_set_convert_to_float32ne_function(pa_sample_format_t f, pa_convert_func_t func) {      pa_assert(f >= 0);      pa_assert(f < PA_SAMPLE_MAX); -    return table[f]; +    
to_float32ne_table[f] = func;  } +static pa_convert_func_t from_float32ne_table[] = { +    [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_from_float32ne, +    [PA_SAMPLE_S16LE]     = (pa_convert_func_t) pa_sconv_s16le_from_float32ne, +    [PA_SAMPLE_S16BE]     = (pa_convert_func_t) pa_sconv_s16be_from_float32ne, +    [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_from_float32ne, +    [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_from_float32ne, +    [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_from_float32ne, +    [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_from_float32ne, +    [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_from_float32ne, +    [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_from_float32ne, +    [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne, +    [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne, +    [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_from_float32ne, +    [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_from_float32ne +}; +  pa_convert_func_t pa_get_convert_from_float32ne_function(pa_sample_format_t f) { -    static const pa_convert_func_t table[] = { -        [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_from_float32ne, -        [PA_SAMPLE_S16LE]     = (pa_convert_func_t) pa_sconv_s16le_from_float32ne, -        [PA_SAMPLE_S16BE]     = (pa_convert_func_t) pa_sconv_s16be_from_float32ne, -        [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_from_float32ne, -        [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_from_float32ne, -        [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_from_float32ne, -        [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_from_float32ne, -        [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_from_float32ne, -        [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_from_float32ne, -        
[PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne, -        [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne, -        [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_from_float32ne, -        [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_from_float32ne -    }; +    pa_assert(f >= 0); +    pa_assert(f < PA_SAMPLE_MAX); + +    return from_float32ne_table[f]; +} + +void pa_set_convert_from_float32ne_function(pa_sample_format_t f, pa_convert_func_t func) {      pa_assert(f >= 0);      pa_assert(f < PA_SAMPLE_MAX); -    return table[f]; +    from_float32ne_table[f] = func;  } +static pa_convert_func_t to_s16ne_table[] = { +    [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_to_s16ne, +    [PA_SAMPLE_S16NE]     = (pa_convert_func_t) s16ne_to_s16ne, +    [PA_SAMPLE_S16RE]     = (pa_convert_func_t) s16re_to_s16ne, +    [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_to_s16ne, +    [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_to_s16ne, +    [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_to_s16ne, +    [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_to_s16ne, +    [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_to_s16ne, +    [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_to_s16ne, +    [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_to_s16ne, +    [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_to_s16ne, +    [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_to_s16ne, +    [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_to_s16ne +}; +  pa_convert_func_t pa_get_convert_to_s16ne_function(pa_sample_format_t f) { -    static const pa_convert_func_t table[] = { -        [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_to_s16ne, -        [PA_SAMPLE_S16NE]     = (pa_convert_func_t) s16ne_to_s16ne, -        [PA_SAMPLE_S16RE]     = (pa_convert_func_t) s16re_to_s16ne, -        [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) 
pa_sconv_float32be_to_s16ne, -        [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_to_s16ne, -        [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_to_s16ne, -        [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_to_s16ne, -        [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_to_s16ne, -        [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_to_s16ne, -        [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_to_s16ne, -        [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_to_s16ne, -        [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_to_s16ne, -        [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_to_s16ne -    }; +    pa_assert(f >= 0); +    pa_assert(f < PA_SAMPLE_MAX); + +    return to_s16ne_table[f]; +} + +void pa_set_convert_to_s16ne_function(pa_sample_format_t f, pa_convert_func_t func) {      pa_assert(f >= 0);      pa_assert(f < PA_SAMPLE_MAX); -    return table[f]; +    to_s16ne_table[f] = func;  } +static pa_convert_func_t from_s16ne_table[] = { +    [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_from_s16ne, +    [PA_SAMPLE_S16NE]     = (pa_convert_func_t) s16ne_to_s16ne, +    [PA_SAMPLE_S16RE]     = (pa_convert_func_t) s16re_to_s16ne, +    [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_from_s16ne, +    [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_from_s16ne, +    [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_from_s16ne, +    [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_from_s16ne, +    [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_from_s16ne, +    [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_from_s16ne, +    [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_from_s16ne, +    [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_from_s16ne, +    [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_from_s16ne, +    [PA_SAMPLE_ULAW]      = 
(pa_convert_func_t) ulaw_from_s16ne, +}; +  pa_convert_func_t pa_get_convert_from_s16ne_function(pa_sample_format_t f) { -    static const pa_convert_func_t table[] = { -        [PA_SAMPLE_U8]        = (pa_convert_func_t) u8_from_s16ne, -        [PA_SAMPLE_S16NE]     = (pa_convert_func_t) s16ne_to_s16ne, -        [PA_SAMPLE_S16RE]     = (pa_convert_func_t) s16re_to_s16ne, -        [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_from_s16ne, -        [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_from_s16ne, -        [PA_SAMPLE_S32BE]     = (pa_convert_func_t) pa_sconv_s32be_from_s16ne, -        [PA_SAMPLE_S32LE]     = (pa_convert_func_t) pa_sconv_s32le_from_s16ne, -        [PA_SAMPLE_S24BE]     = (pa_convert_func_t) pa_sconv_s24be_from_s16ne, -        [PA_SAMPLE_S24LE]     = (pa_convert_func_t) pa_sconv_s24le_from_s16ne, -        [PA_SAMPLE_S24_32BE]  = (pa_convert_func_t) pa_sconv_s24_32be_from_s16ne, -        [PA_SAMPLE_S24_32LE]  = (pa_convert_func_t) pa_sconv_s24_32le_from_s16ne, -        [PA_SAMPLE_ALAW]      = (pa_convert_func_t) alaw_from_s16ne, -        [PA_SAMPLE_ULAW]      = (pa_convert_func_t) ulaw_from_s16ne, -    }; +    pa_assert(f >= 0); +    pa_assert(f < PA_SAMPLE_MAX); + +    return from_s16ne_table[f]; +} + +void pa_set_convert_from_s16ne_function(pa_sample_format_t f, pa_convert_func_t func) {      pa_assert(f >= 0);      pa_assert(f < PA_SAMPLE_MAX); -    return table[f]; +    from_s16ne_table[f] = func;  } diff --git a/src/pulsecore/sconv.h b/src/pulsecore/sconv.h index b00a16a4..cd937559 100644 --- a/src/pulsecore/sconv.h +++ b/src/pulsecore/sconv.h @@ -33,4 +33,10 @@ pa_convert_func_t pa_get_convert_from_float32ne_function(pa_sample_format_t f) P  pa_convert_func_t pa_get_convert_to_s16ne_function(pa_sample_format_t f) PA_GCC_PURE;  pa_convert_func_t pa_get_convert_from_s16ne_function(pa_sample_format_t f) PA_GCC_PURE; +void pa_set_convert_to_float32ne_function(pa_sample_format_t f, pa_convert_func_t func); 
+void pa_set_convert_from_float32ne_function(pa_sample_format_t f, pa_convert_func_t func); + +void pa_set_convert_to_s16ne_function(pa_sample_format_t f, pa_convert_func_t func); +void pa_set_convert_from_s16ne_function(pa_sample_format_t f, pa_convert_func_t func); +  #endif diff --git a/src/pulsecore/svolume_arm.c b/src/pulsecore/svolume_arm.c new file mode 100644 index 00000000..5bd1448f --- /dev/null +++ b/src/pulsecore/svolume_arm.c @@ -0,0 +1,195 @@ +/*** +  This file is part of PulseAudio. + +  Copyright 2004-2006 Lennart Poettering +  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk> + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. + +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. 
+***/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <pulse/timeval.h> +#include <pulsecore/random.h> +#include <pulsecore/macro.h> +#include <pulsecore/g711.h> +#include <pulsecore/core-util.h> + +#include "cpu-arm.h" + +#include "sample-util.h" +#include "endianmacros.h" + +#if defined (__arm__) + +#define MOD_INC() \ +    " subs  r0, r6, %2              \n\t" \ +    " addcs r0, %1                  \n\t" \ +    " movcs r6, r0                  \n\t" + +static void +pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    int32_t *ve; + +    channels = PA_MAX (4U, channels); +    ve = volumes + channels; + +    __asm__ __volatile__ ( +        " mov r6, %1                      \n\t" +        " mov %3, %3, LSR #1              \n\t" /* length /= sizeof (int16_t) */ +        " tst %3, #1                      \n\t" /* check for odd samples */ +        " beq  2f                         \n\t" + +        "1:                               \n\t" +        " ldr  r0, [r6], #4               \n\t" /* odd samples volumes */ +        " ldrh r2, [%0]                   \n\t" + +        " smulwb r0, r0, r2               \n\t" +        " ssat r0, #16, r0                \n\t" + +        " strh r0, [%0], #2               \n\t" + +        MOD_INC() + +        "2:                               \n\t" +        " mov %3, %3, LSR #1              \n\t" +        " tst %3, #1                      \n\t" /* check for odd samples */ +        " beq  4f                         \n\t" + +        "3:                               \n\t" +        " ldrd r2, [r6], #8               \n\t" /* 2 samples at a time */ +        " ldr  r0, [%0]                   \n\t" + +        " smulwt r2, r2, r0               \n\t" +        " smulwb r3, r3, r0               \n\t" + +        " ssat r2, #16, r2                \n\t" +        " ssat r3, #16, r3                \n\t" + +        " pkhbt r0, r3, r2, LSL #16       \n\t" +        " str  r0, [%0], #4            
   \n\t" + +        MOD_INC() + +        "4:                               \n\t" +        " movs %3, %3, LSR #1             \n\t" +        " beq  6f                         \n\t" + +        "5:                               \n\t" +        " ldrd r2, [r6], #8               \n\t" /* 4 samples at a time */ +        " ldrd r4, [r6], #8               \n\t" +        " ldrd r0, [%0]                   \n\t" + +        " smulwt r2, r2, r0               \n\t" +        " smulwb r3, r3, r0               \n\t" +        " smulwt r4, r4, r1               \n\t" +        " smulwb r5, r5, r1               \n\t" + +        " ssat r2, #16, r2                \n\t" +        " ssat r3, #16, r3                \n\t" +        " ssat r4, #16, r4                \n\t" +        " ssat r5, #16, r5                \n\t" + +        " pkhbt r0, r3, r2, LSL #16       \n\t" +        " pkhbt r1, r5, r4, LSL #16       \n\t" +        " strd  r0, [%0], #8              \n\t" + +        MOD_INC() + +        " subs %3, %3, #1                 \n\t" +        " bne 5b                          \n\t" +        "6:                               \n\t" + +        : "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length) +        : +        : "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc" +    ); +} + +#undef RUN_TEST + +#ifdef RUN_TEST +#define CHANNELS 2 +#define SAMPLES 1023 +#define TIMES 1000 +#define PADDING 16 + +static void run_test (void) { +    int16_t samples[SAMPLES]; +    int16_t samples_ref[SAMPLES]; +    int16_t samples_orig[SAMPLES]; +    int32_t volumes[CHANNELS + PADDING]; +    int i, j, padding; +    pa_do_volume_func_t func; +    pa_usec_t start, stop; + +    func = pa_get_volume_func (PA_SAMPLE_S16NE); + +    printf ("checking ARM %zd\n", sizeof (samples)); + +    pa_random (samples, sizeof (samples)); +    memcpy (samples_ref, samples, sizeof (samples)); +    memcpy (samples_orig, samples, sizeof (samples)); + +    for (i = 0; i < CHANNELS; i++) +        volumes[i] = rand() >> 1; +    for 
(padding = 0; padding < PADDING; padding++, i++) +        volumes[i] = volumes[padding]; + +    func (samples_ref, volumes, CHANNELS, sizeof (samples)); +    pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples)); +    for (i = 0; i < SAMPLES; i++) { +        if (samples[i] != samples_ref[i]) { +            printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], +                  samples_orig[i], volumes[i % CHANNELS]); +        } +    } + +    start = pa_rtclock_now(); +    for (j = 0; j < TIMES; j++) { +        memcpy (samples, samples_orig, sizeof (samples)); +        pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples)); +    } +    stop = pa_rtclock_now(); +    pa_log_info("ARM: %llu usec.", (long long unsigned int) (stop - start)); + +    start = pa_rtclock_now(); +    for (j = 0; j < TIMES; j++) { +        memcpy (samples_ref, samples_orig, sizeof (samples)); +        func (samples_ref, volumes, CHANNELS, sizeof (samples)); +    } +    stop = pa_rtclock_now(); +    pa_log_info("ref: %llu usec.", (long long unsigned int) (stop - start)); +} +#endif + +#endif /* defined (__arm__) */ + + +void pa_volume_func_init_arm (pa_cpu_arm_flag_t flags) { +#if defined (__arm__) +    pa_log_info("Initialising ARM optimized functions."); + +#ifdef RUN_TEST +    run_test (); +#endif + +    pa_set_volume_func (PA_SAMPLE_S16NE,     (pa_do_volume_func_t) pa_volume_s16ne_arm); +#endif /* defined (__arm__) */ +} diff --git a/src/pulsecore/svolume_c.c b/src/pulsecore/svolume_c.c new file mode 100644 index 00000000..5fc052b8 --- /dev/null +++ b/src/pulsecore/svolume_c.c @@ -0,0 +1,335 @@ +/*** +  This file is part of PulseAudio. 
+ +  Copyright 2004-2006 Lennart Poettering +  Copyright 2006 Pierre Ossman <ossman@cendio.se> for Cendio AB + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. + +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. +***/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + + +#include <pulsecore/macro.h> +#include <pulsecore/g711.h> +#include <pulsecore/core-util.h> + +#include "sample-util.h" +#include "endianmacros.h" + +static void +pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    unsigned channel; + +    for (channel = 0; length; length--) { +        int32_t t, hi, lo; + +        hi = volumes[channel] >> 16; +        lo = volumes[channel] & 0xFFFF; + +        t = (int32_t) *samples - 0x80; +        t = ((t * lo) >> 16) + (t * hi); +        t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); +        *samples++ = (uint8_t) (t + 0x80); + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static void +pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    unsigned channel; + +    for (channel = 0; length; length--) { +        int32_t t, hi, lo; + +        hi = volumes[channel] >> 16; +        lo = volumes[channel] & 0xFFFF; + +        t = (int32_t) st_alaw2linear16(*samples); +        t = ((t * lo) >> 16) + (t * hi); +        t = 
PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); +        *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3); + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static void +pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    unsigned channel; + +    for (channel = 0; length; length--) { +        int32_t t, hi, lo; + +        hi = volumes[channel] >> 16; +        lo = volumes[channel] & 0xFFFF; + +        t = (int32_t) st_ulaw2linear16(*samples); +        t = ((t * lo) >> 16) + (t * hi); +        t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); +        *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static void +pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    unsigned channel; + +    length /= sizeof (int16_t); + +    for (channel = 0; length; length--) { +        int32_t t, hi, lo; + +        /* Multiplying the 32bit volume factor with the 16bit +         * sample might result in an 48bit value. We want to +         * do without 64 bit integers and hence do the +         * multiplication independantly for the HI and LO part +         * of the volume. 
*/ + +        hi = volumes[channel] >> 16; +        lo = volumes[channel] & 0xFFFF; + +        t = (int32_t)(*samples); +        t = ((t * lo) >> 16) + (t * hi); +        t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); +        *samples++ = (int16_t) t; + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static void +pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    unsigned channel; + +    length /= sizeof (int16_t); + +    for (channel = 0; length; length--) { +        int32_t t, hi, lo; + +        hi = volumes[channel] >> 16; +        lo = volumes[channel] & 0xFFFF; + +        t = (int32_t) PA_INT16_SWAP(*samples); +        t = ((t * lo) >> 16) + (t * hi); +        t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); +        *samples++ = PA_INT16_SWAP((int16_t) t); + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static void +pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length) +{ +    unsigned channel; + +    length /= sizeof (float); + +    for (channel = 0; length; length--) { +        *samples++ *= volumes[channel]; + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static void +pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length) +{ +    unsigned channel; + +    length /= sizeof (float); + +    for (channel = 0; length; length--) { +        float t; + +        t = PA_FLOAT32_SWAP(*samples); +        t *= volumes[channel]; +        *samples++ = PA_FLOAT32_SWAP(t); + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static void +pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    unsigned channel; + +    length /= sizeof (int32_t); + +    for (channel = 0; length; length--) { +        int64_t t; + +        t = (int64_t)(*samples); +        t = (t * 
volumes[channel]) >> 16; +        t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); +        *samples++ = (int32_t) t; + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static void +pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    unsigned channel; + +    length /= sizeof (int32_t); + +    for (channel = 0; length; length--) { +        int64_t t; + +        t = (int64_t) PA_INT32_SWAP(*samples); +        t = (t * volumes[channel]) >> 16; +        t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); +        *samples++ = PA_INT32_SWAP((int32_t) t); + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static void +pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    unsigned channel; +    uint8_t *e; + +    e = samples + length; + +    for (channel = 0; samples < e; samples += 3) { +        int64_t t; + +        t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8)); +        t = (t * volumes[channel]) >> 16; +        t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); +        PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8); + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static void +pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    unsigned channel; +    uint8_t *e; + +    e = samples + length; + +    for (channel = 0; samples < e; samples += 3) { +        int64_t t; + +        t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8)); +        t = (t * volumes[channel]) >> 16; +        t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); +        PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8); + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static void +pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) 
+{ +    unsigned channel; + +    length /= sizeof (uint32_t); + +    for (channel = 0; length; length--) { +        int64_t t; + +        t = (int64_t) ((int32_t) (*samples << 8)); +        t = (t * volumes[channel]) >> 16; +        t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); +        *samples++ = ((uint32_t) ((int32_t) t)) >> 8; + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static void +pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    unsigned channel; + +    length /= sizeof (uint32_t); + +    for (channel = 0; length; length--) { +        int64_t t; + +        t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8)); +        t = (t * volumes[channel]) >> 16; +        t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); +        *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); + +        if (PA_UNLIKELY(++channel >= channels)) +            channel = 0; +    } +} + +static pa_do_volume_func_t do_volume_table[] = +{ +    [PA_SAMPLE_U8]        = (pa_do_volume_func_t) pa_volume_u8_c, +    [PA_SAMPLE_ALAW]      = (pa_do_volume_func_t) pa_volume_alaw_c, +    [PA_SAMPLE_ULAW]      = (pa_do_volume_func_t) pa_volume_ulaw_c, +    [PA_SAMPLE_S16NE]     = (pa_do_volume_func_t) pa_volume_s16ne_c, +    [PA_SAMPLE_S16RE]     = (pa_do_volume_func_t) pa_volume_s16re_c, +    [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c, +    [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c, +    [PA_SAMPLE_S32NE]     = (pa_do_volume_func_t) pa_volume_s32ne_c, +    [PA_SAMPLE_S32RE]     = (pa_do_volume_func_t) pa_volume_s32re_c, +    [PA_SAMPLE_S24NE]     = (pa_do_volume_func_t) pa_volume_s24ne_c, +    [PA_SAMPLE_S24RE]     = (pa_do_volume_func_t) pa_volume_s24re_c, +    [PA_SAMPLE_S24_32NE]  = (pa_do_volume_func_t) pa_volume_s24_32ne_c, +    [PA_SAMPLE_S24_32RE]  = (pa_do_volume_func_t) pa_volume_s24_32re_c +}; + 
+pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f) { +    pa_assert(f >= 0); +    pa_assert(f < PA_SAMPLE_MAX); + +    return do_volume_table[f]; +} + +void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func) { +    pa_assert(f >= 0); +    pa_assert(f < PA_SAMPLE_MAX); + +    do_volume_table[f] = func; +} diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c new file mode 100644 index 00000000..8510b0c4 --- /dev/null +++ b/src/pulsecore/svolume_mmx.c @@ -0,0 +1,313 @@ +/*** +  This file is part of PulseAudio. + +  Copyright 2004-2006 Lennart Poettering +  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk> + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. + +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. +***/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <pulse/timeval.h> +#include <pulsecore/random.h> +#include <pulsecore/macro.h> +#include <pulsecore/g711.h> +#include <pulsecore/core-util.h> + +#include "cpu-x86.h" + +#include "sample-util.h" +#include "endianmacros.h" + +#if defined (__i386__) || defined (__amd64__) +/* in s: 2 int16_t samples + * in v: 2 int32_t volumes, fixed point 16:16 + * out s: contains scaled and clamped int16_t samples. + * + * We calculate the high 32 bits of a 32x16 multiply which we then + * clamp to 16 bits. 
The calulcation is: + * + *  vl = (v & 0xffff) + *  vh = (v >> 16) + *  s = ((s * vl) >> 16) + (s * vh); + * + * For the first multiply we have to do a sign correction as we need to + * multiply a signed int with an unsigned int. Hacker's delight 8-3 gives a + * simple formula to correct the sign of the high word after the signed + * multiply. + */ +#define VOLUME_32x16(s,v)                  /* .. |   vh  |   vl  | */                   \ +      " pxor  %%mm4, %%mm4           \n\t" /* .. |    0  |    0  | */                   \ +      " punpcklwd %%mm4, "#s"        \n\t" /* .. |    0  |   p0  | */                   \ +      " pcmpgtw "#v", %%mm4          \n\t" /* .. |    0  | s(vl) | */                   \ +      " pand "#s", %%mm4             \n\t" /* .. |    0  |  (p0) |  (vl >> 15) & p */   \ +      " movq %%mm6, %%mm5            \n\t" /* .. |  ffff |   0   | */                   \ +      " pand "#v", %%mm5             \n\t" /* .. |   vh  |   0   | */                   \ +      " por %%mm5, %%mm4             \n\t" /* .. |   vh  |  (p0) | */                   \ +      " pmulhw "#s", "#v"            \n\t" /* .. |    0  | vl*p0 | */                   \ +      " paddw %%mm4, "#v"            \n\t" /* .. |   vh  | vl*p0 | vh + sign correct */ \ +      " pslld $16, "#s"              \n\t" /* .. |   p0  |    0  | */                   \ +      " por %%mm7, "#s"              \n\t" /* .. |   p0  |    1  | */                   \ +      " pmaddwd "#s", "#v"           \n\t" /* .. |    p0 * v0    | */                   \ +      " packssdw "#v", "#v"          \n\t" /* .. | p1*v1 | p0*v0 | */ + +/* approximately advances %3 = (%3 + a) % b. This function requires that + * a <= b. */ +#define MOD_ADD(a,b) \ +      " add "#a", %3                 \n\t" \ +      " mov %3, %4                   \n\t" \ +      " sub "#b", %4                 \n\t" \ +      " cmovae %4, %3                \n\t" + +/* swap 16 bits */ +#define SWAP_16(s) \ +      " movq "#s", %%mm4             \n\t" /* .. 
|  h  l |  */ \ +      " psrlw $8, %%mm4              \n\t" /* .. |  0  h |  */ \ +      " psllw $8, "#s"               \n\t" /* .. |  l  0 |  */ \ +      " por %%mm4, "#s"              \n\t" /* .. |  l  h |  */ + +/* swap 2 registers 16 bits for better pairing */ +#define SWAP_16_2(s1,s2) \ +      " movq "#s1", %%mm4            \n\t" /* .. |  h  l |  */ \ +      " movq "#s2", %%mm5            \n\t"                     \ +      " psrlw $8, %%mm4              \n\t" /* .. |  0  h |  */ \ +      " psrlw $8, %%mm5              \n\t"                     \ +      " psllw $8, "#s1"              \n\t" /* .. |  l  0 |  */ \ +      " psllw $8, "#s2"              \n\t"                     \ +      " por %%mm4, "#s1"             \n\t" /* .. |  l  h |  */ \ +      " por %%mm5, "#s2"             \n\t" + +static void +pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    pa_reg_x86 channel, temp; + +    /* the max number of samples we process at a time, this is also the max amount +     * we overread the volume array, which should have enough padding. */ +    channels = PA_MAX (4U, channels); + +    __asm__ __volatile__ ( +        " xor %3, %3                    \n\t" +        " sar $1, %2                    \n\t" /* length /= sizeof (int16_t) */ +        " pcmpeqw %%mm6, %%mm6          \n\t" /* .. |  ffff |  ffff | */ +        " pcmpeqw %%mm7, %%mm7          \n\t" /* .. |  ffff |  ffff | */ +        " pslld  $16, %%mm6             \n\t" /* .. |  ffff |     0 | */ +        " psrld  $31, %%mm7             \n\t" /* .. |     0 |     1 | */ + +        " test $1, %2                   \n\t" /* check for odd samples */ +        " je 2f                         \n\t" + +        " movd (%1, %3, 4), %%mm0       \n\t" /* |  v0h  |  v0l  | */ +        " movw (%0), %w4                \n\t" /*     ..  
|  p0   | */ +        " movd %4, %%mm1                \n\t" +        VOLUME_32x16 (%%mm1, %%mm0) +        " movd %%mm0, %4                \n\t" /*     ..  | p0*v0 | */ +        " movw %w4, (%0)                \n\t" +        " add $2, %0                    \n\t" +        MOD_ADD ($1, %5) + +        "2:                             \n\t" +        " sar $1, %2                    \n\t" /* prepare for processing 2 samples at a time */ +        " test $1, %2                   \n\t" /* check for odd samples */ +        " je 4f                         \n\t" + +        "3:                             \n\t" /* do samples in groups of 2 */ +        " movq (%1, %3, 4), %%mm0       \n\t" /* |  v1h  |  v1l  |  v0h  |  v0l  | */ +        " movd (%0), %%mm1              \n\t" /*              .. |   p1  |  p0   | */ +        VOLUME_32x16 (%%mm1, %%mm0) +        " movd %%mm0, (%0)              \n\t" /*              .. | p1*v1 | p0*v0 | */ +        " add $4, %0                    \n\t" +        MOD_ADD ($2, %5) + +        "4:                             \n\t" +        " sar $1, %2                    \n\t" /* prepare for processing 4 samples at a time */ +        " cmp $0, %2                    \n\t" +        " je 6f                         \n\t" + +        "5:                             \n\t" /* do samples in groups of 4 */ +        " movq (%1, %3, 4), %%mm0       \n\t" /* |  v1h  |  v1l  |  v0h  |  v0l  | */ +        " movq 8(%1, %3, 4), %%mm2      \n\t" /* |  v3h  |  v3l  |  v2h  |  v2l  | */ +        " movd (%0), %%mm1              \n\t" /*              .. |   p1  |  p0   | */ +        " movd 4(%0), %%mm3             \n\t" /*              .. |   p3  |  p2   | */ +        VOLUME_32x16 (%%mm1, %%mm0) +        VOLUME_32x16 (%%mm3, %%mm2) +        " movd %%mm0, (%0)              \n\t" /*              .. | p1*v1 | p0*v0 | */ +        " movd %%mm2, 4(%0)             \n\t" /*              .. 
| p3*v3 | p2*v2 | */ +        " add $8, %0                    \n\t" +        MOD_ADD ($4, %5) +        " dec %2                        \n\t" +        " jne 5b                        \n\t" + +        "6:                             \n\t" +        " emms                          \n\t" + +        : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) +        : "r" ((pa_reg_x86)channels) +        : "cc" +    ); +} + +static void +pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    pa_reg_x86 channel, temp; + +    /* the max number of samples we process at a time, this is also the max amount +     * we overread the volume array, which should have enough padding. */ +    channels = PA_MAX (4U, channels); + +    __asm__ __volatile__ ( +        " xor %3, %3                    \n\t" +        " sar $1, %2                    \n\t" /* length /= sizeof (int16_t) */ +        " pcmpeqw %%mm6, %%mm6          \n\t" /* .. |  ffff |  ffff | */ +        " pcmpeqw %%mm7, %%mm7          \n\t" /* .. |  ffff |  ffff | */ +        " pslld  $16, %%mm6             \n\t" /* .. |  ffff |     0 | */ +        " psrld  $31, %%mm7             \n\t" /* .. |     0 |     1 | */ + +        " test $1, %2                   \n\t" /* check for odd samples */ +        " je 2f                         \n\t" + +        " movd (%1, %3, 4), %%mm0       \n\t" /* |  v0h  |  v0l  | */ +        " movw (%0), %w4                \n\t" /*     ..  |  p0   | */ +        " rorw $8, %w4                  \n\t" +        " movd %4, %%mm1                \n\t" +        VOLUME_32x16 (%%mm1, %%mm0) +        " movd %%mm0, %4                \n\t" /*     ..  
| p0*v0 | */ +        " rorw $8, %w4                  \n\t" +        " movw %w4, (%0)                \n\t" +        " add $2, %0                    \n\t" +        MOD_ADD ($1, %5) + +        "2:                             \n\t" +        " sar $1, %2                    \n\t" /* prepare for processing 2 samples at a time */ +        " test $1, %2                   \n\t" /* check for odd samples */ +        " je 4f                         \n\t" + +        "3:                             \n\t" /* do samples in groups of 2 */ +        " movq (%1, %3, 4), %%mm0       \n\t" /* |  v1h  |  v1l  |  v0h  |  v0l  | */ +        " movd (%0), %%mm1              \n\t" /*              .. |   p1  |  p0   | */ +        SWAP_16 (%%mm1) +        VOLUME_32x16 (%%mm1, %%mm0) +        SWAP_16 (%%mm0) +        " movd %%mm0, (%0)              \n\t" /*              .. | p1*v1 | p0*v0 | */ +        " add $4, %0                    \n\t" +        MOD_ADD ($2, %5) + +        "4:                             \n\t" +        " sar $1, %2                    \n\t" /* prepare for processing 4 samples at a time */ +        " cmp $0, %2                    \n\t" +        " je 6f                         \n\t" + +        "5:                             \n\t" /* do samples in groups of 4 */ +        " movq (%1, %3, 4), %%mm0       \n\t" /* |  v1h  |  v1l  |  v0h  |  v0l  | */ +        " movq 8(%1, %3, 4), %%mm2      \n\t" /* |  v3h  |  v3l  |  v2h  |  v2l  | */ +        " movd (%0), %%mm1              \n\t" /*              .. |   p1  |  p0   | */ +        " movd 4(%0), %%mm3             \n\t" /*              .. |   p3  |  p2   | */ +        SWAP_16_2 (%%mm1, %%mm3) +        VOLUME_32x16 (%%mm1, %%mm0) +        VOLUME_32x16 (%%mm3, %%mm2) +        SWAP_16_2 (%%mm0, %%mm2) +        " movd %%mm0, (%0)              \n\t" /*              .. | p1*v1 | p0*v0 | */ +        " movd %%mm2, 4(%0)             \n\t" /*              .. 
| p3*v3 | p2*v2 | */ +        " add $8, %0                    \n\t" +        MOD_ADD ($4, %5) +        " dec %2                        \n\t" +        " jne 5b                        \n\t" + +        "6:                             \n\t" +        " emms                          \n\t" + +        : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) +        : "r" ((pa_reg_x86)channels) +        : "cc" +    ); +} + +#undef RUN_TEST + +#ifdef RUN_TEST +#define CHANNELS 2 +#define SAMPLES 1021 +#define TIMES 1000 +#define PADDING 16 + +static void run_test (void) { +    int16_t samples[SAMPLES]; +    int16_t samples_ref[SAMPLES]; +    int16_t samples_orig[SAMPLES]; +    int32_t volumes[CHANNELS + PADDING]; +    int i, j, padding; +    pa_do_volume_func_t func; +    pa_usec_t start, stop; + +    func = pa_get_volume_func (PA_SAMPLE_S16NE); + +    printf ("checking MMX %zd\n", sizeof (samples)); + +    pa_random (samples, sizeof (samples)); +    memcpy (samples_ref, samples, sizeof (samples)); +    memcpy (samples_orig, samples, sizeof (samples)); + +    for (i = 0; i < CHANNELS; i++) +        volumes[i] = rand() >> 1; +    for (padding = 0; padding < PADDING; padding++, i++) +        volumes[i] = volumes[padding]; + +    func (samples_ref, volumes, CHANNELS, sizeof (samples)); +    pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); +    for (i = 0; i < SAMPLES; i++) { +        if (samples[i] != samples_ref[i]) { +            printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], +                  samples_orig[i], volumes[i % CHANNELS]); +        } +    } + +    start = pa_rtclock_now(); +    for (j = 0; j < TIMES; j++) { +        memcpy (samples, samples_orig, sizeof (samples)); +        pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); +    } +    stop = pa_rtclock_now(); +    pa_log_info("MMX: %llu usec.", (long long unsigned int)(stop - start)); + +    start = pa_rtclock_now(); 
+    for (j = 0; j < TIMES; j++) { +        memcpy (samples_ref, samples_orig, sizeof (samples)); +        func (samples_ref, volumes, CHANNELS, sizeof (samples)); +    } +    stop = pa_rtclock_now(); +    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); +} +#endif + +#endif /* defined (__i386__) || defined (__amd64__) */ + + +void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) { +#if defined (__i386__) || defined (__amd64__) +    pa_log_info("Initialising MMX optimized functions."); + +#ifdef RUN_TEST +    run_test (); +#endif + +    pa_set_volume_func (PA_SAMPLE_S16NE,     (pa_do_volume_func_t) pa_volume_s16ne_mmx); +    pa_set_volume_func (PA_SAMPLE_S16RE,     (pa_do_volume_func_t) pa_volume_s16re_mmx); +#endif /* defined (__i386__) || defined (__amd64__) */ +} diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c new file mode 100644 index 00000000..54af4a57 --- /dev/null +++ b/src/pulsecore/svolume_sse.c @@ -0,0 +1,314 @@ +/*** +  This file is part of PulseAudio. + +  Copyright 2004-2006 Lennart Poettering +  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk> + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. + +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. 
+***/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <pulse/timeval.h> +#include <pulsecore/random.h> +#include <pulsecore/macro.h> +#include <pulsecore/g711.h> +#include <pulsecore/core-util.h> + +#include "cpu-x86.h" + +#include "sample-util.h" +#include "endianmacros.h" + +#if defined (__i386__) || defined (__amd64__) + +#define VOLUME_32x16(s,v)                  /* .. |   vh  |   vl  | */                   \ +      " pxor %%xmm4, %%xmm4          \n\t" /* .. |    0  |    0  | */                   \ +      " punpcklwd %%xmm4, "#s"       \n\t" /* .. |    0  |   p0  | */                   \ +      " pcmpgtw "#s", %%xmm4         \n\t" /* .. |    0  | s(p0) | */                   \ +      " pand "#v", %%xmm4            \n\t" /* .. |    0  |  (vl) | */                   \ +      " movdqa "#s", %%xmm5          \n\t"                                              \ +      " pmulhuw "#v", "#s"           \n\t" /* .. |    0  | vl*p0 | */                   \ +      " psubd %%xmm4, "#s"           \n\t" /* .. |    0  | vl*p0 | + sign correct */    \ +      " psrld $16, "#v"              \n\t" /* .. |   p0  |    0  | */                   \ +      " pmaddwd %%xmm5, "#v"         \n\t" /* .. |    p0 * vh    | */                   \ +      " paddd "#s", "#v"             \n\t" /* .. |    p0 * v0    | */                   \ +      " packssdw "#v", "#v"          \n\t" /* .. | p1*v1 | p0*v0 | */ + +#define MOD_ADD(a,b) \ +      " add "#a", %3                 \n\t" /* channel += inc           */ \ +      " mov %3, %4                   \n\t"                                \ +      " sub "#b", %4                 \n\t" /* tmp = channel - channels */ \ +      " cmovae %4, %3                \n\t" /* if (tmp >= 0) channel = tmp  */ + +/* swap 16 bits */ +#define SWAP_16(s) \ +      " movdqa "#s", %%xmm4          \n\t" /* .. |  h  l |  */ \ +      " psrlw $8, %%xmm4             \n\t" /* .. |  0  h |  */ \ +      " psllw $8, "#s"               \n\t" /* .. 
|  l  0 |  */ \ +      " por %%xmm4, "#s"             \n\t" /* .. |  l  h |  */ + +/* swap 2 registers 16 bits for better pairing */ +#define SWAP_16_2(s1,s2) \ +      " movdqa "#s1", %%xmm4         \n\t" /* .. |  h  l |  */ \ +      " movdqa "#s2", %%xmm5         \n\t"                     \ +      " psrlw $8, %%xmm4             \n\t" /* .. |  0  h |  */ \ +      " psrlw $8, %%xmm5             \n\t"                     \ +      " psllw $8, "#s1"              \n\t" /* .. |  l  0 |  */ \ +      " psllw $8, "#s2"              \n\t"                     \ +      " por %%xmm4, "#s1"            \n\t" /* .. |  l  h |  */ \ +      " por %%xmm5, "#s2"            \n\t" + +static void +pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    pa_reg_x86 channel, temp; + +    /* the max number of samples we process at a time, this is also the max amount +     * we overread the volume array, which should have enough padding. */ +    channels = PA_MAX (8U, channels); + +    __asm__ __volatile__ ( +        " xor %3, %3                    \n\t" +        " sar $1, %2                    \n\t" /* length /= sizeof (int16_t) */ + +        " test $1, %2                   \n\t" /* check for odd samples */ +        " je 2f                         \n\t" + +        " movd (%1, %3, 4), %%xmm0      \n\t" /* |  v0h  |  v0l  | */ +        " movw (%0), %w4                \n\t" /*     ..  |   p0  | */ +        " movd %4, %%xmm1               \n\t" +        VOLUME_32x16 (%%xmm1, %%xmm0) +        " movd %%xmm0, %4               \n\t" /*     ..  
| p0*v0 | */ +        " movw %w4, (%0)                \n\t" +        " add $2, %0                    \n\t" +        MOD_ADD ($1, %5) + +        "2:                             \n\t" +        " sar $1, %2                    \n\t" /* prepare for processing 2 samples at a time */ +        " test $1, %2                   \n\t" +        " je 4f                         \n\t" + +        "3:                             \n\t" /* do samples in groups of 2 */ +        " movq (%1, %3, 4), %%xmm0      \n\t" /* |  v1h  |  v1l  |  v0h  |  v0l  | */ +        " movd (%0), %%xmm1             \n\t" /*              .. |   p1  |  p0   | */ +        VOLUME_32x16 (%%xmm1, %%xmm0) +        " movd %%xmm0, (%0)             \n\t" /*              .. | p1*v1 | p0*v0 | */ +        " add $4, %0                    \n\t" +        MOD_ADD ($2, %5) + +        "4:                             \n\t" +        " sar $1, %2                    \n\t" /* prepare for processing 4 samples at a time */ +        " test $1, %2                   \n\t" +        " je 6f                         \n\t" + +        /* FIXME, we can do aligned access of the volume values if we can guarantee +         * that the array is 16 bytes aligned, we probably have to do the odd values +         * after this then. */ +        "5:                             \n\t" /* do samples in groups of 4 */ +        " movdqu (%1, %3, 4), %%xmm0    \n\t" /* |  v3h  |  v3l  ..  v0h  |  v0l  | */ +        " movq (%0), %%xmm1             \n\t" /*              .. |   p3  ..  p0   | */ +        VOLUME_32x16 (%%xmm1, %%xmm0) +        " movq %%xmm0, (%0)             \n\t" /*              .. | p3*v3 .. 
p0*v0 | */ +        " add $8, %0                    \n\t" +        MOD_ADD ($4, %5) + +        "6:                             \n\t" +        " sar $1, %2                    \n\t" /* prepare for processing 8 samples at a time */ +        " cmp $0, %2                    \n\t" +        " je 8f                         \n\t" + +        "7:                             \n\t" /* do samples in groups of 8 */ +        " movdqu (%1, %3, 4), %%xmm0    \n\t" /* |  v3h  |  v3l  ..  v0h  |  v0l  | */ +        " movdqu 16(%1, %3, 4), %%xmm2  \n\t" /* |  v7h  |  v7l  ..  v4h  |  v4l  | */ +        " movq (%0), %%xmm1             \n\t" /*              .. |   p3  ..  p0   | */ +        " movq 8(%0), %%xmm3            \n\t" /*              .. |   p7  ..  p4   | */ +        VOLUME_32x16 (%%xmm1, %%xmm0) +        VOLUME_32x16 (%%xmm3, %%xmm2) +        " movq %%xmm0, (%0)             \n\t" /*              .. | p3*v3 .. p0*v0 | */ +        " movq %%xmm2, 8(%0)            \n\t" /*              .. | p7*v7 .. p4*v4 | */ +        " add $16, %0                   \n\t" +        MOD_ADD ($8, %5) +        " dec %2                        \n\t" +        " jne 7b                        \n\t" +        "8:                             \n\t" + +        : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) +        : "r" ((pa_reg_x86)channels) +        : "cc" +    ); +} + +static void +pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ +    pa_reg_x86 channel, temp; + +    /* the max number of samples we process at a time, this is also the max amount +     * we overread the volume array, which should have enough padding. 
*/ +    channels = PA_MAX (8U, channels); + +    __asm__ __volatile__ ( +        " xor %3, %3                    \n\t" +        " sar $1, %2                    \n\t" /* length /= sizeof (int16_t) */ + +        " test $1, %2                   \n\t" /* check for odd samples */ +        " je 2f                         \n\t" + +        " movd (%1, %3, 4), %%xmm0      \n\t" /* |  v0h  |  v0l  | */ +        " movw (%0), %w4                \n\t" /*     ..  |   p0  | */ +        " rorw $8, %w4                  \n\t" +        " movd %4, %%xmm1               \n\t" +        VOLUME_32x16 (%%xmm1, %%xmm0) +        " movd %%xmm0, %4               \n\t" /*     ..  | p0*v0 | */ +        " rorw $8, %w4                  \n\t" +        " movw %w4, (%0)                \n\t" +        " add $2, %0                    \n\t" +        MOD_ADD ($1, %5) + +        "2:                             \n\t" +        " sar $1, %2                    \n\t" /* prepare for processing 2 samples at a time */ +        " test $1, %2                   \n\t" +        " je 4f                         \n\t" + +        "3:                             \n\t" /* do samples in groups of 2 */ +        " movq (%1, %3, 4), %%xmm0      \n\t" /* |  v1h  |  v1l  |  v0h  |  v0l  | */ +        " movd (%0), %%xmm1             \n\t" /*              .. |   p1  |  p0   | */ +        SWAP_16 (%%xmm1) +        VOLUME_32x16 (%%xmm1, %%xmm0) +        SWAP_16 (%%xmm0) +        " movd %%xmm0, (%0)             \n\t" /*              .. 
| p1*v1 | p0*v0 | */ +        " add $4, %0                    \n\t" +        MOD_ADD ($2, %5) + +        "4:                             \n\t" +        " sar $1, %2                    \n\t" /* prepare for processing 4 samples at a time */ +        " test $1, %2                   \n\t" +        " je 6f                         \n\t" + +        /* FIXME, we can do aligned access of the volume values if we can guarantee +         * that the array is 16 bytes aligned, we probably have to do the odd values +         * after this then. */ +        "5:                             \n\t" /* do samples in groups of 4 */ +        " movdqu (%1, %3, 4), %%xmm0    \n\t" /* |  v3h  |  v3l  ..  v0h  |  v0l  | */ +        " movq (%0), %%xmm1             \n\t" /*              .. |   p3  ..  p0   | */ +        SWAP_16 (%%xmm1) +        VOLUME_32x16 (%%xmm1, %%xmm0) +        SWAP_16 (%%xmm0) +        " movq %%xmm0, (%0)             \n\t" /*              .. | p3*v3 .. p0*v0 | */ +        " add $8, %0                    \n\t" +        MOD_ADD ($4, %5) + +        "6:                             \n\t" +        " sar $1, %2                    \n\t" /* prepare for processing 8 samples at a time */ +        " cmp $0, %2                    \n\t" +        " je 8f                         \n\t" + +        "7:                             \n\t" /* do samples in groups of 8 */ +        " movdqu (%1, %3, 4), %%xmm0    \n\t" /* |  v3h  |  v3l  ..  v0h  |  v0l  | */ +        " movdqu 16(%1, %3, 4), %%xmm2  \n\t" /* |  v7h  |  v7l  ..  v4h  |  v4l  | */ +        " movq (%0), %%xmm1             \n\t" /*              .. |   p3  ..  p0   | */ +        " movq 8(%0), %%xmm3            \n\t" /*              .. |   p7  ..  p4   | */ +        SWAP_16_2 (%%xmm1, %%xmm3) +        VOLUME_32x16 (%%xmm1, %%xmm0) +        VOLUME_32x16 (%%xmm3, %%xmm2) +        SWAP_16_2 (%%xmm0, %%xmm2) +        " movq %%xmm0, (%0)             \n\t" /*              .. | p3*v3 .. 
p0*v0 | */ +        " movq %%xmm2, 8(%0)            \n\t" /*              .. | p7*v7 .. p4*v4 | */ +        " add $16, %0                   \n\t" +        MOD_ADD ($8, %5) +        " dec %2                        \n\t" +        " jne 7b                        \n\t" +        "8:                             \n\t" + +        : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) +        : "r" ((pa_reg_x86)channels) +        : "cc" +    ); +} + +#undef RUN_TEST + +#ifdef RUN_TEST +#define CHANNELS 2 +#define SAMPLES 1021 +#define TIMES 1000 +#define PADDING 16 + +static void run_test (void) { +    int16_t samples[SAMPLES]; +    int16_t samples_ref[SAMPLES]; +    int16_t samples_orig[SAMPLES]; +    int32_t volumes[CHANNELS + PADDING]; +    int i, j, padding; +    pa_do_volume_func_t func; +    pa_usec_t start, stop; + +    func = pa_get_volume_func (PA_SAMPLE_S16NE); + +    printf ("checking SSE %zd\n", sizeof (samples)); + +    pa_random (samples, sizeof (samples)); +    memcpy (samples_ref, samples, sizeof (samples)); +    memcpy (samples_orig, samples, sizeof (samples)); + +    for (i = 0; i < CHANNELS; i++) +        volumes[i] = rand() >> 1; +    for (padding = 0; padding < PADDING; padding++, i++) +        volumes[i] = volumes[padding]; + +    func (samples_ref, volumes, CHANNELS, sizeof (samples)); +    pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples)); +    for (i = 0; i < SAMPLES; i++) { +        if (samples[i] != samples_ref[i]) { +            printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], +                      samples_orig[i], volumes[i % CHANNELS]); +        } +    } + +    start = pa_rtclock_now(); +    for (j = 0; j < TIMES; j++) { +        memcpy (samples, samples_orig, sizeof (samples)); +        pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples)); +    } +    stop = pa_rtclock_now(); +    pa_log_info("SSE: %llu usec.", (long long unsigned int)(stop - start)); + +    
start = pa_rtclock_now(); +    for (j = 0; j < TIMES; j++) { +        memcpy (samples_ref, samples_orig, sizeof (samples)); +        func (samples_ref, volumes, CHANNELS, sizeof (samples)); +    } +    stop = pa_rtclock_now(); +    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); +} +#endif +#endif /* defined (__i386__) || defined (__amd64__) */ + +void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) { +#if defined (__i386__) || defined (__amd64__) +    pa_log_info("Initialising SSE optimized functions."); + +#ifdef RUN_TEST +    run_test (); +#endif + +    pa_set_volume_func (PA_SAMPLE_S16NE,     (pa_do_volume_func_t) pa_volume_s16ne_sse); +    pa_set_volume_func (PA_SAMPLE_S16RE,     (pa_do_volume_func_t) pa_volume_s16re_sse); +#endif /* defined (__i386__) || defined (__amd64__) */ +} diff --git a/src/tests/envelope-test.c b/src/tests/envelope-test.c index 3af3044e..9382040b 100644 --- a/src/tests/envelope-test.c +++ b/src/tests/envelope-test.c @@ -34,8 +34,6 @@  #include <pulsecore/memblock.h>  #include <pulsecore/sample-util.h> -#include <liboil/liboil.h> -  const pa_envelope_def ramp_down = {      .n_points = 2,      .points_x = { 100*PA_USEC_PER_MSEC, 300*PA_USEC_PER_MSEC }, @@ -202,7 +200,6 @@ int main(int argc, char *argv[]) {          .values = { PA_VOLUME_NORM, PA_VOLUME_NORM/2 }      }; -    oil_init();      pa_log_set_level(PA_LOG_DEBUG);      pa_assert_se(pool = pa_mempool_new(FALSE, 0)); diff --git a/src/tests/mix-test.c b/src/tests/mix-test.c index f9f76da3..457c4acd 100644 --- a/src/tests/mix-test.c +++ b/src/tests/mix-test.c @@ -32,8 +32,6 @@  #include <pulsecore/memblock.h>  #include <pulsecore/sample-util.h> -#include <liboil/liboil.h> -  static float swap_float(float a) {      uint32_t *b = (uint32_t*) &a;      *b = PA_UINT32_SWAP(*b); @@ -211,7 +209,6 @@ int main(int argc, char *argv[]) {      pa_sample_spec a;      pa_cvolume v; -    oil_init();      pa_log_set_level(PA_LOG_DEBUG);      pa_assert_se(pool = 
pa_mempool_new(FALSE, 0)); diff --git a/src/tests/remix-test.c b/src/tests/remix-test.c index 9d110d6b..4990bf93 100644 --- a/src/tests/remix-test.c +++ b/src/tests/remix-test.c @@ -32,8 +32,6 @@  #include <pulsecore/memblock.h>  #include <pulsecore/sample-util.h> -#include <liboil/liboil.h> -  int main(int argc, char *argv[]) {      static const pa_channel_map maps[] = { @@ -55,7 +53,6 @@ int main(int argc, char *argv[]) {      unsigned i, j;      pa_mempool *pool; -    oil_init();      pa_log_set_level(PA_LOG_DEBUG);      pa_assert_se(pool = pa_mempool_new(FALSE, 0)); diff --git a/src/tests/resampler-test.c b/src/tests/resampler-test.c index 7236265a..82198b5e 100644 --- a/src/tests/resampler-test.c +++ b/src/tests/resampler-test.c @@ -32,8 +32,6 @@  #include <pulsecore/memblock.h>  #include <pulsecore/sample-util.h> -#include <liboil/liboil.h> -  static void dump_block(const pa_sample_spec *ss, const pa_memchunk *chunk) {      void *d;      unsigned i; @@ -248,7 +246,6 @@ int main(int argc, char *argv[]) {      pa_sample_spec a, b;      pa_cvolume v; -    oil_init();      pa_log_set_level(PA_LOG_DEBUG);      pa_assert_se(pool = pa_mempool_new(FALSE, 0));  | 
