diff options
| -rw-r--r-- | src/Makefile.am | 3 | ||||
| -rw-r--r-- | src/pulsecore/cpu-x86.c | 5 | ||||
| -rw-r--r-- | src/pulsecore/cpu-x86.h | 3 | ||||
| -rw-r--r-- | src/pulsecore/macro.h | 6 | ||||
| -rw-r--r-- | src/pulsecore/remap_mmx.c | 43 | ||||
| -rw-r--r-- | src/pulsecore/remap_sse.c | 146 | ||||
| -rw-r--r-- | src/pulsecore/sample-util.c | 4 | ||||
| -rw-r--r-- | src/pulsecore/sconv.c | 4 | ||||
| -rw-r--r-- | src/pulsecore/sconv_sse.c | 235 | 
9 files changed, 421 insertions, 28 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index e65662c7..654dc41a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -832,7 +832,7 @@ libpulsecore_@PA_MAJORMINORMICRO@_la_SOURCES = \  		pulsecore/play-memblockq.c pulsecore/play-memblockq.h \  		pulsecore/play-memchunk.c pulsecore/play-memchunk.h \  		pulsecore/remap.c pulsecore/remap.h \ -		pulsecore/remap_mmx.c \ +		pulsecore/remap_mmx.c pulsecore/remap_sse.c \  		pulsecore/resampler.c pulsecore/resampler.h \  		pulsecore/rtpoll.c pulsecore/rtpoll.h \  		pulsecore/sample-util.c pulsecore/sample-util.h \ @@ -842,6 +842,7 @@ libpulsecore_@PA_MAJORMINORMICRO@_la_SOURCES = \  		pulsecore/svolume_mmx.c pulsecore/svolume_sse.c \  		pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \  		pulsecore/sconv-s16le.c pulsecore/sconv-s16le.h \ +		pulsecore/sconv_sse.c \  		pulsecore/sconv.c pulsecore/sconv.h \  		pulsecore/shared.c pulsecore/shared.h \  		pulsecore/shm.c pulsecore/shm.h \ diff --git a/src/pulsecore/cpu-x86.c b/src/pulsecore/cpu-x86.c index bc093ec0..1ba9f1a4 100644 --- a/src/pulsecore/cpu-x86.c +++ b/src/pulsecore/cpu-x86.c @@ -115,8 +115,11 @@ void pa_cpu_init_x86 (void) {          pa_remap_func_init_mmx (flags);      } -    if (flags & PA_CPU_X86_SSE) +    if (flags & PA_CPU_X86_SSE) {          pa_volume_func_init_sse (flags); +        pa_remap_func_init_sse (flags); +        pa_convert_func_init_sse (flags); +    }  #endif /* defined (__i386__) || defined (__amd64__) */  } diff --git a/src/pulsecore/cpu-x86.h b/src/pulsecore/cpu-x86.h index b11ef6ea..b40eb5ce 100644 --- a/src/pulsecore/cpu-x86.h +++ b/src/pulsecore/cpu-x86.h @@ -64,5 +64,8 @@ void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags);  void pa_volume_func_init_sse(pa_cpu_x86_flag_t flags);  void pa_remap_func_init_mmx(pa_cpu_x86_flag_t flags); +void pa_remap_func_init_sse(pa_cpu_x86_flag_t flags); + +void pa_convert_func_init_sse (pa_cpu_x86_flag_t flags);  #endif /* foocpux86hfoo */ diff --git a/src/pulsecore/macro.h 
b/src/pulsecore/macro.h index 87684ad3..bffcc264 100644 --- a/src/pulsecore/macro.h +++ b/src/pulsecore/macro.h @@ -80,6 +80,12 @@ static inline size_t PA_PAGE_ALIGN(size_t l) {  #define PA_ELEMENTSOF(x) (sizeof(x)/sizeof((x)[0])) +#if defined(__GNUC__) +    #define PA_DECLARE_ALIGNED(n,t,v)      t v __attribute__ ((aligned (n))) +#else +    #define PA_DECLARE_ALIGNED(n,t,v)      t v +#endif +  /* The users of PA_MIN and PA_MAX, PA_CLAMP, PA_ROUND_UP should be   * aware that these macros on non-GCC executed code with side effects   * twice. It is thus considered misuse to use code with side effects diff --git a/src/pulsecore/remap_mmx.c b/src/pulsecore/remap_mmx.c index 00252dac..b5fe82ee 100644 --- a/src/pulsecore/remap_mmx.c +++ b/src/pulsecore/remap_mmx.c @@ -51,7 +51,7 @@                  " punpckl"#s" %%mm4, %%mm4      \n\t"  \                  " punpckh"#s" %%mm5, %%mm5      \n\t"  \                  " punpckl"#s" %%mm6, %%mm6      \n\t"  \ -                " punpckh"#s" %%mm7, %%mm7      \n\t"  \ +                " punpckh"#s" %%mm7, %%mm7      \n\t"  #define STORE_SAMPLES                                  \                  " movq %%mm0, (%0)              \n\t"  \ @@ -67,32 +67,31 @@  #define HANDLE_SINGLE(s)                               \                  " movd (%1), %%mm0              \n\t"  \ -                " movq %%mm0, %%mm1             \n\t"  \                  " punpckl"#s" %%mm0, %%mm0      \n\t"  \                  " movq %%mm0, (%0)              \n\t"  \                  " add $4, %1                    \n\t"  \                  " add $8, %0                    \n\t" -#define MONO_TO_STEREO(s)                               \ -                " mov %3, %2                    \n\t"   \ -                " sar $3, %2                    \n\t"   \ -                " cmp $0, %2                    \n\t"   \ -                " je 2f                         \n\t"   \ -                "1:                             \n\t"   \ -                LOAD_SAMPLES    
                        \ -                UNPACK_SAMPLES(s)                       \ -                STORE_SAMPLES                           \ -                " dec %2                        \n\t"   \ -                " jne 1b                        \n\t"   \ -                "2:                             \n\t"   \ -                " mov %3, %2                    \n\t"   \ -                " and $7, %2                    \n\t"   \ -                " je 4f                         \n\t"   \ -                "3:                             \n\t"   \ -                HANDLE_SINGLE(s)                        \ -                " dec %2                        \n\t"   \ -                " jne 3b                        \n\t"   \ -                "4:                             \n\t"   \ +#define MONO_TO_STEREO(s)                              \ +                " mov %3, %2                    \n\t"  \ +                " sar $3, %2                    \n\t"  \ +                " cmp $0, %2                    \n\t"  \ +                " je 2f                         \n\t"  \ +                "1:                             \n\t"  \ +                LOAD_SAMPLES                           \ +                UNPACK_SAMPLES(s)                      \ +                STORE_SAMPLES                          \ +                " dec %2                        \n\t"  \ +                " jne 1b                        \n\t"  \ +                "2:                             \n\t"  \ +                " mov %3, %2                    \n\t"  \ +                " and $7, %2                    \n\t"  \ +                " je 4f                         \n\t"  \ +                "3:                             \n\t"  \ +                HANDLE_SINGLE(s)                       \ +                " dec %2                        \n\t"  \ +                " jne 3b                        \n\t"  \ +                "4:                             \n\t"  \                  " emms                        
  \n\t"  #if defined (__i386__) || defined (__amd64__) diff --git a/src/pulsecore/remap_sse.c b/src/pulsecore/remap_sse.c new file mode 100644 index 00000000..97f2476e --- /dev/null +++ b/src/pulsecore/remap_sse.c @@ -0,0 +1,146 @@ +/*** +  This file is part of PulseAudio. + +  Copyright 2004-2006 Lennart Poettering +  Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk.com> + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. + +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. 
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#include <pulse/sample.h>
+#include <pulsecore/log.h>
+#include <pulsecore/macro.h>
+
+#include "cpu-x86.h"
+#include "remap.h"
+/* Read 64 bytes from the source pointer (%1) into xmm0/2/4/6 and copy each register into xmm1/3/5/7. */
+#define LOAD_SAMPLES                                   \
+                " movdqu (%1), %%xmm0           \n\t"  \
+                " movdqu 16(%1), %%xmm2         \n\t"  \
+                " movdqu 32(%1), %%xmm4         \n\t"  \
+                " movdqu 48(%1), %%xmm6         \n\t"  \
+                " movdqa %%xmm0, %%xmm1         \n\t"  \
+                " movdqa %%xmm2, %%xmm3         \n\t"  \
+                " movdqa %%xmm4, %%xmm5         \n\t"  \
+                " movdqa %%xmm6, %%xmm7         \n\t"
+/* Interleave every element with itself: low halves in the even registers, high halves in the odd ones. */
+#define UNPACK_SAMPLES(s)                              \
+                " punpckl"#s" %%xmm0, %%xmm0    \n\t"  \
+                " punpckh"#s" %%xmm1, %%xmm1    \n\t"  \
+                " punpckl"#s" %%xmm2, %%xmm2    \n\t"  \
+                " punpckh"#s" %%xmm3, %%xmm3    \n\t"  \
+                " punpckl"#s" %%xmm4, %%xmm4    \n\t"  \
+                " punpckh"#s" %%xmm5, %%xmm5    \n\t"  \
+                " punpckl"#s" %%xmm6, %%xmm6    \n\t"  \
+                " punpckh"#s" %%xmm7, %%xmm7    \n\t"
+/* Write xmm0..xmm7 (128 bytes) to the destination (%0), then advance the source by 64 and the destination by 128. */
+#define STORE_SAMPLES                                  \
+                " movdqu %%xmm0, (%0)           \n\t"  \
+                " movdqu %%xmm1, 16(%0)         \n\t"  \
+                " movdqu %%xmm2, 32(%0)         \n\t"  \
+                " movdqu %%xmm3, 48(%0)         \n\t"  \
+                " movdqu %%xmm4, 64(%0)         \n\t"  \
+                " movdqu %%xmm5, 80(%0)         \n\t"  \
+                " movdqu %%xmm6, 96(%0)         \n\t"  \
+                " movdqu %%xmm7, 112(%0)        \n\t"  \
+                " add $64, %1                   \n\t"  \
+                " add $128, %0                  \n\t"
+/* Tail step: read 4 source bytes, duplicate the elements, write 8 bytes, advance both pointers. */
+#define HANDLE_SINGLE(s)                               \
+                " movd (%1), %%xmm0             \n\t"  \
+                " punpckl"#s" %%xmm0, %%xmm0    \n\t"  \
+                " movq %%xmm0, (%0)             \n\t"  \
+                " add $4, %1                    \n\t"  \
+                " add $8, %0                    \n\t"
+/* %3 = count, %2 = scratch counter: run count/16 unrolled passes, then count%16 tail steps. */
+#define MONO_TO_STEREO(s)                               \
+                " mov %3, %2                    \n\t"   \
+                " sar $4, %2                    \n\t"   \
+                " cmp $0, %2                    \n\t"   \
+                " je 2f                         \n\t"   \
+                "1:                             \n\t"   \
+                LOAD_SAMPLES                            \
+                UNPACK_SAMPLES(s)                       \
+                STORE_SAMPLES                           \
+                " dec %2                        \n\t"   \
+                " jne 1b                        \n\t"   \
+                "2:                             \n\t"   \
+                " mov %3, %2                    \n\t"   \
+                " and $15, %2                   \n\t"   \
+                " je 4f                         \n\t"   \
+                "3:                             \n\t"   \
+                HANDLE_SINGLE(s)                        \
+                " dec %2                        \n\t"   \
+                " jne 3b                        \n\t"   \
+                "4:                             \n\t"
+/* Duplicate every mono element of src into two adjacent elements of dst; n is handed to the asm as the count (%3). */
+static void remap_mono_to_stereo_sse (pa_remap_t *m, void *dst, const void *src, unsigned n) {
+    pa_reg_x86 temp;
+    /* NOTE(review): each count consumes 4 source bytes: one float, but two s16 samples. Confirm the unit of n for S16NE. */
+    switch (*m->format) {
+        case PA_SAMPLE_FLOAT32NE:
+        {
+            __asm__ __volatile__ (
+                MONO_TO_STEREO(dq) /* do doubles to quads */
+                : "+r" (dst), "+r" (src), "=&r" (temp)
+                : "r" ((pa_reg_x86)n)
+                : "cc", "memory" /* the asm stores through dst */
+            );
+            break;
+        }
+        case PA_SAMPLE_S16NE:
+        {
+            __asm__ __volatile__ (
+                MONO_TO_STEREO(wd) /* do words to doubles */
+                : "+r" (dst), "+r" (src), "=&r" (temp)
+                : "r" ((pa_reg_x86)n)
+                : "cc", "memory" /* the asm stores through dst */
+            );
+            break;
+        }
+        default:
+            pa_assert_not_reached();
+    }
+}
+
+/* set the function that will execute the remapping based on the matrices */
+static void init_remap_sse (pa_remap_t *m) {
+    unsigned n_oc, n_ic;
+
+    n_oc = m->o_ss->channels;
+    n_ic = m->i_ss->channels;
+
+    /* find some common channel remappings; do_remap is left untouched when none matches. */
+    if (n_ic == 1 && n_oc == 2 &&
+            m->map_table_f[0][0] >= 1.0 && m->map_table_f[1][0] >= 1.0) {
+        m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_sse;
+        pa_log_info("Using SSE mono to stereo remapping");
+    }
+}
+/* Install init_remap_sse as the remap initialiser, but only when the CPU flags include SSE2. */
+void pa_remap_func_init_sse (pa_cpu_x86_flag_t flags) {
+#if defined (__i386__) || defined (__amd64__)
+    if (!(flags & PA_CPU_X86_SSE2)) return; /* movdqu, movdqa and punpck* on xmm registers are SSE2 instructions */
+    pa_log_info("Initialising SSE optimized remappers.");
+    pa_set_init_remap_func ((pa_init_remap_func_t) init_remap_sse);
+#endif /* defined (__i386__) || defined (__amd64__) */
+}
diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c index 6e97e5a9..5fae1928 100644 --- a/src/pulsecore/sample-util.c +++ b/src/pulsecore/sample-util.c @@ -137,7 +137,7 @@ static void calc_linear_float_volume(float linear[], const pa_cvolume *volume) {  static void calc_linear_integer_stream_volumes(pa_mix_info streams[], unsigned nstreams, const pa_cvolume *volume, const pa_sample_spec *spec) {      unsigned k, channel; -    float linear[PA_CHANNELS_MAX]; +    float linear[PA_CHANNELS_MAX + VOLUME_PADDING];      pa_assert(streams);      pa_assert(spec); @@ -156,7 +156,7 @@ static void calc_linear_integer_stream_volumes(pa_mix_info streams[], unsigned n  static void calc_linear_float_stream_volumes(pa_mix_info streams[], unsigned nstreams, const pa_cvolume *volume, const pa_sample_spec *spec) {      unsigned k, channel; -    float linear[PA_CHANNELS_MAX]; +    float 
linear[PA_CHANNELS_MAX + VOLUME_PADDING];      pa_assert(streams);      pa_assert(spec); diff --git a/src/pulsecore/sconv.c b/src/pulsecore/sconv.c index d06d6985..301f08b4 100644 --- a/src/pulsecore/sconv.c +++ b/src/pulsecore/sconv.c @@ -52,8 +52,8 @@ static void u8_from_float32ne(unsigned n, const float *a, uint8_t *b) {      for (; n > 0; n--, a++, b++) {          float v;          v = (*a * 127.0) + 128.0; -	v = PA_CLAMP_UNLIKELY (v, 0.0, 255.0); -	*b = rint (v); +        v = PA_CLAMP_UNLIKELY (v, 0.0, 255.0); +        *b = rint (v);      }  } diff --git a/src/pulsecore/sconv_sse.c b/src/pulsecore/sconv_sse.c new file mode 100644 index 00000000..b213d991 --- /dev/null +++ b/src/pulsecore/sconv_sse.c @@ -0,0 +1,235 @@ +/*** +  This file is part of PulseAudio. + +  Copyright 2004-2006 Lennart Poettering +  Copyright 2006 Pierre Ossman <ossman@cendio.se> for Cendio AB + +  PulseAudio is free software; you can redistribute it and/or modify +  it under the terms of the GNU Lesser General Public License as published +  by the Free Software Foundation; either version 2.1 of the License, +  or (at your option) any later version. + +  PulseAudio is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +  General Public License for more details. + +  You should have received a copy of the GNU Lesser General Public License +  along with PulseAudio; if not, write to the Free Software +  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +  USA. 
+***/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <pulsecore/g711.h>
+#include <pulsecore/macro.h>
+
+#include "endianmacros.h"
+
+#include "cpu-x86.h"
+#include "sconv.h"
+
+static pa_convert_func_t func;
+
+#if defined (__i386__) || defined (__amd64__)
+/* Clamp limits and scale factor, declared 16-byte aligned because the asm loads them with movaps. */
+static const PA_DECLARE_ALIGNED (16, float, one[4]) = { 1.0, 1.0, 1.0, 1.0 };
+static const PA_DECLARE_ALIGNED (16, float, mone[4]) = { -1.0, -1.0, -1.0, -1.0 };
+static const PA_DECLARE_ALIGNED (16, float, scale[4]) = { 0x7fff, 0x7fff, 0x7fff, 0x7fff };
+/* Convert n floats at a to 16-bit integers at b: clamp to [-1, 1], multiply by 0x7fff, convert through MMX registers. */
+static void pa_sconv_s16le_from_f32ne_sse(unsigned n, const float *a, int16_t *b) {
+    pa_reg_x86 temp, i;
+
+    __asm__ __volatile__ (
+        " movaps %5, %%xmm5             \n\t"
+        " movaps %6, %%xmm6             \n\t"
+        " movaps %7, %%xmm7             \n\t"
+        " xor %0, %0                    \n\t"
+
+        " mov %4, %1                    \n\t"
+        " sar $3, %1                    \n\t" /* 8 floats at a time */
+        " cmp $0, %1                    \n\t"
+        " je 2f                         \n\t"
+
+        "1:                             \n\t"
+        " movups (%2, %0, 2), %%xmm0    \n\t" /* read 8 floats */
+        " movups 16(%2, %0, 2), %%xmm2  \n\t"
+        " minps  %%xmm5, %%xmm0         \n\t" /* clamp to 1.0 */
+        " minps  %%xmm5, %%xmm2         \n\t"
+        " maxps  %%xmm6, %%xmm0         \n\t" /* clamp to -1.0 */
+        " maxps  %%xmm6, %%xmm2         \n\t"
+        " mulps  %%xmm7, %%xmm0         \n\t" /* *= 0x7fff */
+        " mulps  %%xmm7, %%xmm2         \n\t"
+
+        " cvtps2pi %%xmm0, %%mm0        \n\t" /* low part to int */
+        " cvtps2pi %%xmm2, %%mm2        \n\t"
+        " movhlps  %%xmm0, %%xmm0       \n\t" /* bring high part in position */
+        " movhlps  %%xmm2, %%xmm2       \n\t"
+        " cvtps2pi %%xmm0, %%mm1        \n\t" /* high part to int */
+        " cvtps2pi %%xmm2, %%mm3        \n\t"
+
+        " packssdw %%mm1, %%mm0         \n\t" /* pack parts */
+        " packssdw %%mm3, %%mm2         \n\t"
+        " movq     %%mm0, (%3, %0)      \n\t"
+        " movq    %%mm2, 8(%3, %0)     \n\t"
+
+        " add $16, %0                   \n\t" /* %0 is the byte offset into b; 8 samples = 16 bytes */
+        " dec %1                        \n\t"
+        " jne 1b                        \n\t"
+
+        "2:                             \n\t"
+        " mov %4, %1                    \n\t" /* prepare for leftovers */
+        " and $7, %1                    \n\t" /* n % 8: the loop above takes 8 floats per pass */
+        " je 4f                         \n\t"
+
+        "3:                             \n\t"
+        " movss (%2, %0, 2), %%xmm0     \n\t"
+        " minss  %%xmm5, %%xmm0         \n\t"
+        " maxss  %%xmm6, %%xmm0         \n\t"
+        " mulss  %%xmm7, %%xmm0         \n\t"
+        " cvtss2si %%xmm0, %4           \n\t" /* NOTE(review): overwrites %4, which is declared input-only */
+        " movw  %w4, (%3, %0)           \n\t"
+        " add $2, %0                    \n\t"
+        " dec %1                        \n\t"
+        " jne 3b                        \n\t"
+
+        "4:                             \n\t"
+        " emms                          \n\t"
+
+        : "=&r" (i), "=&r" (temp)
+        : "r" (a), "r" (b), "r" ((pa_reg_x86)n), "m" (*one), "m" (*mone), "m" (*scale)
+        : "cc", "memory"
+    );
+}
+/* Same clamp/scale/convert sequence, using cvtps2dq and packssdw on xmm registers instead of MMX registers. */
+static void pa_sconv_s16le_from_f32ne_sse2(unsigned n, const float *a, int16_t *b) {
+    pa_reg_x86 temp, i;
+
+    __asm__ __volatile__ (
+        " movaps %5, %%xmm5             \n\t"
+        " movaps %6, %%xmm6             \n\t"
+        " movaps %7, %%xmm7             \n\t"
+        " xor %0, %0                    \n\t"
+
+        " mov %4, %1                    \n\t"
+        " sar $3, %1                    \n\t" /* 8 floats at a time */
+        " cmp $0, %1                    \n\t"
+        " je 2f                         \n\t"
+
+        "1:                             \n\t"
+        " movups (%2, %0, 2), %%xmm0    \n\t" /* read 8 floats */
+        " movups 16(%2, %0, 2), %%xmm2  \n\t"
+        " minps  %%xmm5, %%xmm0         \n\t" /* clamp to 1.0 */
+        " minps  %%xmm5, %%xmm2         \n\t"
+        " maxps  %%xmm6, %%xmm0         \n\t" /* clamp to -1.0 */
+        " maxps  %%xmm6, %%xmm2         \n\t"
+        " mulps  %%xmm7, %%xmm0         \n\t" /* *= 0x7fff */
+        " mulps  %%xmm7, %%xmm2         \n\t"
+
+        " cvtps2dq %%xmm0, %%xmm0       \n\t"
+        " cvtps2dq %%xmm2, %%xmm2       \n\t"
+
+        " packssdw %%xmm2, %%xmm0       \n\t"
+        " movdqu   %%xmm0, (%3, %0)     \n\t"
+
+        " add $16, %0                   \n\t" /* %0 is the byte offset into b; 8 samples = 16 bytes */
+        " dec %1                        \n\t"
+        " jne 1b                        \n\t"
+
+        "2:                             \n\t"
+        " mov %4, %1                    \n\t" /* prepare for leftovers */
+        " and $7, %1                    \n\t" /* n % 8: the loop above takes 8 floats per pass */
+        " je 4f                         \n\t"
+
+        "3:                             \n\t"
+        " movss (%2, %0, 2), %%xmm0     \n\t"
+        " minss  %%xmm5, %%xmm0         \n\t"
+        " maxss  %%xmm6, %%xmm0         \n\t"
+        " mulss  %%xmm7, %%xmm0         \n\t"
+        " cvtss2si %%xmm0, %4           \n\t" /* NOTE(review): overwrites %4, which is declared input-only */
+        " movw  %w4, (%3, %0)           \n\t"
+        " add $2, %0                    \n\t"
+        " dec %1                        \n\t"
+        " jne 3b                        \n\t"
+
+        "4:                             \n\t"
+
+        : "=&r" (i), "=&r" (temp)
+        : "r" (a), "r" (b), "r" ((pa_reg_x86)n), "m" (*one), "m" (*mone), "m" (*scale)
+        : "cc", "memory"
+    );
+}
+/* The self-test below is compiled out by the #undef that follows. */
+#undef RUN_TEST
+
+#ifdef RUN_TEST
+#define SAMPLES 1019
+#define TIMES 1000
+/* Compare the SSE2 converter against the currently registered converter, then time TIMES runs of each. */
+static void run_test (void) {
+    int16_t samples[SAMPLES];
+    int16_t samples_ref[SAMPLES];
+    float floats[SAMPLES];
+    int i;
+    pa_usec_t start, stop;
+
+    printf ("checking SSE %zu\n", sizeof (samples));
+
+    memset (samples_ref, 0, sizeof (samples_ref));
+    memset (samples, 0, sizeof (samples));
+
+    for (i = 0; i < SAMPLES; i++) {
+        floats[i] = (rand()/(RAND_MAX/2.2)) - 1.1; /* spans [-1.1, 1.1] so both clamps are exercised */
+    }
+
+    func = pa_get_convert_from_float32ne_function (PA_SAMPLE_S16LE);
+    func (SAMPLES, floats, samples_ref);
+    pa_sconv_s16le_from_f32ne_sse2 (SAMPLES, floats, samples);
+
+    for (i = 0; i < SAMPLES; i++) {
+        if (samples[i] != samples_ref[i]) {
+            printf ("%d: %04x != %04x (%f)\n", i, samples[i], samples_ref[i],
+                      floats[i]);
+        }
+    }
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        pa_sconv_s16le_from_f32ne_sse2 (SAMPLES, floats, samples);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("SSE: %llu usec.", (long long unsigned int)(stop - start));
+
+    start = pa_rtclock_now();
+    for (i = 0; i < TIMES; i++) {
+        func (SAMPLES, floats, samples_ref);
+    }
+    stop = pa_rtclock_now();
+    pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start));
+}
+#endif
+#endif /* defined (__i386__) || defined (__amd64__) */
+
+/* Register the float32ne -> S16LE converter: the SSE2 variant when flags include SSE2, the SSE variant otherwise. */
+void pa_convert_func_init_sse (pa_cpu_x86_flag_t flags) {
+#if defined (__i386__) || defined (__amd64__)
+    pa_log_info("Initialising SSE optimized conversions.");
+
+#ifdef RUN_TEST
+    run_test ();
+#endif
+
+    if (flags & PA_CPU_X86_SSE2)
+      pa_set_convert_from_float32ne_function (PA_SAMPLE_S16LE, (pa_convert_func_t) pa_sconv_s16le_from_f32ne_sse2);
+    else
+      pa_set_convert_from_float32ne_function (PA_SAMPLE_S16LE, (pa_convert_func_t) pa_sconv_s16le_from_f32ne_sse);
+
+#endif /* defined (__i386__) || defined (__amd64__) */
+}
+  | 
