diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/.gitignore | 1 | ||||
-rw-r--r-- | src/Makefile.am | 9 | ||||
-rw-r--r-- | src/pulsecore/sample-util.c | 102 | ||||
-rw-r--r-- | src/pulsecore/vector.h | 97 | ||||
-rw-r--r-- | src/tests/rtstutter.c | 16 | ||||
-rw-r--r-- | src/tests/vector-test.c | 83 |
6 files changed, 270 insertions, 38 deletions
diff --git a/src/.gitignore b/src/.gitignore index 72c38cc6..66738d0a 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -58,3 +58,4 @@ thread-test utf8-test voltest start-pulseaudio-x11 +vector-test diff --git a/src/Makefile.am b/src/Makefile.am index d77f4dc1..24623d3f 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -232,6 +232,7 @@ TESTS = \ strlist-test \ close-test \ voltest \ + vector-test \ memblockq-test \ channelmap-test \ thread-mainloop-test \ @@ -262,6 +263,7 @@ TESTS_BINARIES = \ strlist-test \ close-test \ voltest \ + vector-test \ memblockq-test \ sync-playback \ interpol-test \ @@ -407,6 +409,11 @@ voltest_CFLAGS = $(AM_CFLAGS) voltest_LDADD = $(AM_LDADD) libpulse.la voltest_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) +vector_test_SOURCES = tests/vector-test.c +vector_test_CFLAGS = $(AM_CFLAGS) +vector_test_LDADD = $(AM_LDADD) libpulsecore-@PA_MAJORMINORMICRO@.la libpulsecommon-@PA_MAJORMINORMICRO@.la +vector_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) + channelmap_test_SOURCES = tests/channelmap-test.c channelmap_test_CFLAGS = $(AM_CFLAGS) channelmap_test_LDADD = $(AM_LDADD) libpulse.la @@ -526,7 +533,7 @@ libpulsecommon_@PA_MAJORMINORMICRO@_la_SOURCES = \ pulsecore/llist.h \ pulsecore/lock-autospawn.c pulsecore/lock-autospawn.h \ pulsecore/log.c pulsecore/log.h \ - pulsecore/macro.h \ + pulsecore/macro.h pulsecore/vector.h \ pulsecore/mcalign.c pulsecore/mcalign.h \ pulsecore/memblock.c pulsecore/memblock.h \ pulsecore/memblockq.c pulsecore/memblockq.h \ diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c index cf7b4d58..905ba5df 100644 --- a/src/pulsecore/sample-util.c +++ b/src/pulsecore/sample-util.c @@ -213,13 +213,22 @@ size_t pa_mix( for (i = 0; i < nstreams; i++) { pa_mix_info *m = streams + i; - int32_t v, cv = m->linear[channel].i; + int32_t v, lo, hi, cv = m->linear[channel].i; if (PA_UNLIKELY(cv <= 0)) continue; + /* Multiplying the 32bit volume factor with the + * 16bit sample might result in an 48bit value. 
We + * want to do without 64 bit integers and hence do + * the multiplication independently for the HI and + * LO part of the volume. */ + + hi = cv >> 16; + lo = cv & 0xFFFF; + v = *((int16_t*) m->ptr); - v = (v * cv) / 0x10000; + v = ((v * lo) >> 16) + (v * hi); sum += v; m->ptr = (uint8_t*) m->ptr + sizeof(int16_t); @@ -248,13 +257,16 @@ size_t pa_mix( for (i = 0; i < nstreams; i++) { pa_mix_info *m = streams + i; - int32_t v, cv = m->linear[channel].i; + int32_t v, lo, hi, cv = m->linear[channel].i; if (PA_UNLIKELY(cv <= 0)) continue; + hi = cv >> 16; + lo = cv & 0xFFFF; + v = PA_INT16_SWAP(*((int16_t*) m->ptr)); - v = (v * cv) / 0x10000; + v = ((v * lo) >> 16) + (v * hi); sum += v; m->ptr = (uint8_t*) m->ptr + sizeof(int16_t); @@ -290,7 +302,7 @@ size_t pa_mix( continue; v = *((int32_t*) m->ptr); - v = (v * cv) / 0x10000; + v = (v * cv) >> 16; sum += v; m->ptr = (uint8_t*) m->ptr + sizeof(int32_t); @@ -326,7 +338,7 @@ size_t pa_mix( continue; v = PA_INT32_SWAP(*((int32_t*) m->ptr)); - v = (v * cv) / 0x10000; + v = (v * cv) >> 16; sum += v; m->ptr = (uint8_t*) m->ptr + sizeof(int32_t); @@ -362,7 +374,7 @@ size_t pa_mix( continue; v = (int32_t) (PA_READ24NE(m->ptr) << 8); - v = (v * cv) / 0x10000; + v = (v * cv) >> 16; sum += v; m->ptr = (uint8_t*) m->ptr + 3; @@ -398,7 +410,7 @@ size_t pa_mix( continue; v = (int32_t) (PA_READ24RE(m->ptr) << 8); - v = (v * cv) / 0x10000; + v = (v * cv) >> 16; sum += v; m->ptr = (uint8_t*) m->ptr + 3; @@ -434,7 +446,7 @@ size_t pa_mix( continue; v = (int32_t) (*((uint32_t*)m->ptr) << 8); - v = (v * cv) / 0x10000; + v = (v * cv) >> 16; sum += v; m->ptr = (uint8_t*) m->ptr + sizeof(int32_t); @@ -470,7 +482,7 @@ size_t pa_mix( continue; v = (int32_t) (PA_UINT32_SWAP(*((uint32_t*) m->ptr)) << 8); - v = (v * cv) / 0x10000; + v = (v * cv) >> 16; sum += v; m->ptr = (uint8_t*) m->ptr + 3; @@ -505,7 +517,7 @@ size_t pa_mix( continue; v = (int32_t) *((uint8_t*) m->ptr) - 0x80; - v = (v * cv) / 0x10000; + v = (v * cv) >> 16; sum += v; 
m->ptr = (uint8_t*) m->ptr + 1; @@ -534,13 +546,16 @@ size_t pa_mix( for (i = 0; i < nstreams; i++) { pa_mix_info *m = streams + i; - int32_t v, cv = m->linear[channel].i; + int32_t v, hi, lo, cv = m->linear[channel].i; if (PA_UNLIKELY(cv <= 0)) continue; + hi = cv >> 16; + lo = cv & 0xFFFF; + v = (int32_t) st_ulaw2linear16(*((uint8_t*) m->ptr)); - v = (v * cv) / 0x10000; + v = ((v * lo) >> 16) + (v * hi); sum += v; m->ptr = (uint8_t*) m->ptr + 1; @@ -569,13 +584,16 @@ size_t pa_mix( for (i = 0; i < nstreams; i++) { pa_mix_info *m = streams + i; - int32_t v, cv = m->linear[channel].i; + int32_t v, hi, lo, cv = m->linear[channel].i; if (PA_UNLIKELY(cv <= 0)) continue; + hi = cv >> 16; + lo = cv & 0xFFFF; + v = (int32_t) st_alaw2linear16(*((uint8_t*) m->ptr)); - v = (v * cv) / 0x10000; + v = ((v * lo) >> 16) + (v * hi); sum += v; m->ptr = (uint8_t*) m->ptr + 1; @@ -710,16 +728,26 @@ void pa_volume_memchunk( e = (int16_t*) ptr + c->length/sizeof(int16_t); for (channel = 0, d = ptr; d < e; d++) { - int32_t t; + int32_t t, hi, lo; + + /* Multiplying the 32bit volume factor with the 16bit + * sample might result in a 48bit value. We want to + * do without 64 bit integers and hence do the + * multiplication independently for the HI and LO part + * of the volume. 
*/ + + hi = linear[channel] >> 16; + lo = linear[channel] & 0xFFFF; t = (int32_t)(*d); - t = (t * linear[channel]) / 0x10000; + t = ((t * lo) >> 16) + (t * hi); t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); *d = (int16_t) t; if (PA_UNLIKELY(++channel >= spec->channels)) channel = 0; } + break; } @@ -733,10 +761,13 @@ void pa_volume_memchunk( e = (int16_t*) ptr + c->length/sizeof(int16_t); for (channel = 0, d = ptr; d < e; d++) { - int32_t t; + int32_t t, hi, lo; + + hi = linear[channel] >> 16; + lo = linear[channel] & 0xFFFF; t = (int32_t) PA_INT16_SWAP(*d); - t = (t * linear[channel]) / 0x10000; + t = ((t * lo) >> 16) + (t * hi); t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); *d = PA_INT16_SWAP((int16_t) t); @@ -760,7 +791,7 @@ void pa_volume_memchunk( int64_t t; t = (int64_t)(*d); - t = (t * linear[channel]) / 0x10000; + t = (t * linear[channel]) >> 16; t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); *d = (int32_t) t; @@ -783,7 +814,7 @@ void pa_volume_memchunk( int64_t t; t = (int64_t) PA_INT32_SWAP(*d); - t = (t * linear[channel]) / 0x10000; + t = (t * linear[channel]) >> 16; t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); *d = PA_INT32_SWAP((int32_t) t); @@ -806,7 +837,7 @@ void pa_volume_memchunk( int64_t t; t = (int64_t)((int32_t) (PA_READ24NE(d) << 8)); - t = (t * linear[channel]) / 0x10000; + t = (t * linear[channel]) >> 16; t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); PA_WRITE24NE(d, ((uint32_t) (int32_t) t) >> 8); @@ -829,7 +860,7 @@ void pa_volume_memchunk( int64_t t; t = (int64_t)((int32_t) (PA_READ24RE(d) << 8)); - t = (t * linear[channel]) / 0x10000; + t = (t * linear[channel]) >> 16; t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); PA_WRITE24RE(d, ((uint32_t) (int32_t) t) >> 8); @@ -852,7 +883,7 @@ void pa_volume_memchunk( int64_t t; t = (int64_t) ((int32_t) (*d << 8)); - t = (t * linear[channel]) / 0x10000; + t = (t * linear[channel]) >> 16; t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); *d = ((uint32_t) ((int32_t) 
t)) >> 8; @@ -875,7 +906,7 @@ void pa_volume_memchunk( int64_t t; t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*d) << 8)); - t = (t * linear[channel]) / 0x10000; + t = (t * linear[channel]) >> 16; t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); *d = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); @@ -895,10 +926,13 @@ void pa_volume_memchunk( e = (uint8_t*) ptr + c->length; for (channel = 0, d = ptr; d < e; d++) { - int32_t t; + int32_t t, hi, lo; + + hi = linear[channel] >> 16; + lo = linear[channel] & 0xFFFF; t = (int32_t) *d - 0x80; - t = (t * linear[channel]) / 0x10000; + t = ((t * lo) >> 16) + (t * hi); t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); *d = (uint8_t) (t + 0x80); @@ -918,10 +952,13 @@ void pa_volume_memchunk( e = (uint8_t*) ptr + c->length; for (channel = 0, d = ptr; d < e; d++) { - int32_t t; + int32_t t, hi, lo; + + hi = linear[channel] >> 16; + lo = linear[channel] & 0xFFFF; t = (int32_t) st_ulaw2linear16(*d); - t = (t * linear[channel]) / 0x10000; + t = ((t * lo) >> 16) + (t * hi); t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); *d = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); @@ -941,10 +978,13 @@ void pa_volume_memchunk( e = (uint8_t*) ptr + c->length; for (channel = 0, d = ptr; d < e; d++) { - int32_t t; + int32_t t, hi, lo; + + hi = linear[channel] >> 16; + lo = linear[channel] & 0xFFFF; t = (int32_t) st_alaw2linear16(*d); - t = (t * linear[channel]) / 0x10000; + t = ((t * lo) >> 16) + (t * hi); t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); *d = (uint8_t) st_13linear2alaw((int16_t) t >> 3); diff --git a/src/pulsecore/vector.h b/src/pulsecore/vector.h new file mode 100644 index 00000000..076bd6c0 --- /dev/null +++ b/src/pulsecore/vector.h @@ -0,0 +1,97 @@ +/*** + This file is part of PulseAudio. 
+ + Copyright 2004-2006 Lennart Poettering + Copyright 2006 Pierre Ossman <ossman@cendio.se> for Cendio AB + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#include <inttypes.h> + +/* First, define HAVE_VECTOR if we have the gcc vector extensions at all */ +#if defined(__SSE2__) || defined(__ALTIVEC__) +#define HAVE_VECTOR + + +/* This is supposed to be portable to different SIMD instruction + * sets. We define vector types for different base types: uint8_t, + * int16_t, int32_t, float. The vector type is a union. The fields .i, + * .u, .f are arrays for accessing the separate elements of a + * vector. .v is a gcc vector type of the right format. .m is the + * vector in the type the SIMD extension specific intrinsics API + * expects. PA_xxx_VECTOR_SIZE is the number of + * entries. PA_xxxx_VECTOR_MAKE constructs a gcc vector variable with + * the same value in all elements. 
*/ + +#ifdef __SSE2__ + +#include <xmmintrin.h> +#include <emmintrin.h> + +#define PA_UINT8_VECTOR_SIZE 16 +#define PA_INT16_VECTOR_SIZE 8 +#define PA_INT32_VECTOR_SIZE 4 +#define PA_FLOAT_VECTOR_SIZE 4 + +#define PA_UINT8_VECTOR_MAKE(x) (pa_v16qi) { x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x } +#define PA_INT16_VECTOR_MAKE(x) (pa_v8hi) { x, x, x, x, x, x, x, x } +#define PA_INT32_VECTOR_MAKE(x) (pa_v4si) { x, x, x, x } +#define PA_FLOAT_VECTOR_MAKE(x) (pa_v4fi) { x, x, x, x } + +#endif + +/* uint8_t vector */ +typedef uint8_t pa_v16qi __attribute__ ((vector_size (PA_UINT8_VECTOR_SIZE * sizeof(uint8_t)))); +typedef union pa_uint8_vector { + uint8_t u[PA_UINT8_VECTOR_SIZE]; + pa_v16qi v; +#ifdef __SSE2__ + __m128i m; +#endif +} pa_uint8_vector_t; + +/* int16_t vector*/ +typedef int16_t pa_v8hi __attribute__ ((vector_size (PA_INT16_VECTOR_SIZE * sizeof(int16_t)))); +typedef union pa_int16_vector { + int16_t i[PA_INT16_VECTOR_SIZE]; + pa_v8hi v; +#ifdef __SSE2__ + __m128i m; +#endif +} pa_int16_vector_t; + +/* int32_t vector */ +typedef int32_t pa_v4si __attribute__ ((vector_size (PA_INT32_VECTOR_SIZE * sizeof(int32_t)))); +typedef union pa_int32_vector { + int32_t i[PA_INT32_VECTOR_SIZE]; + pa_v4si v; +#ifdef __SSE2__ + __m128i m; +#endif +} pa_int32_vector_t; + +/* float vector */ +typedef float pa_v4sf __attribute__ ((vector_size (PA_FLOAT_VECTOR_SIZE * sizeof(float)))); +typedef union pa_float_vector { + float f[PA_FLOAT_VECTOR_SIZE]; + pa_v4sf v; +#ifdef __SSE2__ + __m128 m; +#endif +} pa_float_vector_t; + +#endif diff --git a/src/tests/rtstutter.c b/src/tests/rtstutter.c index fc23d959..d8aff342 100644 --- a/src/tests/rtstutter.c +++ b/src/tests/rtstutter.c @@ -43,24 +43,28 @@ static int msec_lower, msec_upper; static void* work(void *p) PA_GCC_NORETURN; static void* work(void *p) { +#ifdef HAVE_PTHREAD_SETAFFINITY_NP cpu_set_t mask; +#endif struct sched_param param; - pa_log_notice("CPU%i: Created thread.", PA_PTR_TO_INT(p)); + pa_log_notice("CPU%i: Created 
thread.", PA_PTR_TO_UINT(p)); memset(&param, 0, sizeof(param)); param.sched_priority = 12; pa_assert_se(pthread_setschedparam(pthread_self(), SCHED_FIFO, &param) == 0); +#ifdef HAVE_PTHREAD_SETAFFINITY_NP CPU_ZERO(&mask); - CPU_SET((size_t) PA_PTR_TO_INT(p), &mask); + CPU_SET((size_t) PA_PTR_TO_UINT(p), &mask); pa_assert_se(pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) == 0); +#endif for (;;) { struct timespec now, end; uint64_t nsec; - pa_log_notice("CPU%i: Sleeping for 1s", PA_PTR_TO_INT(p)); + pa_log_notice("CPU%i: Sleeping for 1s", PA_PTR_TO_UINT(p)); sleep(1); pa_assert_se(clock_gettime(CLOCK_REALTIME, &end) == 0); @@ -69,7 +73,7 @@ static void* work(void *p) { (uint64_t) ((((double) rand())*(double)(msec_upper-msec_lower)*PA_NSEC_PER_MSEC)/RAND_MAX) + (uint64_t) ((uint64_t) msec_lower*PA_NSEC_PER_MSEC); - pa_log_notice("CPU%i: Freezing for %ims", PA_PTR_TO_INT(p), (int) (nsec/PA_NSEC_PER_MSEC)); + pa_log_notice("CPU%i: Freezing for %ims", PA_PTR_TO_UINT(p), (int) (nsec/PA_NSEC_PER_MSEC)); end.tv_sec += (time_t) (nsec / PA_NSEC_PER_SEC); end.tv_nsec += (long int) (nsec % PA_NSEC_PER_SEC); @@ -87,7 +91,7 @@ static void* work(void *p) { } int main(int argc, char*argv[]) { - int n; + unsigned n; srand((unsigned) time(NULL)); @@ -109,7 +113,7 @@ int main(int argc, char*argv[]) { for (n = 1; n < pa_ncpus(); n++) { pthread_t t; - pa_assert_se(pthread_create(&t, NULL, work, PA_INT_TO_PTR(n)) == 0); + pa_assert_se(pthread_create(&t, NULL, work, PA_UINT_TO_PTR(n)) == 0); } work(PA_INT_TO_PTR(0)); diff --git a/src/tests/vector-test.c b/src/tests/vector-test.c new file mode 100644 index 00000000..f7344172 --- /dev/null +++ b/src/tests/vector-test.c @@ -0,0 +1,83 @@ +/*** + This file is part of PulseAudio. 
+ + Copyright 2009 Lennart Poettering + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <pulsecore/vector.h> +#include <pulsecore/log.h> + +int main(int argc, char *argv[]) { + +#ifdef __SSE2__ + pa_int16_vector_t input, zero; + pa_int32_vector_t unpacked1, unpacked2; + pa_int32_vector_t volume1, volume2, volume1_hi, volume1_lo, volume2_hi, volume2_lo, reduce, mask; + pa_int16_vector_t output; + + unsigned u; + + zero.v = PA_INT16_VECTOR_MAKE(0); + reduce.v = PA_INT32_VECTOR_MAKE(0x10000); + volume1.v = volume2.v = PA_INT32_VECTOR_MAKE(0x10000*2+7); + mask.v = PA_INT32_VECTOR_MAKE(0xFFFF); + + volume1_lo.m = _mm_and_si128(volume1.m, mask.m); + volume2_lo.m = _mm_and_si128(volume2.m, mask.m); + volume1_hi.m = _mm_srli_epi32(volume1.m, 16); + volume2_hi.m = _mm_srli_epi32(volume2.m, 16); + + input.v = PA_INT16_VECTOR_MAKE(32000); + + for (u = 0; u < PA_INT16_VECTOR_SIZE; u++) + pa_log("input=%i\n", input.i[u]); + + unpacked1.m = _mm_unpackhi_epi16(zero.m, input.m); + unpacked2.m = _mm_unpacklo_epi16(zero.m, input.m); + + for (u = 0; u < PA_INT32_VECTOR_SIZE; u++) + pa_log("unpacked1=%i\n", unpacked1.i[u]); + + unpacked1.v /= reduce.v; + unpacked2.v /= reduce.v; + + for (u = 0; u < PA_INT32_VECTOR_SIZE; u++) + pa_log("unpacked1=%i\n", unpacked1.i[u]); 
+ + for (u = 0; u < PA_INT32_VECTOR_SIZE; u++) + pa_log("volume1=%i\n", volume1.i[u]); + + unpacked1.v = (unpacked1.v * volume1_lo.v) / reduce.v + unpacked1.v * volume1_hi.v; + unpacked2.v = (unpacked2.v * volume2_lo.v) / reduce.v + unpacked2.v * volume2_hi.v; + + for (u = 0; u < PA_INT32_VECTOR_SIZE; u++) + pa_log("unpacked1=%i\n", unpacked1.i[u]); + + output.m = _mm_packs_epi32(unpacked1.m, unpacked2.m); + + for (u = 0; u < PA_INT16_VECTOR_SIZE; u++) + pa_log("output=%i\n", output.i[u]); + +#endif + + return 0; +} |