From c5bd72509ecae1c12bb523fa56432fd71428fbf1 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 20 Aug 2009 00:20:03 +0200 Subject: core: check return value of getgrnam_r() instead of errno According to POSIX getgrnam_r() returns the error code as return value, and not in errno. Honour that. Pointed out and inspired by a patch from Ted Percival. --- src/pulsecore/core-util.c | 74 ++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 40 deletions(-) (limited to 'src') diff --git a/src/pulsecore/core-util.c b/src/pulsecore/core-util.c index 6494244e..ef8c8472 100644 --- a/src/pulsecore/core-util.c +++ b/src/pulsecore/core-util.c @@ -975,6 +975,7 @@ static int is_group(gid_t gid, const char *name) { int r = -1; #ifdef HAVE_GETGRGID_R + #ifdef _SC_GETGR_R_SIZE_MAX n = sysconf(_SC_GETGR_R_SIZE_MAX); #else @@ -985,38 +986,25 @@ static int is_group(gid_t gid, const char *name) { data = pa_xmalloc((size_t) n); + if ((errno = getgrgid_r(gid, &group, data, (size_t) n, &result)) || !result) +#else errno = 0; - if (getgrgid_r(gid, &group, data, (size_t) n, &result) < 0 || !result) { - pa_log("getgrgid_r(%u): %s", (unsigned) gid, pa_cstrerror(errno)); - + if (!(result = getgrgid(gid))) +#endif + { if (!errno) errno = ENOENT; - goto finish; - } - - r = strcmp(name, result->gr_name) == 0; - -finish: - pa_xfree(data); -#else - /* XXX Not thread-safe, but needed on OSes (e.g. FreeBSD 4.X) that do not - * support getgrgid_r. 
*/ - - errno = 0; - if (!(result = getgrgid(gid))) { pa_log("getgrgid(%u): %s", gid, pa_cstrerror(errno)); - if (!errno) - errno = ENOENT; - goto finish; } r = strcmp(name, result->gr_name) == 0; finish: -#endif + + pa_xfree(data); return r; } @@ -1065,12 +1053,14 @@ finish: /* Check whether the specifc user id is a member of the specified group */ int pa_uid_in_group(uid_t uid, const char *name) { - char *g_buf, *p_buf; + char *g_buf = NULL, *p_buf = NULL; long g_n, p_n; - struct group grbuf, *gr; + struct group grbuf, *gr = NULL; char **i; int r = -1; +#ifdef HAVE_GETGRNAM_R + #ifdef _SC_GETGR_R_SIZE_MAX g_n = sysconf(_SC_GETGR_R_SIZE_MAX); #else @@ -1081,6 +1071,19 @@ int pa_uid_in_group(uid_t uid, const char *name) { g_buf = pa_xmalloc((size_t) g_n); + if ((errno = getgrnam_r(name, &grbuf, g_buf, (size_t) g_n, &gr)) != 0 || !gr) +#else + errno = 0; + if (!(gr = getgrnam(name))) +#endif + { + if (!errno) + errno = ENOENT; + goto finish; + } + +#ifdef HAVE_GETPWNAM_R + #ifdef _SC_GETPW_R_SIZE_MAX p_n = sysconf(_SC_GETPW_R_SIZE_MAX); #else @@ -1090,26 +1093,16 @@ int pa_uid_in_group(uid_t uid, const char *name) { p_n = 512; p_buf = pa_xmalloc((size_t) p_n); - - errno = 0; -#ifdef HAVE_GETGRNAM_R - if (getgrnam_r(name, &grbuf, g_buf, (size_t) g_n, &gr) != 0 || !gr) -#else - if (!(gr = getgrnam(name))) #endif - { - if (!errno) - errno = ENOENT; - goto finish; - } r = 0; for (i = gr->gr_mem; *i; i++) { - struct passwd pwbuf, *pw; + struct passwd pwbuf, *pw = NULL; #ifdef HAVE_GETPWNAM_R - if (getpwnam_r(*i, &pwbuf, p_buf, (size_t) p_n, &pw) != 0 || !pw) + if ((errno = getpwnam_r(*i, &pwbuf, p_buf, (size_t) p_n, &pw)) != 0 || !pw) #else + errno = 0; if (!(pw = getpwnam(*i))) #endif continue; @@ -1130,9 +1123,11 @@ finish: /* Get the GID of a gfiven group, return (gid_t) -1 on failure. 
*/ gid_t pa_get_gid_of_group(const char *name) { gid_t ret = (gid_t) -1; - char *g_buf; + char *g_buf = NULL; long g_n; - struct group grbuf, *gr; + struct group grbuf, *gr = NULL; + +#ifdef HAVE_GETGRNAM_R #ifdef _SC_GETGR_R_SIZE_MAX g_n = sysconf(_SC_GETGR_R_SIZE_MAX); @@ -1144,10 +1139,9 @@ gid_t pa_get_gid_of_group(const char *name) { g_buf = pa_xmalloc((size_t) g_n); - errno = 0; -#ifdef HAVE_GETGRNAM_R - if (getgrnam_r(name, &grbuf, g_buf, (size_t) g_n, &gr) != 0 || !gr) + if ((errno = getgrnam_r(name, &grbuf, g_buf, (size_t) g_n, &gr)) != 0 || !gr) #else + errno = 0; if (!(gr = getgrnam(name))) #endif { -- cgit From 30ba9030efea782779cac6f107b8917572150d16 Mon Sep 17 00:00:00 2001 From: Xabier Rodriguez Calvar Date: Tue, 30 Jun 2009 18:20:03 +0200 Subject: Modification of the glib-mainloop doc to ensure that nobody frees the api as it is owned by the loop. --- src/pulse/glib-mainloop.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/pulse/glib-mainloop.h b/src/pulse/glib-mainloop.h index 189513a8..67aba27d 100644 --- a/src/pulse/glib-mainloop.h +++ b/src/pulse/glib-mainloop.h @@ -56,7 +56,9 @@ pa_glib_mainloop *pa_glib_mainloop_new(GMainContext *c); /** Free the GLIB main loop object */ void pa_glib_mainloop_free(pa_glib_mainloop* g); -/** Return the abstract main loop API vtable for the GLIB main loop object */ +/** Return the abstract main loop API vtable for the GLIB main loop + object. No need of freeing the API as it is owned by the loop and + it is destroyed when this dies */ pa_mainloop_api* pa_glib_mainloop_get_api(pa_glib_mainloop *g); PA_C_DECL_END -- cgit From 65f86ef7d43983cdcea3211705866384404b20cc Mon Sep 17 00:00:00 2001 From: Xabier Rodriguez Calvar Date: Tue, 30 Jun 2009 18:22:44 +0200 Subject: Modification of the mainloop doc to ensure that nobody frees the api as it is owned by the loop. 
--- src/pulse/mainloop.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/pulse/mainloop.h b/src/pulse/mainloop.h index 4a83ebe8..63abd588 100644 --- a/src/pulse/mainloop.h +++ b/src/pulse/mainloop.h @@ -108,7 +108,9 @@ int pa_mainloop_iterate(pa_mainloop *m, int block, int *retval); /** Run unlimited iterations of the main loop object until the main loop's quit() routine is called. */ int pa_mainloop_run(pa_mainloop *m, int *retval); -/** Return the abstract main loop abstraction layer vtable for this main loop. */ +/** Return the abstract main loop abstraction layer vtable for this + main loop. No need of freeing the API as it is owned by the loop + and it is destroyed when this dies */ pa_mainloop_api* pa_mainloop_get_api(pa_mainloop*m); /** Shutdown the main loop */ -- cgit From 52e5d4b1d24db4f4f9ff6e70ddf8c9a6b80cdc6a Mon Sep 17 00:00:00 2001 From: Xabier Rodriguez Calvar Date: Tue, 30 Jun 2009 18:23:17 +0200 Subject: Modification of the thread-mainloop doc to ensure that nobody frees the api as it is owned by the loop. --- src/pulse/thread-mainloop.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/pulse/thread-mainloop.h b/src/pulse/thread-mainloop.h index e847070d..2cf496e1 100644 --- a/src/pulse/thread-mainloop.h +++ b/src/pulse/thread-mainloop.h @@ -299,7 +299,9 @@ void pa_threaded_mainloop_accept(pa_threaded_mainloop *m); /** Return the return value as specified with the main loop's quit() routine. */ int pa_threaded_mainloop_get_retval(pa_threaded_mainloop *m); -/** Return the abstract main loop abstraction layer vtable for this main loop. */ +/** Return the abstract main loop abstraction layer vtable for this + main loop. No need of freeing the API as it is owned by the loop + and it is destroyed when this dies */ pa_mainloop_api* pa_threaded_mainloop_get_api(pa_threaded_mainloop*m); /** Returns non-zero when called from withing the event loop thread. 
\since 0.9.7 */ -- cgit From 26839c4b9eb549eebf8db6eae2399ed6fd94efa8 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Tue, 11 Aug 2009 15:15:57 +0200 Subject: sample-utils: split out functions from case Move the volume functions out of the switch case and use a table indexed by the sample format to find the volume function. --- src/pulsecore/sample-util.c | 586 +++++++++++++++++++++++--------------------- 1 file changed, 303 insertions(+), 283 deletions(-) (limited to 'src') diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c index 5b8ccf59..ef435673 100644 --- a/src/pulsecore/sample-util.c +++ b/src/pulsecore/sample-util.c @@ -690,361 +690,381 @@ size_t pa_mix( return length; } +typedef struct pa_volume_funcs { + void (*u8) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length); + void (*alaw) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length); + void (*ulaw) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length); + void (*s16ne) (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length); + void (*s16re) (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length); + void (*float32ne) (float *samples, float *volumes, unsigned channels, unsigned length); + void (*float32re) (float *samples, float *volumes, unsigned channels, unsigned length); + void (*s32ne) (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length); + void (*s32re) (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length); + void (*s24ne) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length); + void (*s24re) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length); + void (*s24_32ne) (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length); + void (*s24_32re) (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length); +} pa_volume_funcs; + +static void +pa_volume_u8_c (uint8_t *samples, int32_t 
*volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) *samples - 0x80; + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); + *samples++ = (uint8_t) (t + 0x80); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} -void pa_volume_memchunk( - pa_memchunk*c, - const pa_sample_spec *spec, - const pa_cvolume *volume) { - - void *ptr; - - pa_assert(c); - pa_assert(spec); - pa_assert(c->length % pa_frame_size(spec) == 0); - pa_assert(volume); - - if (pa_memblock_is_silence(c->memblock)) - return; - - if (pa_cvolume_channels_equal_to(volume, PA_VOLUME_NORM)) - return; - - if (pa_cvolume_channels_equal_to(volume, PA_VOLUME_MUTED)) { - pa_silence_memchunk(c, spec); - return; - } - - ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index; - - switch (spec->format) { - - case PA_SAMPLE_S16NE: { - int16_t *d, *e; - unsigned channel; - int32_t linear[PA_CHANNELS_MAX]; - - calc_linear_integer_volume(linear, volume); - - e = (int16_t*) ptr + c->length/sizeof(int16_t); - - for (channel = 0, d = ptr; d < e; d++) { - int32_t t, hi, lo; - - /* Multiplying the 32bit volume factor with the 16bit - * sample might result in an 48bit value. We want to - * do without 64 bit integers and hence do the - * multiplication independantly for the HI and LO part - * of the volume. 
*/ - - hi = linear[channel] >> 16; - lo = linear[channel] & 0xFFFF; - - t = (int32_t)(*d); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *d = (int16_t) t; - - if (PA_UNLIKELY(++channel >= spec->channels)) - channel = 0; - } - - break; - } - - case PA_SAMPLE_S16RE: { - int16_t *d, *e; - unsigned channel; - int32_t linear[PA_CHANNELS_MAX]; - - calc_linear_integer_volume(linear, volume); - - e = (int16_t*) ptr + c->length/sizeof(int16_t); - - for (channel = 0, d = ptr; d < e; d++) { - int32_t t, hi, lo; +static void +pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; - hi = linear[channel] >> 16; - lo = linear[channel] & 0xFFFF; + for (channel = 0; length; length--) { + int32_t t, hi, lo; - t = (int32_t) PA_INT16_SWAP(*d); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *d = PA_INT16_SWAP((int16_t) t); + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; - if (PA_UNLIKELY(++channel >= spec->channels)) - channel = 0; - } + t = (int32_t) st_alaw2linear16(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3); - break; - } - - case PA_SAMPLE_S32NE: { - int32_t *d, *e; - unsigned channel; - int32_t linear[PA_CHANNELS_MAX]; + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} - calc_linear_integer_volume(linear, volume); +static void +pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; - e = (int32_t*) ptr + c->length/sizeof(int32_t); + for (channel = 0; length; length--) { + int32_t t, hi, lo; - for (channel = 0, d = ptr; d < e; d++) { - int64_t t; + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; - t = (int64_t)(*d); - t = (t * linear[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *d = (int32_t) t; + t = (int32_t) 
st_ulaw2linear16(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); - if (PA_UNLIKELY(++channel >= spec->channels)) - channel = 0; - } - break; - } - - case PA_SAMPLE_S32RE: { - int32_t *d, *e; - unsigned channel; - int32_t linear[PA_CHANNELS_MAX]; - - calc_linear_integer_volume(linear, volume); - - e = (int32_t*) ptr + c->length/sizeof(int32_t); + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} - for (channel = 0, d = ptr; d < e; d++) { - int64_t t; +static void +pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; - t = (int64_t) PA_INT32_SWAP(*d); - t = (t * linear[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *d = PA_INT32_SWAP((int32_t) t); + length /= sizeof (int16_t); - if (PA_UNLIKELY(++channel >= spec->channels)) - channel = 0; - } - break; - } + for (channel = 0; length; length--) { + int32_t t, hi, lo; - case PA_SAMPLE_S24NE: { - uint8_t *d, *e; - unsigned channel; - int32_t linear[PA_CHANNELS_MAX]; + /* Multiplying the 32bit volume factor with the 16bit + * sample might result in an 48bit value. We want to + * do without 64 bit integers and hence do the + * multiplication independantly for the HI and LO part + * of the volume. 
*/ - calc_linear_integer_volume(linear, volume); + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; - e = (uint8_t*) ptr + c->length; + t = (int32_t)(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (int16_t) t; - for (channel = 0, d = ptr; d < e; d += 3) { - int64_t t; + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} - t = (int64_t)((int32_t) (PA_READ24NE(d) << 8)); - t = (t * linear[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - PA_WRITE24NE(d, ((uint32_t) (int32_t) t) >> 8); +static void +pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; - if (PA_UNLIKELY(++channel >= spec->channels)) - channel = 0; - } - break; - } + length /= sizeof (int16_t); - case PA_SAMPLE_S24RE: { - uint8_t *d, *e; - unsigned channel; - int32_t linear[PA_CHANNELS_MAX]; + for (channel = 0; length; length--) { + int32_t t, hi, lo; - calc_linear_integer_volume(linear, volume); + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; - e = (uint8_t*) ptr + c->length; + t = (int32_t) PA_INT16_SWAP(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = PA_INT16_SWAP((int16_t) t); - for (channel = 0, d = ptr; d < e; d += 3) { - int64_t t; + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} - t = (int64_t)((int32_t) (PA_READ24RE(d) << 8)); - t = (t * linear[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - PA_WRITE24RE(d, ((uint32_t) (int32_t) t) >> 8); +static void +pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length) +{ + unsigned channel; - if (PA_UNLIKELY(++channel >= spec->channels)) - channel = 0; - } - break; - } + length /= sizeof (float); - case PA_SAMPLE_S24_32NE: { - uint32_t *d, *e; - unsigned channel; - int32_t linear[PA_CHANNELS_MAX]; + for (channel = 0; length; length--) { 
+ *samples++ *= volumes[channel]; - calc_linear_integer_volume(linear, volume); + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} - e = (uint32_t*) ptr + c->length/sizeof(uint32_t); +static void +pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length) +{ + unsigned channel; - for (channel = 0, d = ptr; d < e; d++) { - int64_t t; + length /= sizeof (float); - t = (int64_t) ((int32_t) (*d << 8)); - t = (t * linear[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *d = ((uint32_t) ((int32_t) t)) >> 8; + for (channel = 0; length; length--) { + float t; - if (PA_UNLIKELY(++channel >= spec->channels)) - channel = 0; - } - break; - } + t = PA_FLOAT32_SWAP(*samples); + t *= volumes[channel]; + *samples++ = PA_FLOAT32_SWAP(t); - case PA_SAMPLE_S24_32RE: { - uint32_t *d, *e; - unsigned channel; - int32_t linear[PA_CHANNELS_MAX]; + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} - calc_linear_integer_volume(linear, volume); +static void +pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; - e = (uint32_t*) ptr + c->length/sizeof(uint32_t); + length /= sizeof (int32_t); - for (channel = 0, d = ptr; d < e; d++) { - int64_t t; + for (channel = 0; length; length--) { + int64_t t; - t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*d) << 8)); - t = (t * linear[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *d = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); + t = (int64_t)(*samples); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = (int32_t) t; - if (PA_UNLIKELY(++channel >= spec->channels)) - channel = 0; - } - break; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} - case PA_SAMPLE_U8: { - uint8_t *d, *e; - unsigned channel; - int32_t linear[PA_CHANNELS_MAX]; +static void +pa_volume_s32re_c (int32_t *samples, int32_t *volumes, 
unsigned channels, unsigned length) +{ + unsigned channel; - calc_linear_integer_volume(linear, volume); + length /= sizeof (int32_t); - e = (uint8_t*) ptr + c->length; + for (channel = 0; length; length--) { + int64_t t; - for (channel = 0, d = ptr; d < e; d++) { - int32_t t, hi, lo; + t = (int64_t) PA_INT32_SWAP(*samples); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = PA_INT32_SWAP((int32_t) t); - hi = linear[channel] >> 16; - lo = linear[channel] & 0xFFFF; + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} - t = (int32_t) *d - 0x80; - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); - *d = (uint8_t) (t + 0x80); +static void +pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + uint8_t *e; - if (PA_UNLIKELY(++channel >= spec->channels)) - channel = 0; - } - break; - } + e = samples + length; - case PA_SAMPLE_ULAW: { - uint8_t *d, *e; - unsigned channel; - int32_t linear[PA_CHANNELS_MAX]; + for (channel = 0; samples < e; samples += 3) { + int64_t t; - calc_linear_integer_volume(linear, volume); + t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8); - e = (uint8_t*) ptr + c->length; + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} - for (channel = 0, d = ptr; d < e; d++) { - int32_t t, hi, lo; +static void +pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + uint8_t *e; - hi = linear[channel] >> 16; - lo = linear[channel] & 0xFFFF; + e = samples + length; - t = (int32_t) st_ulaw2linear16(*d); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *d = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); + for (channel = 0; samples < e; samples += 3) { + int64_t 
t; - if (PA_UNLIKELY(++channel >= spec->channels)) - channel = 0; - } - break; - } + t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8); - case PA_SAMPLE_ALAW: { - uint8_t *d, *e; - unsigned channel; - int32_t linear[PA_CHANNELS_MAX]; + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} - calc_linear_integer_volume(linear, volume); +static void +pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; - e = (uint8_t*) ptr + c->length; + length /= sizeof (uint32_t); - for (channel = 0, d = ptr; d < e; d++) { - int32_t t, hi, lo; + for (channel = 0; length; length--) { + int64_t t; - hi = linear[channel] >> 16; - lo = linear[channel] & 0xFFFF; + t = (int64_t) ((int32_t) (*samples << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = ((uint32_t) ((int32_t) t)) >> 8; - t = (int32_t) st_alaw2linear16(*d); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *d = (uint8_t) st_13linear2alaw((int16_t) t >> 3); + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} - if (PA_UNLIKELY(++channel >= spec->channels)) - channel = 0; - } - break; - } +static void +pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; - case PA_SAMPLE_FLOAT32NE: { - float *d; - int skip; - unsigned n; - unsigned channel; + length /= sizeof (uint32_t); - d = ptr; - skip = (int) (spec->channels * sizeof(float)); - n = (unsigned) (c->length/sizeof(float)/spec->channels); + for (channel = 0; length; length--) { + int64_t t; - for (channel = 0; channel < spec->channels; channel ++) { - float v, *t; + t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, 
-0x80000000LL, 0x7FFFFFFFLL); + *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); - if (PA_UNLIKELY(volume->values[channel] == PA_VOLUME_NORM)) - continue; + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} - v = (float) pa_sw_volume_to_linear(volume->values[channel]); - t = d + channel; - oil_scalarmult_f32(t, skip, t, skip, &v, (int) n); - } - break; - } +typedef void (*pa_do_volume_func) (void *samples, void *volumes, unsigned channels, unsigned length); +typedef void (*pa_calc_volume_func) (void *volumes, const pa_cvolume *volume); + +typedef union { + float f; + uint32_t i; +} volume_val; + +static pa_calc_volume_func calc_volume_funcs[] = +{ + (pa_calc_volume_func) calc_linear_integer_volume, + (pa_calc_volume_func) calc_linear_integer_volume, + (pa_calc_volume_func) calc_linear_integer_volume, + (pa_calc_volume_func) calc_linear_integer_volume, + (pa_calc_volume_func) calc_linear_integer_volume, + (pa_calc_volume_func) calc_linear_float_volume, + (pa_calc_volume_func) calc_linear_float_volume, + (pa_calc_volume_func) calc_linear_integer_volume, + (pa_calc_volume_func) calc_linear_integer_volume, + (pa_calc_volume_func) calc_linear_integer_volume, + (pa_calc_volume_func) calc_linear_integer_volume, + (pa_calc_volume_func) calc_linear_integer_volume, + (pa_calc_volume_func) calc_linear_integer_volume +}; + +static pa_do_volume_func do_volume_funcs[] = +{ + (pa_do_volume_func) pa_volume_u8_c, + (pa_do_volume_func) pa_volume_alaw_c, + (pa_do_volume_func) pa_volume_ulaw_c, +#ifdef WORDS_BIGENDIAN + (pa_do_volume_func) pa_volume_s16re_c, + (pa_do_volume_func) pa_volume_s16ne_c, + (pa_do_volume_func) pa_volume_float32re_c, + (pa_do_volume_func) pa_volume_float32ne_c, + (pa_do_volume_func) pa_volume_s32re_c, + (pa_do_volume_func) pa_volume_s32ne_c, + (pa_do_volume_func) pa_volume_s24re_c, + (pa_do_volume_func) pa_volume_s24ne_c, + (pa_do_volume_func) pa_volume_s24_32re_c + (pa_do_volume_func) pa_volume_s24_32ne_c, +#else + 
(pa_do_volume_func) pa_volume_s16ne_c, + (pa_do_volume_func) pa_volume_s16re_c, + (pa_do_volume_func) pa_volume_float32ne_c, + (pa_do_volume_func) pa_volume_float32re_c, + (pa_do_volume_func) pa_volume_s32ne_c, + (pa_do_volume_func) pa_volume_s32re_c, + (pa_do_volume_func) pa_volume_s24ne_c, + (pa_do_volume_func) pa_volume_s24re_c, + (pa_do_volume_func) pa_volume_s24_32ne_c, + (pa_do_volume_func) pa_volume_s24_32re_c +#endif +}; - case PA_SAMPLE_FLOAT32RE: { - float *d, *e; - unsigned channel; - float linear[PA_CHANNELS_MAX]; +void pa_volume_memchunk( + pa_memchunk*c, + const pa_sample_spec *spec, + const pa_cvolume *volume) { - calc_linear_float_volume(linear, volume); + void *ptr; + volume_val linear[PA_CHANNELS_MAX]; - e = (float*) ptr + c->length/sizeof(float); + pa_assert(c); + pa_assert(spec); + pa_assert(c->length % pa_frame_size(spec) == 0); + pa_assert(volume); - for (channel = 0, d = ptr; d < e; d++) { - float t; + if (pa_memblock_is_silence(c->memblock)) + return; - t = PA_FLOAT32_SWAP(*d); - t *= linear[channel]; - *d = PA_FLOAT32_SWAP(t); + if (pa_cvolume_channels_equal_to(volume, PA_VOLUME_NORM)) + return; - if (PA_UNLIKELY(++channel >= spec->channels)) - channel = 0; - } + if (pa_cvolume_channels_equal_to(volume, PA_VOLUME_MUTED)) { + pa_silence_memchunk(c, spec); + return; + } - break; - } + if (spec->format < 0 || spec->format > PA_SAMPLE_MAX) { + pa_log_warn(" Unable to change volume of format %s.", pa_sample_format_to_string(spec->format)); + return; + } + ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index; - default: - pa_log_warn(" Unable to change volume of format %s.", pa_sample_format_to_string(spec->format)); - /* If we cannot change the volume, we just don't do it */ - } + calc_volume_funcs[spec->format] ((void *)linear, volume); + do_volume_funcs[spec->format] (ptr, (void *)linear, spec->channels, c->length); pa_memblock_release(c->memblock); } -- cgit From 5b8b6544e205237d41bc502a7fd9f79051af78ec Mon Sep 17 00:00:00 2001 From: 
Wim Taymans Date: Tue, 11 Aug 2009 16:25:44 +0200 Subject: sample-utils: coding style cleanup Make the coding style match the rest of pulseaudio more. Remove some liboil functions, they seem unoptimized and likely slower than our handrolled versions here. --- src/pulsecore/sample-util.c | 99 +++++++++++++++++++++------------------------ 1 file changed, 46 insertions(+), 53 deletions(-) (limited to 'src') diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c index ef435673..0d4e01ef 100644 --- a/src/pulsecore/sample-util.c +++ b/src/pulsecore/sample-util.c @@ -30,9 +30,6 @@ #include #include -#include -#include - #include #include @@ -977,59 +974,50 @@ pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, un } } -typedef void (*pa_do_volume_func) (void *samples, void *volumes, unsigned channels, unsigned length); -typedef void (*pa_calc_volume_func) (void *volumes, const pa_cvolume *volume); +typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned channels, unsigned length); +typedef void (*pa_calc_volume_func_t) (void *volumes, const pa_cvolume *volume); typedef union { float f; uint32_t i; } volume_val; -static pa_calc_volume_func calc_volume_funcs[] = -{ - (pa_calc_volume_func) calc_linear_integer_volume, - (pa_calc_volume_func) calc_linear_integer_volume, - (pa_calc_volume_func) calc_linear_integer_volume, - (pa_calc_volume_func) calc_linear_integer_volume, - (pa_calc_volume_func) calc_linear_integer_volume, - (pa_calc_volume_func) calc_linear_float_volume, - (pa_calc_volume_func) calc_linear_float_volume, - (pa_calc_volume_func) calc_linear_integer_volume, - (pa_calc_volume_func) calc_linear_integer_volume, - (pa_calc_volume_func) calc_linear_integer_volume, - (pa_calc_volume_func) calc_linear_integer_volume, - (pa_calc_volume_func) calc_linear_integer_volume, - (pa_calc_volume_func) calc_linear_integer_volume +typedef struct pa_sample_func_t { + pa_calc_volume_func_t calc_volume; + 
pa_do_volume_func_t do_volume; +} pa_sample_func_t; + +static const pa_calc_volume_func_t calc_volume_table[] = { + [PA_SAMPLE_U8] = (pa_calc_volume_func_t) calc_linear_integer_volume, + [PA_SAMPLE_ALAW] = (pa_calc_volume_func_t) calc_linear_integer_volume, + [PA_SAMPLE_ULAW] = (pa_calc_volume_func_t) calc_linear_integer_volume, + [PA_SAMPLE_S16LE] = (pa_calc_volume_func_t) calc_linear_integer_volume, + [PA_SAMPLE_S16BE] = (pa_calc_volume_func_t) calc_linear_integer_volume, + [PA_SAMPLE_FLOAT32LE] = (pa_calc_volume_func_t) calc_linear_float_volume, + [PA_SAMPLE_FLOAT32BE] = (pa_calc_volume_func_t) calc_linear_float_volume, + [PA_SAMPLE_S32LE] = (pa_calc_volume_func_t) calc_linear_integer_volume, + [PA_SAMPLE_S32BE] = (pa_calc_volume_func_t) calc_linear_integer_volume, + [PA_SAMPLE_S24LE] = (pa_calc_volume_func_t) calc_linear_integer_volume, + [PA_SAMPLE_S24BE] = (pa_calc_volume_func_t) calc_linear_integer_volume, + [PA_SAMPLE_S24_32LE] = (pa_calc_volume_func_t) calc_linear_integer_volume, + [PA_SAMPLE_S24_32BE] = (pa_calc_volume_func_t) calc_linear_integer_volume }; -static pa_do_volume_func do_volume_funcs[] = +static pa_do_volume_func_t do_volume_table[] = { - (pa_do_volume_func) pa_volume_u8_c, - (pa_do_volume_func) pa_volume_alaw_c, - (pa_do_volume_func) pa_volume_ulaw_c, -#ifdef WORDS_BIGENDIAN - (pa_do_volume_func) pa_volume_s16re_c, - (pa_do_volume_func) pa_volume_s16ne_c, - (pa_do_volume_func) pa_volume_float32re_c, - (pa_do_volume_func) pa_volume_float32ne_c, - (pa_do_volume_func) pa_volume_s32re_c, - (pa_do_volume_func) pa_volume_s32ne_c, - (pa_do_volume_func) pa_volume_s24re_c, - (pa_do_volume_func) pa_volume_s24ne_c, - (pa_do_volume_func) pa_volume_s24_32re_c - (pa_do_volume_func) pa_volume_s24_32ne_c, -#else - (pa_do_volume_func) pa_volume_s16ne_c, - (pa_do_volume_func) pa_volume_s16re_c, - (pa_do_volume_func) pa_volume_float32ne_c, - (pa_do_volume_func) pa_volume_float32re_c, - (pa_do_volume_func) pa_volume_s32ne_c, - (pa_do_volume_func) 
pa_volume_s32re_c, - (pa_do_volume_func) pa_volume_s24ne_c, - (pa_do_volume_func) pa_volume_s24re_c, - (pa_do_volume_func) pa_volume_s24_32ne_c, - (pa_do_volume_func) pa_volume_s24_32re_c -#endif + [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c, + [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c, + [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c, + [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c, + [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c, + [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c, + [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c, + [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c, + [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c, + [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c, + [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c, + [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c, + [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c }; void pa_volume_memchunk( @@ -1063,8 +1051,8 @@ void pa_volume_memchunk( ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index; - calc_volume_funcs[spec->format] ((void *)linear, volume); - do_volume_funcs[spec->format] (ptr, (void *)linear, spec->channels, c->length); + calc_volume_table[spec->format] ((void *)linear, volume); + do_volume_table[spec->format] (ptr, (void *)linear, spec->channels, c->length); pa_memblock_release(c->memblock); } @@ -1110,7 +1098,7 @@ void pa_interleave(const void *src[], unsigned channels, void *dst, size_t ss, u d = (uint8_t*) dst + c * ss; for (j = 0; j < n; j ++) { - oil_memcpy(d, s, (int) ss); + memcpy(d, s, (int) ss); s = (uint8_t*) s + ss; d = (uint8_t*) d + fs; } @@ -1138,7 +1126,7 @@ void pa_deinterleave(const void *src, void *dst[], unsigned channels, size_t ss, d = dst[c]; for (j = 0; j < n; j ++) { - oil_memcpy(d, s, (int) ss); + memcpy(d, s, (int) ss); s = (uint8_t*) s + fs; d = (uint8_t*) d + ss; } 
@@ -1247,10 +1235,15 @@ void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const vo s = src; d = dst; if (format == PA_SAMPLE_FLOAT32NE) { + for (; n > 0; n--) { + float f; - float minus_one = -1.0, plus_one = 1.0; - oil_clip_f32(d, (int) dstr, s, (int) sstr, (int) n, &minus_one, &plus_one); + f = *s; + *d = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f); + s = (const float*) ((const uint8_t*) s + sstr); + d = (float*) ((uint8_t*) d + dstr); + } } else { pa_assert(format == PA_SAMPLE_FLOAT32RE); -- cgit From e71e644eb668b6336dd48d2730839aa3e9f7278e Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Tue, 11 Aug 2009 16:43:46 +0200 Subject: sample-util: move some functions around Move some stuff around before splitting it into a separate file. --- src/pulsecore/sample-util.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c index 0d4e01ef..f8a4c70a 100644 --- a/src/pulsecore/sample-util.c +++ b/src/pulsecore/sample-util.c @@ -975,17 +975,34 @@ pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, un } typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned channels, unsigned length); -typedef void (*pa_calc_volume_func_t) (void *volumes, const pa_cvolume *volume); + +typedef struct pa_sample_func_t { + pa_do_volume_func_t do_volume; +} pa_sample_func_t; + +static pa_do_volume_func_t do_volume_table[] = +{ + [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c, + [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c, + [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c, + [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c, + [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c, + [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c, + [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c, + [PA_SAMPLE_S32NE] = 
(pa_do_volume_func_t) pa_volume_s32ne_c, + [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c, + [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c, + [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c, + [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c, + [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c +}; typedef union { float f; uint32_t i; } volume_val; -typedef struct pa_sample_func_t { - pa_calc_volume_func_t calc_volume; - pa_do_volume_func_t do_volume; -} pa_sample_func_t; +typedef void (*pa_calc_volume_func_t) (void *volumes, const pa_cvolume *volume); static const pa_calc_volume_func_t calc_volume_table[] = { [PA_SAMPLE_U8] = (pa_calc_volume_func_t) calc_linear_integer_volume, @@ -1003,23 +1020,6 @@ static const pa_calc_volume_func_t calc_volume_table[] = { [PA_SAMPLE_S24_32BE] = (pa_calc_volume_func_t) calc_linear_integer_volume }; -static pa_do_volume_func_t do_volume_table[] = -{ - [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c, - [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c, - [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c, - [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c, - [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c, - [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c, - [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c, - [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c, - [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c, - [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c, - [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c, - [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c, - [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c -}; - void pa_volume_memchunk( pa_memchunk*c, const pa_sample_spec *spec, -- cgit From 3d008961c095cf8d41d2c61d13d446c98c892136 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Tue, 11 Aug 2009 
17:10:44 +0200 Subject: sample-util: move volume code to separate file Move the volume code into a separate file with the reference C implementations. Add a function to retrieve the volume function and one to install a new one. --- src/Makefile.am | 1 + src/pulsecore/sample-util.c | 316 +---------------------------------------- src/pulsecore/sample-util.h | 6 + src/pulsecore/svolume_c.c | 335 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 347 insertions(+), 311 deletions(-) create mode 100644 src/pulsecore/svolume_c.c (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index 17011cd3..fc5d39fb 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -825,6 +825,7 @@ libpulsecore_@PA_MAJORMINORMICRO@_la_SOURCES = \ pulsecore/resampler.c pulsecore/resampler.h \ pulsecore/rtpoll.c pulsecore/rtpoll.h \ pulsecore/sample-util.c pulsecore/sample-util.h \ + pulsecore/svolume_c.c \ pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \ pulsecore/sconv-s16le.c pulsecore/sconv-s16le.h \ pulsecore/sconv.c pulsecore/sconv.h \ diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c index f8a4c70a..0bbd5192 100644 --- a/src/pulsecore/sample-util.c +++ b/src/pulsecore/sample-util.c @@ -687,316 +687,6 @@ size_t pa_mix( return length; } -typedef struct pa_volume_funcs { - void (*u8) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length); - void (*alaw) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length); - void (*ulaw) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length); - void (*s16ne) (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length); - void (*s16re) (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length); - void (*float32ne) (float *samples, float *volumes, unsigned channels, unsigned length); - void (*float32re) (float *samples, float *volumes, unsigned channels, unsigned length); - void (*s32ne) (int32_t *samples, int32_t *volumes, unsigned 
channels, unsigned length); - void (*s32re) (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length); - void (*s24ne) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length); - void (*s24re) (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length); - void (*s24_32ne) (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length); - void (*s24_32re) (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length); -} pa_volume_funcs; - -static void -pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - for (channel = 0; length; length--) { - int32_t t, hi, lo; - - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; - - t = (int32_t) *samples - 0x80; - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); - *samples++ = (uint8_t) (t + 0x80); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - for (channel = 0; length; length--) { - int32_t t, hi, lo; - - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; - - t = (int32_t) st_alaw2linear16(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - for (channel = 0; length; length--) { - int32_t t, hi, lo; - - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; - - t = (int32_t) st_ulaw2linear16(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } 
-} - -static void -pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (int16_t); - - for (channel = 0; length; length--) { - int32_t t, hi, lo; - - /* Multiplying the 32bit volume factor with the 16bit - * sample might result in an 48bit value. We want to - * do without 64 bit integers and hence do the - * multiplication independantly for the HI and LO part - * of the volume. */ - - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; - - t = (int32_t)(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = (int16_t) t; - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (int16_t); - - for (channel = 0; length; length--) { - int32_t t, hi, lo; - - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; - - t = (int32_t) PA_INT16_SWAP(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = PA_INT16_SWAP((int16_t) t); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (float); - - for (channel = 0; length; length--) { - *samples++ *= volumes[channel]; - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (float); - - for (channel = 0; length; length--) { - float t; - - t = PA_FLOAT32_SWAP(*samples); - t *= volumes[channel]; - *samples++ = PA_FLOAT32_SWAP(t); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s32ne_c (int32_t *samples, int32_t 
*volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (int32_t); - - for (channel = 0; length; length--) { - int64_t t; - - t = (int64_t)(*samples); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = (int32_t) t; - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (int32_t); - - for (channel = 0; length; length--) { - int64_t t; - - t = (int64_t) PA_INT32_SWAP(*samples); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = PA_INT32_SWAP((int32_t) t); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - uint8_t *e; - - e = samples + length; - - for (channel = 0; samples < e; samples += 3) { - int64_t t; - - t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - uint8_t *e; - - e = samples + length; - - for (channel = 0; samples < e; samples += 3) { - int64_t t; - - t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= 
sizeof (uint32_t); - - for (channel = 0; length; length--) { - int64_t t; - - t = (int64_t) ((int32_t) (*samples << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = ((uint32_t) ((int32_t) t)) >> 8; - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (uint32_t); - - for (channel = 0; length; length--) { - int64_t t; - - t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned channels, unsigned length); - -typedef struct pa_sample_func_t { - pa_do_volume_func_t do_volume; -} pa_sample_func_t; - -static pa_do_volume_func_t do_volume_table[] = -{ - [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c, - [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c, - [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c, - [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c, - [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c, - [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c, - [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c, - [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c, - [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c, - [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c, - [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c, - [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c, - [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c -}; - typedef union { float f; uint32_t i; @@ -1027,6 +717,7 @@ void 
pa_volume_memchunk( void *ptr; volume_val linear[PA_CHANNELS_MAX]; + pa_do_volume_func_t do_volume; pa_assert(c); pa_assert(spec); @@ -1051,8 +742,11 @@ void pa_volume_memchunk( ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index; + do_volume = pa_get_volume_func (spec->format); + pa_assert(do_volume); + calc_volume_table[spec->format] ((void *)linear, volume); - do_volume_table[spec->format] (ptr, (void *)linear, spec->channels, c->length); + do_volume (ptr, (void *)linear, spec->channels, c->length); pa_memblock_release(c->memblock); } diff --git a/src/pulsecore/sample-util.h b/src/pulsecore/sample-util.h index 6a306c11..278b88b0 100644 --- a/src/pulsecore/sample-util.h +++ b/src/pulsecore/sample-util.h @@ -86,6 +86,12 @@ void pa_memchunk_dump_to_file(pa_memchunk *c, const char *fn); void pa_memchunk_sine(pa_memchunk *c, pa_mempool *pool, unsigned rate, unsigned freq); +typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned channels, unsigned length); + +pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f); +void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func); + + #define PA_CHANNEL_POSITION_MASK_LEFT \ (PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_FRONT_LEFT) \ | PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_REAR_LEFT) \ diff --git a/src/pulsecore/svolume_c.c b/src/pulsecore/svolume_c.c new file mode 100644 index 00000000..2148a573 --- /dev/null +++ b/src/pulsecore/svolume_c.c @@ -0,0 +1,335 @@ +/*** + This file is part of PulseAudio. + + Copyright 2004-2006 Lennart Poettering + Copyright 2006 Pierre Ossman for Cendio AB + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. 
+ + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + + +#include +#include +#include + +#include "sample-util.h" +#include "endianmacros.h" + +static void +pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) *samples - 0x80; + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); + *samples++ = (uint8_t) (t + 0x80); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) st_alaw2linear16(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) st_ulaw2linear16(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (uint8_t) 
st_14linear2ulaw((int16_t) t >> 2); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (int16_t); + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + /* Multiplying the 32bit volume factor with the 16bit + * sample might result in an 48bit value. We want to + * do without 64 bit integers and hence do the + * multiplication independantly for the HI and LO part + * of the volume. */ + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t)(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (int16_t) t; + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (int16_t); + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) PA_INT16_SWAP(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = PA_INT16_SWAP((int16_t) t); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (float); + + for (channel = 0; length; length--) { + *samples++ *= volumes[channel]; + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (float); + + for (channel = 0; length; length--) { + float t; + + t = PA_FLOAT32_SWAP(*samples); + t *= volumes[channel]; + *samples++ = PA_FLOAT32_SWAP(t); + + if (PA_UNLIKELY(++channel 
>= channels)) + channel = 0; + } +} + +static void +pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (int32_t); + + for (channel = 0; length; length--) { + int64_t t; + + t = (int64_t)(*samples); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = (int32_t) t; + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (int32_t); + + for (channel = 0; length; length--) { + int64_t t; + + t = (int64_t) PA_INT32_SWAP(*samples); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = PA_INT32_SWAP((int32_t) t); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + uint8_t *e; + + e = samples + length; + + for (channel = 0; samples < e; samples += 3) { + int64_t t; + + t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + uint8_t *e; + + e = samples + length; + + for (channel = 0; samples < e; samples += 3) { + int64_t t; + + t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s24_32ne_c (uint32_t 
*samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (uint32_t); + + for (channel = 0; length; length--) { + int64_t t; + + t = (int64_t) ((int32_t) (*samples << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = ((uint32_t) ((int32_t) t)) >> 8; + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (uint32_t); + + for (channel = 0; length; length--) { + int64_t t; + + t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static pa_do_volume_func_t do_volume_table[] = +{ + [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c, + [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c, + [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c, + [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c, + [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c, + [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c, + [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c, + [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c, + [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c, + [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c, + [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c, + [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c, + [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c +}; + +pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f) { + pa_assert(f >= 0); + pa_assert(f < PA_SAMPLE_MAX); + + return do_volume_table[f]; +} + +void 
pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func) { + pa_assert(f >= 0); + pa_assert(f < PA_SAMPLE_MAX); + + do_volume_table[f] = func; +} -- cgit From 2d73f13567ad03efe798d07eda87fa776b0505f2 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 12 Aug 2009 17:03:30 +0200 Subject: samples-util: add padding to volume array Pad the volume array with a copy of the start. We'll need this later to be able to write optimized functions. --- src/pulsecore/sample-util.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c index 0bbd5192..677f914a 100644 --- a/src/pulsecore/sample-util.c +++ b/src/pulsecore/sample-util.c @@ -103,24 +103,36 @@ void* pa_silence_memory(void *p, size_t length, const pa_sample_spec *spec) { return p; } +#define VOLUME_PADDING 32 + static void calc_linear_integer_volume(int32_t linear[], const pa_cvolume *volume) { - unsigned channel; + unsigned channel, nchannels, padding; pa_assert(linear); pa_assert(volume); - for (channel = 0; channel < volume->channels; channel++) + nchannels = volume->channels; + + for (channel = 0; channel < nchannels; channel++) linear[channel] = (int32_t) lrint(pa_sw_volume_to_linear(volume->values[channel]) * 0x10000); + + for (padding = 0; padding < VOLUME_PADDING; padding++, channel++) + linear[channel] = linear[padding]; } static void calc_linear_float_volume(float linear[], const pa_cvolume *volume) { - unsigned channel; + unsigned channel, nchannels, padding; pa_assert(linear); pa_assert(volume); - for (channel = 0; channel < volume->channels; channel++) + nchannels = volume->channels; + + for (channel = 0; channel < nchannels; channel++) linear[channel] = (float) pa_sw_volume_to_linear(volume->values[channel]); + + for (padding = 0; padding < VOLUME_PADDING; padding++, channel++) + linear[channel] = linear[padding]; } static void calc_linear_integer_stream_volumes(pa_mix_info 
streams[], unsigned nstreams, const pa_cvolume *volume, const pa_sample_spec *spec) { @@ -716,7 +728,7 @@ void pa_volume_memchunk( const pa_cvolume *volume) { void *ptr; - volume_val linear[PA_CHANNELS_MAX]; + volume_val linear[PA_CHANNELS_MAX + VOLUME_PADDING]; pa_do_volume_func_t do_volume; pa_assert(c); -- cgit From 3a0b012ee016e2fe40f49c72da119cb89d2ba312 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 12 Aug 2009 17:08:41 +0200 Subject: volume: add first mmx optimized function Add code for an mmx optimized version of s16ne volume scaling. Install the custom function. --- src/Makefile.am | 1 + src/daemon/main.c | 2 + src/pulsecore/sample-util.h | 1 + src/pulsecore/svolume_mmx.c | 424 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 428 insertions(+) create mode 100644 src/pulsecore/svolume_mmx.c (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index fc5d39fb..e7a99003 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -826,6 +826,7 @@ libpulsecore_@PA_MAJORMINORMICRO@_la_SOURCES = \ pulsecore/rtpoll.c pulsecore/rtpoll.h \ pulsecore/sample-util.c pulsecore/sample-util.h \ pulsecore/svolume_c.c \ + pulsecore/svolume_mmx.c \ pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \ pulsecore/sconv-s16le.c pulsecore/sconv-s16le.h \ pulsecore/sconv.c pulsecore/sconv.h \ diff --git a/src/daemon/main.c b/src/daemon/main.c index 8521e720..e3c395f2 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -821,6 +821,8 @@ int main(int argc, char *argv[]) { pa_memtrap_install(); + pa_volume_func_init_mmx(); + pa_assert_se(mainloop = pa_mainloop_new()); if (!(c = pa_core_new(pa_mainloop_get_api(mainloop), !conf->disable_shm, conf->shm_size))) { diff --git a/src/pulsecore/sample-util.h b/src/pulsecore/sample-util.h index 278b88b0..00b9ae0b 100644 --- a/src/pulsecore/sample-util.h +++ b/src/pulsecore/sample-util.h @@ -91,6 +91,7 @@ typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned chan pa_do_volume_func_t 
pa_get_volume_func(pa_sample_format_t f); void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func); +void pa_volume_func_init_mmx(void); #define PA_CHANNEL_POSITION_MASK_LEFT \ (PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_FRONT_LEFT) \ diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c new file mode 100644 index 00000000..9f49a624 --- /dev/null +++ b/src/pulsecore/svolume_mmx.c @@ -0,0 +1,424 @@ +/*** + This file is part of PulseAudio. + + Copyright 2004-2006 Lennart Poettering + Copyright 2009 Wim Taymans + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. 
+***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include +#include +#include +#include + +#include "sample-util.h" +#include "endianmacros.h" + +#if 0 +static void +pa_volume_u8_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) *samples - 0x80; + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); + *samples++ = (uint8_t) (t + 0x80); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_alaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) st_alaw2linear16(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) st_ulaw2linear16(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} +#endif + +static void +pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + int64_t channel, temp; + + /* the max number of samples we process at a time */ + channels = MAX (4, channels); + +#define VOLUME_32x16(s,v) /* v1_h | v1_l | v0_h | v0_l */ \ + " pxor %%mm4, %%mm4 \n\t" \ + " 
punpcklwd %%mm4, "#s" \n\t" /* 0 | p1 | 0 | p0 */ \ + " pcmpgtw "#s", %%mm4 \n\t" /* select sign from sample */ \ + " pand "#v", %%mm4 \n\t" /* extract correction factors */ \ + " movq "#s", %%mm5 \n\t" \ + " pmulhuw "#v", "#s" \n\t" /* 0 | p1*v1lh | 0 | p0*v0lh */ \ + " psubd %%mm4, "#s" \n\t" /* sign correction */ \ + " psrld $16, "#v" \n\t" /* 0 | v1h | 0 | v0h */ \ + " pmaddwd %%mm5, "#v" \n\t" /* p1 * v1h | p0 * v0h */ \ + " paddd "#s", "#v" \n\t" /* p1 * v1 | p0 * v0 */ \ + " packssdw "#v", "#v" \n\t" /* p0*v0 | p1*v1 | p0*v0 | p1*v1 */ + +#define MOD_ADD(a,b) \ + " add "#a", %3 \n\t" \ + " mov %3, %4 \n\t" \ + " sub %5, %4 \n\t" \ + " cmp %3, "#b" \n\t" \ + " cmovae %4, %3 \n\t" + + __asm__ __volatile__ ( + " xor %3, %3 \n\t" + " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ + + " test $1, %2 \n\t" /* check for odd samples */ + " je 2f \n\t" + + " movd (%1, %3, 4), %%mm0 \n\t" /* do odd samples */ + " movw (%0), %%ax \n\t" + " movd %%eax, %%mm1 \n\t" + VOLUME_32x16 (%%mm1, %%mm0) + " movd %%mm0, %%eax \n\t" + " movw %%ax, (%0) \n\t" + " add $2, %0 \n\t" + MOD_ADD ($1, %5) + " dec %2 \n\t" + + "2: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ + " test $1, %2 \n\t" /* check for odd samples */ + " je 4f \n\t" + + "3: \n\t" /* do samples in pairs of 2 */ + " movq (%1, %3, 4), %%mm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ + " movd (%0), %%mm1 \n\t" /* X | X | p1 | p0 */ + VOLUME_32x16 (%%mm1, %%mm0) + " movd %%mm0, (%0) \n\t" + " add $4, %0 \n\t" + MOD_ADD ($2, %5) + " dec %2 \n\t" + + "4: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ + " cmp $0, %2 \n\t" + " je 6f \n\t" + + "5: \n\t" /* do samples in pairs of 4 */ + " movq (%1, %3, 4), %%mm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ + " movq 8(%1, %3, 4), %%mm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */ + " movd (%0), %%mm1 \n\t" /* X | X | p1 | p0 */ + " movd 4(%0), %%mm3 \n\t" /* X | X | p3 | p2 */ + VOLUME_32x16 (%%mm1, %%mm0) + VOLUME_32x16 (%%mm3, %%mm2) 
+ " movd %%mm0, (%0) \n\t" + " movd %%mm2, 4(%0) \n\t" + " add $8, %0 \n\t" + MOD_ADD ($4, %5) + " dec %2 \n\t" + " jne 5b \n\t" + + "6: \n\t" + " emms \n\t" + + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((int64_t)channel), "=r" (temp) + : "r" ((int64_t)channels) + : "rax", "cc" + ); +} + +#if 0 +static void +pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (int16_t); + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) PA_INT16_SWAP(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = PA_INT16_SWAP((int16_t) t); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_float32ne_mmx (float *samples, float *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (float); + + for (channel = 0; length; length--) { + *samples++ *= volumes[channel]; + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_float32re_mmx (float *samples, float *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (float); + + for (channel = 0; length; length--) { + float t; + + t = PA_FLOAT32_SWAP(*samples); + t *= volumes[channel]; + *samples++ = PA_FLOAT32_SWAP(t); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s32ne_mmx (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (int32_t); + + for (channel = 0; length; length--) { + int64_t t; + + t = (int64_t)(*samples); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = (int32_t) t; + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s32re_mmx (int32_t 
*samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (int32_t); + + for (channel = 0; length; length--) { + int64_t t; + + t = (int64_t) PA_INT32_SWAP(*samples); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = PA_INT32_SWAP((int32_t) t); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s24ne_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + uint8_t *e; + + e = samples + length; + + for (channel = 0; samples < e; samples += 3) { + int64_t t; + + t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s24re_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + uint8_t *e; + + e = samples + length; + + for (channel = 0; samples < e; samples += 3) { + int64_t t; + + t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s24_32ne_mmx (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (uint32_t); + + for (channel = 0; length; length--) { + int64_t t; + + t = (int64_t) ((int32_t) (*samples << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = ((uint32_t) ((int32_t) t)) >> 8; + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s24_32re_mmx (uint32_t *samples, int32_t *volumes, 
unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (uint32_t); + + for (channel = 0; length; length--) { + int64_t t; + + t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} +#endif + +#undef RUN_TEST + +#ifdef RUN_TEST +#define CHANNELS 1 +#define SAMPLES 1021 +#define TIMES 1000 + +static void run_test (void) { + int16_t samples[SAMPLES]; + int16_t samples_ref[SAMPLES]; + int16_t samples_orig[SAMPLES]; + int32_t volumes[CHANNELS]; + int i, j; + pa_do_volume_func_t func; + + func = pa_get_volume_func (PA_SAMPLE_S16NE); + + printf ("checking\n"); + + for (j = 0; j < TIMES; j++) { + pa_random (samples, sizeof (samples)); + memcpy (samples_ref, samples, sizeof (samples)); + memcpy (samples_orig, samples, sizeof (samples)); + + for (i = 0; i < CHANNELS; i++) { + volumes[i] = rand() >> 15; + } + + pa_volume_s16ne_mmx (samples, volumes, CHANNELS, SAMPLES * sizeof (int16_t)); + func (samples_ref, volumes, CHANNELS, SAMPLES * sizeof (int16_t)); + + for (i = 0; i < SAMPLES; i++) { + if (samples[i] != samples_ref[i]) { + printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], + samples_orig[i], volumes[i % CHANNELS]); + } +#if 0 + else + printf ("%d: %04x == %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], + samples_orig[i], volumes[i % CHANNELS]); +#endif + } + } +} +#endif + +void pa_volume_func_init_mmx (void) { + pa_log_info("Initialising MMX optimized functions."); + +#ifdef RUN_TEST + run_test (); +#endif + + pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx); +} -- cgit From 08f3e16c84fabca9c6789440f98ff8dca62eb81a Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 12 Aug 2009 20:43:37 +0200 Subject: volume_mmx: fix mmx code a bit --- 
src/pulsecore/svolume_mmx.c | 46 +++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) (limited to 'src') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index 9f49a624..6dcc26c2 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -104,14 +104,15 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi { int64_t channel, temp; - /* the max number of samples we process at a time */ + /* the max number of samples we process at a time, this is also the max amount + * we overread the volume array, which should have enough padding. */ channels = MAX (4, channels); #define VOLUME_32x16(s,v) /* v1_h | v1_l | v0_h | v0_l */ \ " pxor %%mm4, %%mm4 \n\t" \ " punpcklwd %%mm4, "#s" \n\t" /* 0 | p1 | 0 | p0 */ \ " pcmpgtw "#s", %%mm4 \n\t" /* select sign from sample */ \ - " pand "#v", %%mm4 \n\t" /* extract correction factors */ \ + " pand "#v", %%mm4 \n\t" /* extract sign correction factors */ \ " movq "#s", %%mm5 \n\t" \ " pmulhuw "#v", "#s" \n\t" /* 0 | p1*v1lh | 0 | p0*v0lh */ \ " psubd %%mm4, "#s" \n\t" /* sign correction */ \ @@ -123,8 +124,8 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi #define MOD_ADD(a,b) \ " add "#a", %3 \n\t" \ " mov %3, %4 \n\t" \ - " sub %5, %4 \n\t" \ - " cmp %3, "#b" \n\t" \ + " sub "#b", %4 \n\t" \ + " cmp "#b", %3 \n\t" \ " cmovae %4, %3 \n\t" __asm__ __volatile__ ( @@ -135,14 +136,13 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi " je 2f \n\t" " movd (%1, %3, 4), %%mm0 \n\t" /* do odd samples */ - " movw (%0), %%ax \n\t" - " movd %%eax, %%mm1 \n\t" + " movw (%0), %4 \n\t" + " movd %4, %%mm1 \n\t" VOLUME_32x16 (%%mm1, %%mm0) - " movd %%mm0, %%eax \n\t" - " movw %%ax, (%0) \n\t" + " movd %%mm0, %4 \n\t" + " movw %4, (%0) \n\t" " add $2, %0 \n\t" MOD_ADD ($1, %5) - " dec %2 \n\t" "2: \n\t" " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ 
@@ -156,7 +156,6 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi " movd %%mm0, (%0) \n\t" " add $4, %0 \n\t" MOD_ADD ($2, %5) - " dec %2 \n\t" "4: \n\t" " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ @@ -180,9 +179,9 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi "6: \n\t" " emms \n\t" - : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((int64_t)channel), "=r" (temp) + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((int64_t)channel), "=&r" (temp) : "r" ((int64_t)channels) - : "rax", "cc" + : "cc" ); } @@ -370,7 +369,7 @@ pa_volume_s24_32re_mmx (uint32_t *samples, int32_t *volumes, unsigned channels, #undef RUN_TEST #ifdef RUN_TEST -#define CHANNELS 1 +#define CHANNELS 2 #define SAMPLES 1021 #define TIMES 1000 @@ -378,25 +377,32 @@ static void run_test (void) { int16_t samples[SAMPLES]; int16_t samples_ref[SAMPLES]; int16_t samples_orig[SAMPLES]; - int32_t volumes[CHANNELS]; - int i, j; + int32_t volumes[CHANNELS + 16]; + int i, j, padding; pa_do_volume_func_t func; func = pa_get_volume_func (PA_SAMPLE_S16NE); - printf ("checking\n"); + printf ("checking %d\n", sizeof (samples)); for (j = 0; j < TIMES; j++) { + /* + for (i = 0; i < SAMPLES; i++) { + samples[i] samples_ref[i] = samples_orig[i] = rand() >> 16; + } + */ + pa_random (samples, sizeof (samples)); memcpy (samples_ref, samples, sizeof (samples)); memcpy (samples_orig, samples, sizeof (samples)); - for (i = 0; i < CHANNELS; i++) { + for (i = 0; i < CHANNELS; i++) volumes[i] = rand() >> 15; - } + for (padding = 0; padding < 16; padding++, i++) + volumes[i] = volumes[padding]; - pa_volume_s16ne_mmx (samples, volumes, CHANNELS, SAMPLES * sizeof (int16_t)); - func (samples_ref, volumes, CHANNELS, SAMPLES * sizeof (int16_t)); + pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); + func (samples_ref, volumes, CHANNELS, sizeof (samples)); for (i = 0; i < SAMPLES; i++) { if (samples[i] != 
samples_ref[i]) { -- cgit From 7086784573e9e6c92d4c34404f18891c2d19872a Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 12 Aug 2009 20:44:12 +0200 Subject: volume_sse: add sse optimisations --- src/Makefile.am | 2 +- src/daemon/main.c | 1 + src/pulsecore/sample-util.h | 1 + src/pulsecore/svolume_sse.c | 437 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 440 insertions(+), 1 deletion(-) create mode 100644 src/pulsecore/svolume_sse.c (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index e7a99003..b692e4a9 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -826,7 +826,7 @@ libpulsecore_@PA_MAJORMINORMICRO@_la_SOURCES = \ pulsecore/rtpoll.c pulsecore/rtpoll.h \ pulsecore/sample-util.c pulsecore/sample-util.h \ pulsecore/svolume_c.c \ - pulsecore/svolume_mmx.c \ + pulsecore/svolume_mmx.c pulsecore/svolume_sse.c \ pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \ pulsecore/sconv-s16le.c pulsecore/sconv-s16le.h \ pulsecore/sconv.c pulsecore/sconv.h \ diff --git a/src/daemon/main.c b/src/daemon/main.c index e3c395f2..3c5f7f95 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -822,6 +822,7 @@ int main(int argc, char *argv[]) { pa_memtrap_install(); pa_volume_func_init_mmx(); + pa_volume_func_init_sse(); pa_assert_se(mainloop = pa_mainloop_new()); diff --git a/src/pulsecore/sample-util.h b/src/pulsecore/sample-util.h index 00b9ae0b..563dbb6a 100644 --- a/src/pulsecore/sample-util.h +++ b/src/pulsecore/sample-util.h @@ -92,6 +92,7 @@ pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f); void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func); void pa_volume_func_init_mmx(void); +void pa_volume_func_init_sse(void); #define PA_CHANNEL_POSITION_MASK_LEFT \ (PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_FRONT_LEFT) \ diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c new file mode 100644 index 00000000..2d4c541b --- /dev/null +++ b/src/pulsecore/svolume_sse.c @@ -0,0 +1,437 @@ +/*** + 
This file is part of PulseAudio. + + Copyright 2004-2006 Lennart Poettering + Copyright 2009 Wim Taymans + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include +#include +#include +#include + +#include "sample-util.h" +#include "endianmacros.h" + +#if 0 +static void +pa_volume_u8_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) *samples - 0x80; + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); + *samples++ = (uint8_t) (t + 0x80); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_alaw_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) st_alaw2linear16(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void 
+pa_volume_ulaw_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) st_ulaw2linear16(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} +#endif + +static void +pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + int64_t channel; + int64_t temp; + + /* the max number of samples we process at a time, this is also the max amount + * we overread the volume array, which should have enough padding. */ + channels = MAX (8, channels); + +#define VOLUME_32x16(s,v) /* v1_h | v1_l | v0_h | v0_l */ \ + " pxor %%xmm4, %%xmm4 \n\t" \ + " punpcklwd %%xmm4, "#s" \n\t" /* 0 | p1 | 0 | p0 */ \ + " pcmpgtw "#s", %%xmm4 \n\t" /* select sign from sample */ \ + " pand "#v", %%xmm4 \n\t" /* extract sign correction factors */ \ + " movdqa "#s", %%xmm5 \n\t" \ + " pmulhuw "#v", "#s" \n\t" /* 0 | p1*v1lh | 0 | p0*v0lh */ \ + " psubd %%xmm4, "#s" \n\t" /* sign correction */ \ + " psrld $16, "#v" \n\t" /* 0 | v1h | 0 | v0h */ \ + " pmaddwd %%xmm5, "#v" \n\t" /* p1 * v1h | p0 * v0h */ \ + " paddd "#s", "#v" \n\t" /* p1 * v1 | p0 * v0 */ \ + " packssdw "#v", "#v" \n\t" /* p0*v0 | p1*v1 | p0*v0 | p1*v1 */ + +#define MOD_ADD(a,b) \ + " add "#a", %3 \n\t" \ + " mov %3, %4 \n\t" \ + " sub "#b", %4 \n\t" \ + " cmp "#b", %3 \n\t" \ + " cmovae %4, %3 \n\t" + + __asm__ __volatile__ ( + " xor %3, %3 \n\t" + " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ + + " test $1, %2 \n\t" /* check for odd samples */ + " je 2f \n\t" + + " movd (%1, %3, 4), %%xmm0 \n\t" /* do odd sample */ + " movw (%0), %4 \n\t" + " movd %4, %%xmm1 \n\t" + VOLUME_32x16 (%%xmm1, %%xmm0) + " movd %%xmm0, %4 \n\t" + " movw %4, 
(%0) \n\t" + " add $2, %0 \n\t" + MOD_ADD ($1, %5) + + "2: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ + " test $1, %2 \n\t" /* check for odd samples */ + " je 4f \n\t" + + "3: \n\t" /* do samples in pairs of 2 */ + " movq (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ + " movd (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */ + VOLUME_32x16 (%%xmm1, %%xmm0) + " movd %%xmm0, (%0) \n\t" + " add $4, %0 \n\t" + MOD_ADD ($2, %5) + + "4: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ + " test $1, %2 \n\t" /* check for odd samples */ + " je 6f \n\t" + + "5: \n\t" /* do samples in pairs of 4 */ + " movdqa (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ + " movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */ + VOLUME_32x16 (%%xmm1, %%xmm0) + " movq %%xmm0, (%0) \n\t" + " add $8, %0 \n\t" + MOD_ADD ($4, %5) + + "6: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */ + " cmp $0, %2 \n\t" + " je 8f \n\t" + + "7: \n\t" /* do samples in pairs of 8 */ + " movdqa (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ + " movdqa 16(%1, %3, 4), %%xmm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */ + " movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */ + " movq 8(%0), %%xmm3 \n\t" /* X | X | p3 | p2 */ + VOLUME_32x16 (%%xmm1, %%xmm0) + VOLUME_32x16 (%%xmm3, %%xmm2) + " movq %%xmm0, (%0) \n\t" + " movq %%xmm2, 8(%0) \n\t" + " add $16, %0 \n\t" + MOD_ADD ($8, %5) + " dec %2 \n\t" + " jne 7b \n\t" + "8: \n\t" + + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) + : "r" ((int64_t)channels) + : "cc" + ); +} + +#if 0 +static void +pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (int16_t); + + for (channel = 0; length; length--) { + int32_t t, hi, lo; + + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; + + t = (int32_t) PA_INT16_SWAP(*samples); + t = ((t * lo) >> 16) + (t * hi); + 
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = PA_INT16_SWAP((int16_t) t); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_float32ne_sse (float *samples, float *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (float); + + for (channel = 0; length; length--) { + *samples++ *= volumes[channel]; + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_float32re_sse (float *samples, float *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (float); + + for (channel = 0; length; length--) { + float t; + + t = PA_FLOAT32_SWAP(*samples); + t *= volumes[channel]; + *samples++ = PA_FLOAT32_SWAP(t); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s32ne_sse (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (int32_t); + + for (channel = 0; length; length--) { + int64_t t; + + t = (int64_t)(*samples); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = (int32_t) t; + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s32re_sse (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (int32_t); + + for (channel = 0; length; length--) { + int64_t t; + + t = (int64_t) PA_INT32_SWAP(*samples); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = PA_INT32_SWAP((int32_t) t); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s24ne_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + uint8_t *e; + + e = samples + length; + + for (channel = 0; samples < e; samples += 3) { + int64_t t; + + t = (int64_t)((int32_t) 
(PA_READ24NE(samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s24re_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + uint8_t *e; + + e = samples + length; + + for (channel = 0; samples < e; samples += 3) { + int64_t t; + + t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s24_32ne_sse (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (uint32_t); + + for (channel = 0; length; length--) { + int64_t t; + + t = (int64_t) ((int32_t) (*samples << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = ((uint32_t) ((int32_t) t)) >> 8; + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} + +static void +pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + unsigned channel; + + length /= sizeof (uint32_t); + + for (channel = 0; length; length--) { + int64_t t; + + t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); + + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } +} +#endif + +#undef RUN_TEST + +#ifdef RUN_TEST +#define CHANNELS 2 +#define SAMPLES 1021 +#define TIMES 1000 + +static void run_test (void) { + int16_t samples[SAMPLES]; + int16_t samples_ref[SAMPLES]; + int16_t samples_orig[SAMPLES]; + int32_t 
volumes[CHANNELS + 16]; + int i, j, padding; + pa_do_volume_func_t func; + + func = pa_get_volume_func (PA_SAMPLE_S16NE); + + printf ("checking %d\n", sizeof (samples)); + + for (j = 0; j < TIMES; j++) { + pa_random (samples, sizeof (samples)); + memcpy (samples_ref, samples, sizeof (samples)); + memcpy (samples_orig, samples, sizeof (samples)); + + for (i = 0; i < CHANNELS; i++) + volumes[i] = rand() >> 15; + + for (padding = 0; padding < 16; padding++, i++) + volumes[i] = volumes[padding]; + + pa_volume_s16ne_sse (samples, volumes, CHANNELS, SAMPLES * sizeof (int16_t)); + func (samples_ref, volumes, CHANNELS, SAMPLES * sizeof (int16_t)); + + for (i = 0; i < SAMPLES; i++) { + if (samples[i] != samples_ref[i]) { + printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], + samples_orig[i], volumes[i % CHANNELS]); + } +#if 0 + else + printf ("%d: %04x == %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], + samples_orig[i], volumes[i % CHANNELS]); +#endif + } + } +} +#endif + +void pa_volume_func_init_sse (void) { + pa_log_info("Initialising SSE optimized functions."); + +#ifdef RUN_TEST + run_test (); +#endif + + pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse); +} -- cgit From 5998cf99b08d448dd5158ed6229262aa67ea4a66 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 13 Aug 2009 13:45:01 +0200 Subject: svolume: improve SSE and MMX code --- src/pulsecore/svolume_mmx.c | 200 +++++++++++++++++++++++++++++--------------- src/pulsecore/svolume_sse.c | 191 +++++++++++++++++++++++++++++------------- 2 files changed, 268 insertions(+), 123 deletions(-) (limited to 'src') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index 6dcc26c2..3c229456 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -99,6 +99,46 @@ pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsig } #endif +#define VOLUME_32x16(s,v) /* .. 
| vh | vl | */ \ + " pxor %%mm4, %%mm4 \n\t" /* .. | 0 | 0 | */ \ + " punpcklwd %%mm4, "#s" \n\t" /* .. | 0 | p0 | */ \ + " pcmpgtw "#v", %%mm4 \n\t" /* .. | 0 | s(vl) | */ \ + " pand "#s", %%mm4 \n\t" /* .. | 0 | (p0) | (vl >> 15) & p */ \ + " movq %%mm6, %%mm5 \n\t" /* .. | ffff | 0 | */ \ + " pand "#v", %%mm5 \n\t" /* .. | vh | 0 | */ \ + " por %%mm5, %%mm4 \n\t" /* .. | vh | (p0) | */ \ + " pmulhw "#s", "#v" \n\t" /* .. | 0 | vl*p0 | */ \ + " paddw %%mm4, "#v" \n\t" /* .. | vh | vl*p0 | vh + sign correct */ \ + " pslld $16, "#s" \n\t" /* .. | p0 | 0 | */ \ + " por %%mm7, "#s" \n\t" /* .. | p0 | 1 | */ \ + " pmaddwd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \ + " packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */ + +#define MOD_ADD(a,b) \ + " add "#a", %3 \n\t" \ + " mov %3, %4 \n\t" \ + " sub "#b", %4 \n\t" \ + " cmp "#b", %3 \n\t" \ + " cmovae %4, %3 \n\t" + +/* swap 16 bits */ +#define SWAP_16(s) \ + " movq "#s", %%mm4 \n\t" /* .. | h l | */ \ + " psrlw $8, %%mm4 \n\t" /* .. | 0 h | */ \ + " psllw $8, "#s" \n\t" /* .. | l 0 | */ \ + " por %%mm4, "#s" \n\t" /* .. | l h | */ + +/* swap 2 registers 16 bits for better pairing */ +#define SWAP_16_2(s1,s2) \ + " movq "#s1", %%mm4 \n\t" /* .. | h l | */ \ + " movq "#s2", %%mm5 \n\t" \ + " psrlw $8, %%mm4 \n\t" /* .. | 0 h | */ \ + " psrlw $8, %%mm5 \n\t" \ + " psllw $8, "#s1" \n\t" /* .. | l 0 | */ \ + " psllw $8, "#s2" \n\t" \ + " por %%mm4, "#s1" \n\t" /* .. | l h | */ \ + " por %%mm5, "#s2" \n\t" + static void pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { @@ -108,38 +148,22 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi * we overread the volume array, which should have enough padding. 
*/ channels = MAX (4, channels); -#define VOLUME_32x16(s,v) /* v1_h | v1_l | v0_h | v0_l */ \ - " pxor %%mm4, %%mm4 \n\t" \ - " punpcklwd %%mm4, "#s" \n\t" /* 0 | p1 | 0 | p0 */ \ - " pcmpgtw "#s", %%mm4 \n\t" /* select sign from sample */ \ - " pand "#v", %%mm4 \n\t" /* extract sign correction factors */ \ - " movq "#s", %%mm5 \n\t" \ - " pmulhuw "#v", "#s" \n\t" /* 0 | p1*v1lh | 0 | p0*v0lh */ \ - " psubd %%mm4, "#s" \n\t" /* sign correction */ \ - " psrld $16, "#v" \n\t" /* 0 | v1h | 0 | v0h */ \ - " pmaddwd %%mm5, "#v" \n\t" /* p1 * v1h | p0 * v0h */ \ - " paddd "#s", "#v" \n\t" /* p1 * v1 | p0 * v0 */ \ - " packssdw "#v", "#v" \n\t" /* p0*v0 | p1*v1 | p0*v0 | p1*v1 */ - -#define MOD_ADD(a,b) \ - " add "#a", %3 \n\t" \ - " mov %3, %4 \n\t" \ - " sub "#b", %4 \n\t" \ - " cmp "#b", %3 \n\t" \ - " cmovae %4, %3 \n\t" - __asm__ __volatile__ ( " xor %3, %3 \n\t" " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ + " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */ + " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */ + " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */ + " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */ " test $1, %2 \n\t" /* check for odd samples */ " je 2f \n\t" - " movd (%1, %3, 4), %%mm0 \n\t" /* do odd samples */ - " movw (%0), %4 \n\t" + " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ + " movw (%0), %4 \n\t" /* .. | p0 | */ " movd %4, %%mm1 \n\t" VOLUME_32x16 (%%mm1, %%mm0) - " movd %%mm0, %4 \n\t" + " movd %%mm0, %4 \n\t" /* .. 
| p0*v0 | */ " movw %4, (%0) \n\t" " add $2, %0 \n\t" MOD_ADD ($1, %5) @@ -149,11 +173,11 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi " test $1, %2 \n\t" /* check for odd samples */ " je 4f \n\t" - "3: \n\t" /* do samples in pairs of 2 */ - " movq (%1, %3, 4), %%mm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ - " movd (%0), %%mm1 \n\t" /* X | X | p1 | p0 */ + "3: \n\t" /* do samples in groups of 2 */ + " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ VOLUME_32x16 (%%mm1, %%mm0) - " movd %%mm0, (%0) \n\t" + " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */ " add $4, %0 \n\t" MOD_ADD ($2, %5) @@ -162,15 +186,15 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi " cmp $0, %2 \n\t" " je 6f \n\t" - "5: \n\t" /* do samples in pairs of 4 */ - " movq (%1, %3, 4), %%mm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ - " movq 8(%1, %3, 4), %%mm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */ - " movd (%0), %%mm1 \n\t" /* X | X | p1 | p0 */ - " movd 4(%0), %%mm3 \n\t" /* X | X | p3 | p2 */ + "5: \n\t" /* do samples in groups of 4 */ + " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */ + " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ + " movd 4(%0), %%mm3 \n\t" /* .. 
| p3 | p2 | */ VOLUME_32x16 (%%mm1, %%mm0) VOLUME_32x16 (%%mm3, %%mm2) - " movd %%mm0, (%0) \n\t" - " movd %%mm2, 4(%0) \n\t" + " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */ + " movd %%mm2, 4(%0) \n\t" /* | p3*v3 | p2*v2 | */ " add $8, %0 \n\t" MOD_ADD ($4, %5) " dec %2 \n\t" @@ -185,30 +209,83 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi ); } -#if 0 static void pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - unsigned channel; + int64_t channel, temp; - length /= sizeof (int16_t); + /* the max number of samples we process at a time, this is also the max amount + * we overread the volume array, which should have enough padding. */ + channels = MAX (4, channels); - for (channel = 0; length; length--) { - int32_t t, hi, lo; + __asm__ __volatile__ ( + " xor %3, %3 \n\t" + " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ + " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */ + " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */ + " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */ + " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */ - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; + " test $1, %2 \n\t" /* check for odd samples */ + " je 2f \n\t" - t = (int32_t) PA_INT16_SWAP(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = PA_INT16_SWAP((int16_t) t); + " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ + " movw (%0), %4 \n\t" /* .. | p0 | */ + " rorw $8, %4 \n\t" + " movd %4, %%mm1 \n\t" + VOLUME_32x16 (%%mm1, %%mm0) + " movd %%mm0, %4 \n\t" /* .. 
| p0*v0 | */ + " rorw $8, %4 \n\t" + " movw %4, (%0) \n\t" + " add $2, %0 \n\t" + MOD_ADD ($1, %5) - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + "2: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ + " test $1, %2 \n\t" /* check for odd samples */ + " je 4f \n\t" + + "3: \n\t" /* do samples in groups of 2 */ + " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ + SWAP_16 (%%mm1) + VOLUME_32x16 (%%mm1, %%mm0) + SWAP_16 (%%mm0) + " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */ + " add $4, %0 \n\t" + MOD_ADD ($2, %5) + + "4: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ + " cmp $0, %2 \n\t" + " je 6f \n\t" + + "5: \n\t" /* do samples in groups of 4 */ + " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */ + " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ + " movd 4(%0), %%mm3 \n\t" /* .. 
| p3 | p2 | */ + SWAP_16_2 (%%mm1, %%mm3) + VOLUME_32x16 (%%mm1, %%mm0) + VOLUME_32x16 (%%mm3, %%mm2) + SWAP_16_2 (%%mm0, %%mm2) + " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */ + " movd %%mm2, 4(%0) \n\t" /* | p3*v3 | p2*v2 | */ + " add $8, %0 \n\t" + MOD_ADD ($4, %5) + " dec %2 \n\t" + " jne 5b \n\t" + + "6: \n\t" + " emms \n\t" + + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((int64_t)channel), "=&r" (temp) + : "r" ((int64_t)channels) + : "cc" + ); } +#if 0 static void pa_volume_float32ne_mmx (float *samples, float *volumes, unsigned channels, unsigned length) { @@ -366,42 +443,37 @@ pa_volume_s24_32re_mmx (uint32_t *samples, int32_t *volumes, unsigned channels, } #endif -#undef RUN_TEST +#define RUN_TEST #ifdef RUN_TEST #define CHANNELS 2 #define SAMPLES 1021 #define TIMES 1000 +#define PADDING 16 static void run_test (void) { int16_t samples[SAMPLES]; int16_t samples_ref[SAMPLES]; int16_t samples_orig[SAMPLES]; - int32_t volumes[CHANNELS + 16]; + int32_t volumes[CHANNELS + PADDING]; int i, j, padding; pa_do_volume_func_t func; - func = pa_get_volume_func (PA_SAMPLE_S16NE); + func = pa_get_volume_func (PA_SAMPLE_S16RE); - printf ("checking %d\n", sizeof (samples)); + printf ("checking MMX %d\n", sizeof (samples)); for (j = 0; j < TIMES; j++) { - /* - for (i = 0; i < SAMPLES; i++) { - samples[i] samples_ref[i] = samples_orig[i] = rand() >> 16; - } - */ - pa_random (samples, sizeof (samples)); memcpy (samples_ref, samples, sizeof (samples)); memcpy (samples_orig, samples, sizeof (samples)); for (i = 0; i < CHANNELS; i++) - volumes[i] = rand() >> 15; - for (padding = 0; padding < 16; padding++, i++) + volumes[i] = rand() >> 1; + for (padding = 0; padding < PADDING; padding++, i++) volumes[i] = volumes[padding]; - pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); + pa_volume_s16re_mmx (samples, volumes, CHANNELS, sizeof (samples)); func (samples_ref, volumes, CHANNELS, sizeof (samples)); for (i = 0; i < SAMPLES; i++) { @@ -409,11 
+481,6 @@ static void run_test (void) { printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], samples_orig[i], volumes[i % CHANNELS]); } -#if 0 - else - printf ("%d: %04x == %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], - samples_orig[i], volumes[i % CHANNELS]); -#endif } } } @@ -427,4 +494,5 @@ void pa_volume_func_init_mmx (void) { #endif pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx); + pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx); } diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c index 2d4c541b..ff583a06 100644 --- a/src/pulsecore/svolume_sse.c +++ b/src/pulsecore/svolume_sse.c @@ -99,6 +99,44 @@ pa_volume_ulaw_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsig } #endif +#define VOLUME_32x16(s,v) /* .. | vh | vl | */ \ + " pxor %%xmm4, %%xmm4 \n\t" /* .. | 0 | 0 | */ \ + " punpcklwd %%xmm4, "#s" \n\t" /* .. | 0 | p0 | */ \ + " pcmpgtw "#s", %%xmm4 \n\t" /* .. | 0 | s(p0) | */ \ + " pand "#v", %%xmm4 \n\t" /* .. | 0 | (vl) | */ \ + " movdqa "#s", %%xmm5 \n\t" \ + " pmulhuw "#v", "#s" \n\t" /* .. | 0 | vl*p0 | */ \ + " psubd %%xmm4, "#s" \n\t" /* .. | 0 | vl*p0 | + sign correct */ \ + " psrld $16, "#v" \n\t" /* .. | p0 | 0 | */ \ + " pmaddwd %%xmm5, "#v" \n\t" /* .. | p0 * vh | */ \ + " paddd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \ + " packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */ + +#define MOD_ADD(a,b) \ + " add "#a", %3 \n\t" /* channel += inc */ \ + " mov %3, %4 \n\t" \ + " sub "#b", %4 \n\t" /* tmp = channel - channels */ \ + " cmp "#b", %3 \n\t" /* if (channel >= channels) */ \ + " cmovae %4, %3 \n\t" /* channel = tmp */ + +/* swap 16 bits */ +#define SWAP_16(s) \ + " movdqa "#s", %%xmm4 \n\t" /* .. | h l | */ \ + " psrlw $8, %%xmm4 \n\t" /* .. | 0 h | */ \ + " psllw $8, "#s" \n\t" /* .. | l 0 | */ \ + " por %%xmm4, "#s" \n\t" /* .. 
| l h | */ + +/* swap 2 registers 16 bits for better pairing */ +#define SWAP_16_2(s1,s2) \ + " movdqa "#s1", %%xmm4 \n\t" /* .. | h l | */ \ + " movdqa "#s2", %%xmm5 \n\t" \ + " psrlw $8, %%xmm4 \n\t" /* .. | 0 h | */ \ + " psrlw $8, %%xmm5 \n\t" \ + " psllw $8, "#s1" \n\t" /* .. | l 0 | */ \ + " psllw $8, "#s2" \n\t" \ + " por %%xmm4, "#s1" \n\t" /* .. | l h | */ \ + " por %%xmm5, "#s2" \n\t" + static void pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { @@ -109,25 +147,83 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi * we overread the volume array, which should have enough padding. */ channels = MAX (8, channels); -#define VOLUME_32x16(s,v) /* v1_h | v1_l | v0_h | v0_l */ \ - " pxor %%xmm4, %%xmm4 \n\t" \ - " punpcklwd %%xmm4, "#s" \n\t" /* 0 | p1 | 0 | p0 */ \ - " pcmpgtw "#s", %%xmm4 \n\t" /* select sign from sample */ \ - " pand "#v", %%xmm4 \n\t" /* extract sign correction factors */ \ - " movdqa "#s", %%xmm5 \n\t" \ - " pmulhuw "#v", "#s" \n\t" /* 0 | p1*v1lh | 0 | p0*v0lh */ \ - " psubd %%xmm4, "#s" \n\t" /* sign correction */ \ - " psrld $16, "#v" \n\t" /* 0 | v1h | 0 | v0h */ \ - " pmaddwd %%xmm5, "#v" \n\t" /* p1 * v1h | p0 * v0h */ \ - " paddd "#s", "#v" \n\t" /* p1 * v1 | p0 * v0 */ \ - " packssdw "#v", "#v" \n\t" /* p0*v0 | p1*v1 | p0*v0 | p1*v1 */ + __asm__ __volatile__ ( + " xor %3, %3 \n\t" + " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ -#define MOD_ADD(a,b) \ - " add "#a", %3 \n\t" \ - " mov %3, %4 \n\t" \ - " sub "#b", %4 \n\t" \ - " cmp "#b", %3 \n\t" \ - " cmovae %4, %3 \n\t" + " test $1, %2 \n\t" /* check for odd samples */ + " je 2f \n\t" + + " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */ + " movw (%0), %4 \n\t" /* .. | p0 | */ + " movd %4, %%xmm1 \n\t" + VOLUME_32x16 (%%xmm1, %%xmm0) + " movd %%xmm0, %4 \n\t" /* .. 
| p0*v0 | */ + " movw %4, (%0) \n\t" + " add $2, %0 \n\t" + MOD_ADD ($1, %5) + + "2: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ + " test $1, %2 \n\t" + " je 4f \n\t" + + "3: \n\t" /* do samples in groups of 2 */ + " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */ + VOLUME_32x16 (%%xmm1, %%xmm0) + " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ + " add $4, %0 \n\t" + MOD_ADD ($2, %5) + + "4: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ + " test $1, %2 \n\t" + " je 6f \n\t" + + "5: \n\t" /* do samples in groups of 4 */ + " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ + " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ + VOLUME_32x16 (%%xmm1, %%xmm0) + " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ + " add $8, %0 \n\t" + MOD_ADD ($4, %5) + + "6: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */ + " cmp $0, %2 \n\t" + " je 8f \n\t" + + "7: \n\t" /* do samples in groups of 8 */ + " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ + " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */ + " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ + " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */ + VOLUME_32x16 (%%xmm1, %%xmm0) + VOLUME_32x16 (%%xmm3, %%xmm2) + " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ + " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. 
p4*v4 | */ + " add $16, %0 \n\t" + MOD_ADD ($8, %5) + " dec %2 \n\t" + " jne 7b \n\t" + "8: \n\t" + + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) + : "r" ((int64_t)channels) + : "cc" + ); +} + +static void +pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + int64_t channel; + int64_t temp; + + /* the max number of samples we process at a time, this is also the max amount + * we overread the volume array, which should have enough padding. */ + channels = MAX (8, channels); __asm__ __volatile__ ( " xor %3, %3 \n\t" @@ -138,9 +234,11 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi " movd (%1, %3, 4), %%xmm0 \n\t" /* do odd sample */ " movw (%0), %4 \n\t" + " rorw $8, %4 \n\t" " movd %4, %%xmm1 \n\t" VOLUME_32x16 (%%xmm1, %%xmm0) " movd %%xmm0, %4 \n\t" + " rorw $8, %4 \n\t" " movw %4, (%0) \n\t" " add $2, %0 \n\t" MOD_ADD ($1, %5) @@ -153,7 +251,9 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi "3: \n\t" /* do samples in pairs of 2 */ " movq (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ " movd (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */ + SWAP_16 (%%xmm1) VOLUME_32x16 (%%xmm1, %%xmm0) + SWAP_16 (%%xmm0) " movd %%xmm0, (%0) \n\t" " add $4, %0 \n\t" MOD_ADD ($2, %5) @@ -164,9 +264,11 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi " je 6f \n\t" "5: \n\t" /* do samples in pairs of 4 */ - " movdqa (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ + " movdqu (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ " movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */ + SWAP_16 (%%xmm1) VOLUME_32x16 (%%xmm1, %%xmm0) + SWAP_16 (%%xmm0) " movq %%xmm0, (%0) \n\t" " add $8, %0 \n\t" MOD_ADD ($4, %5) @@ -177,12 +279,14 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi " je 8f \n\t" "7: \n\t" /* do samples in pairs of 8 */ - " movdqa (%1, %3, 4), 
%%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ - " movdqa 16(%1, %3, 4), %%xmm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */ + " movdqu (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ + " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */ " movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */ " movq 8(%0), %%xmm3 \n\t" /* X | X | p3 | p2 */ + SWAP_16_2 (%%xmm1, %%xmm3) VOLUME_32x16 (%%xmm1, %%xmm0) VOLUME_32x16 (%%xmm3, %%xmm2) + SWAP_16_2 (%%xmm0, %%xmm2) " movq %%xmm0, (%0) \n\t" " movq %%xmm2, 8(%0) \n\t" " add $16, %0 \n\t" @@ -198,29 +302,6 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi } #if 0 -static void -pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (int16_t); - - for (channel = 0; length; length--) { - int32_t t, hi, lo; - - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; - - t = (int32_t) PA_INT16_SWAP(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = PA_INT16_SWAP((int16_t) t); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - static void pa_volume_float32ne_sse (float *samples, float *volumes, unsigned channels, unsigned length) { @@ -378,24 +459,25 @@ pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels, } #endif -#undef RUN_TEST +#define RUN_TEST #ifdef RUN_TEST #define CHANNELS 2 #define SAMPLES 1021 #define TIMES 1000 +#define PADDING 16 static void run_test (void) { int16_t samples[SAMPLES]; int16_t samples_ref[SAMPLES]; int16_t samples_orig[SAMPLES]; - int32_t volumes[CHANNELS + 16]; + int32_t volumes[CHANNELS + PADDING]; int i, j, padding; pa_do_volume_func_t func; - func = pa_get_volume_func (PA_SAMPLE_S16NE); + func = pa_get_volume_func (PA_SAMPLE_S16RE); - printf ("checking %d\n", sizeof (samples)); + printf ("checking SSE %d\n", sizeof (samples)); for (j = 0; j < TIMES; j++) { pa_random 
(samples, sizeof (samples)); @@ -403,12 +485,11 @@ static void run_test (void) { memcpy (samples_orig, samples, sizeof (samples)); for (i = 0; i < CHANNELS; i++) - volumes[i] = rand() >> 15; - - for (padding = 0; padding < 16; padding++, i++) + volumes[i] = rand() >> 1; + for (padding = 0; padding < PADDING; padding++, i++) volumes[i] = volumes[padding]; - pa_volume_s16ne_sse (samples, volumes, CHANNELS, SAMPLES * sizeof (int16_t)); + pa_volume_s16re_sse (samples, volumes, CHANNELS, SAMPLES * sizeof (int16_t)); func (samples_ref, volumes, CHANNELS, SAMPLES * sizeof (int16_t)); for (i = 0; i < SAMPLES; i++) { @@ -416,11 +497,6 @@ static void run_test (void) { printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], samples_orig[i], volumes[i % CHANNELS]); } -#if 0 - else - printf ("%d: %04x == %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], - samples_orig[i], volumes[i % CHANNELS]); -#endif } } } @@ -434,4 +510,5 @@ void pa_volume_func_init_sse (void) { #endif pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse); + pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse); } -- cgit From a83f5524fbf2f0fa861d2fae6973f0f42e8c9c25 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 13 Aug 2009 17:11:43 +0200 Subject: cpu-x86: add cpu detection code and helpers Add CPU detection code and various macros and typedefs to make it easier to write 64 and 32 bit code.
--- src/Makefile.am | 1 + src/pulsecore/cpu-x86.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++ src/pulsecore/cpu-x86.h | 61 ++++++++++++++++++++++++ 3 files changed, 184 insertions(+) create mode 100644 src/pulsecore/cpu-x86.c create mode 100644 src/pulsecore/cpu-x86.h (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index b692e4a9..4e90d793 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -825,6 +825,7 @@ libpulsecore_@PA_MAJORMINORMICRO@_la_SOURCES = \ pulsecore/resampler.c pulsecore/resampler.h \ pulsecore/rtpoll.c pulsecore/rtpoll.h \ pulsecore/sample-util.c pulsecore/sample-util.h \ + pulsecore/cpu-x86.c \ pulsecore/svolume_c.c \ pulsecore/svolume_mmx.c pulsecore/svolume_sse.c \ pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \ diff --git a/src/pulsecore/cpu-x86.c b/src/pulsecore/cpu-x86.c new file mode 100644 index 00000000..2da31c92 --- /dev/null +++ b/src/pulsecore/cpu-x86.c @@ -0,0 +1,122 @@ +/*** + This file is part of PulseAudio. + + Copyright 2004-2006 Lennart Poettering + Copyright 2009 Wim Taymans + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. 
+***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include + +#include "cpu-x86.h" + +#if defined (__i386__) || defined (__amd64__) +static void +get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) +{ + __asm__ __volatile__ ( + " push %%"PA_REG_b" \n\t" + " cpuid \n\t" + " mov %%ebx, %%esi \n\t" + " pop %%"PA_REG_b" \n\t" + + : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d) + : "0" (op)); +} +#endif + +static pa_cpu_x86_flag_t pa_cpu_x86_flags; + +void pa_cpu_init_x86 (void) { +#if defined (__i386__) || defined (__amd64__) + uint32_t eax, ebx, ecx, edx; + uint32_t level; + + /* get standard level */ + get_cpuid (0x00000000, &level, &ebx, &ecx, &edx); + if (level >= 1) { + get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx); + + if (edx & (1<<23)) + pa_cpu_x86_flags |= PA_CPU_X86_MMX; + + if (edx & (1<<25)) + pa_cpu_x86_flags |= PA_CPU_X86_SSE; + + if (edx & (1<<26)) + pa_cpu_x86_flags |= PA_CPU_X86_SSE2; + + if (ecx & (1<<0)) + pa_cpu_x86_flags |= PA_CPU_X86_SSE3; + + if (ecx & (1<<9)) + pa_cpu_x86_flags |= PA_CPU_X86_SSSE3; + + if (ecx & (1<<19)) + pa_cpu_x86_flags |= PA_CPU_X86_SSE4_1; + + if (ecx & (1<<20)) + pa_cpu_x86_flags |= PA_CPU_X86_SSE4_2; + } + + /* get extended level */ + get_cpuid (0x80000000, &level, &ebx, &ecx, &edx); + if (level >= 0x80000001) { + get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx); + + if (edx & (1<<22)) + pa_cpu_x86_flags |= PA_CPU_X86_MMXEXT; + + if (edx & (1<<23)) + pa_cpu_x86_flags |= PA_CPU_X86_MMX; + + if (edx & (1<<30)) + pa_cpu_x86_flags |= PA_CPU_X86_3DNOWEXT; + + if (edx & (1<<31)) + pa_cpu_x86_flags |= PA_CPU_X86_3DNOW; + } + + pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s", + (pa_cpu_x86_flags & PA_CPU_X86_MMX) ? "MMX " : "", + (pa_cpu_x86_flags & PA_CPU_X86_SSE) ? "SSE " : "", + (pa_cpu_x86_flags & PA_CPU_X86_SSE2) ? "SSE2 " : "", + (pa_cpu_x86_flags & PA_CPU_X86_SSE3) ? "SSE3 " : "", + (pa_cpu_x86_flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "", + (pa_cpu_x86_flags & PA_CPU_X86_SSE4_1) ? 
"SSE4_1 " : "", + (pa_cpu_x86_flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "", + (pa_cpu_x86_flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "", + (pa_cpu_x86_flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "", + (pa_cpu_x86_flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : ""); + + /* activate various optimisations */ + if (pa_cpu_x86_flags & PA_CPU_X86_MMX) { + pa_volume_func_init_mmx (pa_cpu_x86_flags); + } + if (pa_cpu_x86_flags & PA_CPU_X86_SSE) { + pa_volume_func_init_sse (pa_cpu_x86_flags); + } +#else + pa_cpu_x86_flags = 0; +#endif +} diff --git a/src/pulsecore/cpu-x86.h b/src/pulsecore/cpu-x86.h new file mode 100644 index 00000000..8158ea7a --- /dev/null +++ b/src/pulsecore/cpu-x86.h @@ -0,0 +1,61 @@ +/*** + This file is part of PulseAudio. + + Copyright 2004-2006 Lennart Poettering + Copyright 2009 Wim Taymans + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. 
+***/ + +#include + +typedef enum pa_cpu_x86_flag { + PA_CPU_X86_MMX = (1 << 0), + PA_CPU_X86_MMXEXT = (1 << 1), + PA_CPU_X86_SSE = (1 << 2), + PA_CPU_X86_SSE2 = (1 << 3), + PA_CPU_X86_SSE3 = (1 << 4), + PA_CPU_X86_SSSE3 = (1 << 5), + PA_CPU_X86_SSE4_1 = (1 << 6), + PA_CPU_X86_SSE4_2 = (1 << 7), + PA_CPU_X86_3DNOW = (1 << 8), + PA_CPU_X86_3DNOWEXT = (1 << 9) +} pa_cpu_x86_flag_t; + +void pa_cpu_init_x86 (void); + + +#if defined (__i386__) +typedef int32_t pa_reg_x86; +#define PA_REG_a "eax" +#define PA_REG_b "ebx" +#define PA_REG_c "ecx" +#define PA_REG_d "edx" +#define PA_REG_D "edi" +#define PA_REG_S "esi" +#elif defined (__amd64__) +typedef int64_t pa_reg_x86; +#define PA_REG_a "rax" +#define PA_REG_b "rbx" +#define PA_REG_c "rcx" +#define PA_REG_d "rdx" +#define PA_REG_D "rdi" +#define PA_REG_S "rsi" +#endif + +/* some optimized functions */ +void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags); +void pa_volume_func_init_sse(pa_cpu_x86_flag_t flags); -- cgit From 563cb2dea9f7f73180e2b8cc8d45b0df9358c936 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 13 Aug 2009 17:12:44 +0200 Subject: main: hook up cpu detection code Add CPU detection code to activate the various optimisations. Move some method definitions around. Use compatibility macros when we can. 
--- src/daemon/main.c | 4 ++-- src/pulsecore/sample-util.h | 3 --- src/pulsecore/svolume_mmx.c | 18 ++++++++++-------- src/pulsecore/svolume_sse.c | 16 ++++++++-------- 4 files changed, 20 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/daemon/main.c b/src/daemon/main.c index 3c5f7f95..774b4e90 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -95,6 +95,7 @@ #ifdef HAVE_DBUS #include #endif +#include #include "cmdline.h" #include "cpulimit.h" @@ -821,8 +822,7 @@ int main(int argc, char *argv[]) { pa_memtrap_install(); - pa_volume_func_init_mmx(); - pa_volume_func_init_sse(); + pa_cpu_init_x86(); pa_assert_se(mainloop = pa_mainloop_new()); diff --git a/src/pulsecore/sample-util.h b/src/pulsecore/sample-util.h index 563dbb6a..34df5cf3 100644 --- a/src/pulsecore/sample-util.h +++ b/src/pulsecore/sample-util.h @@ -91,9 +91,6 @@ typedef void (*pa_do_volume_func_t) (void *samples, void *volumes, unsigned chan pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f); void pa_set_volume_func(pa_sample_format_t f, pa_do_volume_func_t func); -void pa_volume_func_init_mmx(void); -void pa_volume_func_init_sse(void); - #define PA_CHANNEL_POSITION_MASK_LEFT \ (PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_FRONT_LEFT) \ | PA_CHANNEL_POSITION_MASK(PA_CHANNEL_POSITION_REAR_LEFT) \ diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index 3c229456..e56f7c31 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -31,6 +31,8 @@ #include #include +#include "cpu-x86.h" + #include "sample-util.h" #include "endianmacros.h" @@ -142,7 +144,7 @@ pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsig static void pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - int64_t channel, temp; + pa_reg_x86 channel, temp; /* the max number of samples we process at a time, this is also the max amount * we overread the volume array, which should have enough 
padding. */ @@ -203,8 +205,8 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi "6: \n\t" " emms \n\t" - : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((int64_t)channel), "=&r" (temp) - : "r" ((int64_t)channels) + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) + : "r" ((pa_reg_x86)channels) : "cc" ); } @@ -212,7 +214,7 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi static void pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - int64_t channel, temp; + pa_reg_x86 channel, temp; /* the max number of samples we process at a time, this is also the max amount * we overread the volume array, which should have enough padding. */ @@ -279,8 +281,8 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi "6: \n\t" " emms \n\t" - : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((int64_t)channel), "=&r" (temp) - : "r" ((int64_t)channels) + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) + : "r" ((pa_reg_x86)channels) : "cc" ); } @@ -443,7 +445,7 @@ pa_volume_s24_32re_mmx (uint32_t *samples, int32_t *volumes, unsigned channels, } #endif -#define RUN_TEST +#undef RUN_TEST #ifdef RUN_TEST #define CHANNELS 2 @@ -486,7 +488,7 @@ static void run_test (void) { } #endif -void pa_volume_func_init_mmx (void) { +void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) { pa_log_info("Initialising MMX optimized functions."); #ifdef RUN_TEST diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c index ff583a06..b60471a7 100644 --- a/src/pulsecore/svolume_sse.c +++ b/src/pulsecore/svolume_sse.c @@ -31,6 +31,8 @@ #include #include +#include "cpu-x86.h" + #include "sample-util.h" #include "endianmacros.h" @@ -140,8 +142,7 @@ pa_volume_ulaw_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsig static void pa_volume_s16ne_sse 
(int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - int64_t channel; - int64_t temp; + pa_reg_x86 channel, temp; /* the max number of samples we process at a time, this is also the max amount * we overread the volume array, which should have enough padding. */ @@ -210,7 +211,7 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi "8: \n\t" : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) - : "r" ((int64_t)channels) + : "r" ((pa_reg_x86)channels) : "cc" ); } @@ -218,8 +219,7 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi static void pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - int64_t channel; - int64_t temp; + pa_reg_x86 channel, temp; /* the max number of samples we process at a time, this is also the max amount * we overread the volume array, which should have enough padding. */ @@ -296,7 +296,7 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi "8: \n\t" : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) - : "r" ((int64_t)channels) + : "r" ((pa_reg_x86)channels) : "cc" ); } @@ -459,7 +459,7 @@ pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels, } #endif -#define RUN_TEST +#undef RUN_TEST #ifdef RUN_TEST #define CHANNELS 2 @@ -502,7 +502,7 @@ static void run_test (void) { } #endif -void pa_volume_func_init_sse (void) { +void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) { pa_log_info("Initialising SSE optimized functions."); #ifdef RUN_TEST -- cgit From e396fe67fb3b0acec40c2334c426bcb284163d20 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 13 Aug 2009 17:22:39 +0200 Subject: cpu-x86: guard header with ifdef --- src/pulsecore/cpu-x86.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/pulsecore/cpu-x86.h b/src/pulsecore/cpu-x86.h index 8158ea7a..07e630ea 100644 --- 
a/src/pulsecore/cpu-x86.h +++ b/src/pulsecore/cpu-x86.h @@ -1,3 +1,6 @@ +#ifndef foocpux86hfoo +#define foocpux86hfoo + /*** This file is part of PulseAudio. @@ -59,3 +62,5 @@ typedef int64_t pa_reg_x86; /* some optimized functions */ void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags); void pa_volume_func_init_sse(pa_cpu_x86_flag_t flags); + +#endif /* foocpux86hfoo */ -- cgit From dcae9a3113d1ce30e330c97dd5a81fec4e272bed Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Fri, 14 Aug 2009 13:12:30 +0200 Subject: svolume: add some comments --- src/pulsecore/svolume_mmx.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index e56f7c31..b36fe946 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -101,6 +101,22 @@ pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsig } #endif +/* in s: 2 int16_t samples + * in v: 2 int32_t volumes, fixed point 16:16 + * out s: contains scaled and clamped int16_t samples. + * + * We calculate the high 32 bits of a 32x16 multiply which we then + * clamp to 16 bits. The calulcation is: + * + * vl = (v & 0xffff) + * vh = (v >> 16) + * s = ((s * vl) >> 16) + (s * vh); + * + * For the first multiply we have to do a sign correction as we need to + * multiply a signed int with an unsigned int. Hacker's delight 8-3 gives a + * simple formula to correct the sign of the high word after the signed + * multiply. + */ #define VOLUME_32x16(s,v) /* .. | vh | vl | */ \ " pxor %%mm4, %%mm4 \n\t" /* .. | 0 | 0 | */ \ " punpcklwd %%mm4, "#s" \n\t" /* .. | 0 | p0 | */ \ @@ -116,6 +132,8 @@ pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsig " pmaddwd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \ " packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */ +/* approximately advances %3 = (%3 + a) % b. This function requires that + * a <= b. 
*/ #define MOD_ADD(a,b) \ " add "#a", %3 \n\t" \ " mov %3, %4 \n\t" \ -- cgit From a1235446a733164f00a96688784913172456a34e Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Fri, 14 Aug 2009 15:19:26 +0200 Subject: volume: make the benchmark more meaningful MMX is about 6x faster, SSE around 15x on my machine. --- src/pulsecore/svolume_mmx.c | 53 +++++++++++++++++++++++++--------------- src/pulsecore/svolume_sse.c | 59 +++++++++++++++++++++++++++------------------ 2 files changed, 69 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index b36fe946..9ad7dea1 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -24,8 +24,7 @@ #include #endif -#include - +#include #include #include #include @@ -478,31 +477,45 @@ static void run_test (void) { int32_t volumes[CHANNELS + PADDING]; int i, j, padding; pa_do_volume_func_t func; + struct timeval start, stop; - func = pa_get_volume_func (PA_SAMPLE_S16RE); - - printf ("checking MMX %d\n", sizeof (samples)); + func = pa_get_volume_func (PA_SAMPLE_S16NE); - for (j = 0; j < TIMES; j++) { - pa_random (samples, sizeof (samples)); - memcpy (samples_ref, samples, sizeof (samples)); - memcpy (samples_orig, samples, sizeof (samples)); + printf ("checking MMX %zd\n", sizeof (samples)); - for (i = 0; i < CHANNELS; i++) - volumes[i] = rand() >> 1; - for (padding = 0; padding < PADDING; padding++, i++) - volumes[i] = volumes[padding]; + pa_random (samples, sizeof (samples)); + memcpy (samples_ref, samples, sizeof (samples)); + memcpy (samples_orig, samples, sizeof (samples)); - pa_volume_s16re_mmx (samples, volumes, CHANNELS, sizeof (samples)); - func (samples_ref, volumes, CHANNELS, sizeof (samples)); + for (i = 0; i < CHANNELS; i++) + volumes[i] = rand() >> 1; + for (padding = 0; padding < PADDING; padding++, i++) + volumes[i] = volumes[padding]; - for (i = 0; i < SAMPLES; i++) { - if (samples[i] != samples_ref[i]) { - printf ("%d: %04x !=
%04x (%04x * %04x)\n", i, samples[i], samples_ref[i], - samples_orig[i], volumes[i % CHANNELS]); - } + func (samples_ref, volumes, CHANNELS, sizeof (samples)); + pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); + for (i = 0; i < SAMPLES; i++) { + if (samples[i] != samples_ref[i]) { + printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], + samples_orig[i], volumes[i % CHANNELS]); } } + + pa_gettimeofday(&start); + for (j = 0; j < TIMES; j++) { + memcpy (samples, samples_orig, sizeof (samples)); + pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); + } + pa_gettimeofday(&stop); + pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + + pa_gettimeofday(&start); + for (j = 0; j < TIMES; j++) { + memcpy (samples_ref, samples_orig, sizeof (samples)); + func (samples_ref, volumes, CHANNELS, sizeof (samples)); + } + pa_gettimeofday(&stop); + pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); } #endif diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c index b60471a7..8138c6c1 100644 --- a/src/pulsecore/svolume_sse.c +++ b/src/pulsecore/svolume_sse.c @@ -24,8 +24,7 @@ #include #endif -#include - +#include #include #include #include @@ -459,7 +458,7 @@ pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels, } #endif -#undef RUN_TEST +#define RUN_TEST #ifdef RUN_TEST #define CHANNELS 2 @@ -474,31 +473,45 @@ static void run_test (void) { int32_t volumes[CHANNELS + PADDING]; int i, j, padding; pa_do_volume_func_t func; + struct timeval start, stop; - func = pa_get_volume_func (PA_SAMPLE_S16RE); + func = pa_get_volume_func (PA_SAMPLE_S16NE); - printf ("checking SSE %d\n", sizeof (samples)); + printf ("checking SSE %zd\n", sizeof (samples)); - for (j = 0; j < TIMES; j++) { - pa_random (samples, sizeof (samples)); - memcpy (samples_ref, samples, sizeof (samples)); - memcpy (samples_orig, samples, sizeof 
(samples)); - - for (i = 0; i < CHANNELS; i++) - volumes[i] = rand() >> 1; - for (padding = 0; padding < PADDING; padding++, i++) - volumes[i] = volumes[padding]; - - pa_volume_s16re_sse (samples, volumes, CHANNELS, SAMPLES * sizeof (int16_t)); - func (samples_ref, volumes, CHANNELS, SAMPLES * sizeof (int16_t)); - - for (i = 0; i < SAMPLES; i++) { - if (samples[i] != samples_ref[i]) { - printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], - samples_orig[i], volumes[i % CHANNELS]); - } + pa_random (samples, sizeof (samples)); + memcpy (samples_ref, samples, sizeof (samples)); + memcpy (samples_orig, samples, sizeof (samples)); + + for (i = 0; i < CHANNELS; i++) + volumes[i] = rand() >> 1; + for (padding = 0; padding < PADDING; padding++, i++) + volumes[i] = volumes[padding]; + + func (samples_ref, volumes, CHANNELS, sizeof (samples)); + pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples)); + for (i = 0; i < SAMPLES; i++) { + if (samples[i] != samples_ref[i]) { + printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], + samples_orig[i], volumes[i % CHANNELS]); } } + + pa_gettimeofday(&start); + for (j = 0; j < TIMES; j++) { + memcpy (samples, samples_orig, sizeof (samples)); + pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples)); + } + pa_gettimeofday(&stop); + pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + + pa_gettimeofday(&start); + for (j = 0; j < TIMES; j++) { + memcpy (samples_ref, samples_orig, sizeof (samples)); + func (samples_ref, volumes, CHANNELS, sizeof (samples)); + } + pa_gettimeofday(&stop); + pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); } #endif -- cgit From f24c24c14b6614cf19ee916886c8b02384bac435 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Fri, 14 Aug 2009 15:41:32 +0200 Subject: volume: improved comments --- src/pulsecore/svolume_mmx.c | 12 ++++++------ src/pulsecore/svolume_sse.c | 45 
++++++++++++++++++++++++--------------------- 2 files changed, 30 insertions(+), 27 deletions(-) (limited to 'src') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index 9ad7dea1..d4fcedf5 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -196,7 +196,7 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ VOLUME_32x16 (%%mm1, %%mm0) - " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */ + " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ " add $4, %0 \n\t" MOD_ADD ($2, %5) @@ -212,8 +212,8 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */ VOLUME_32x16 (%%mm1, %%mm0) VOLUME_32x16 (%%mm3, %%mm2) - " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */ - " movd %%mm2, 4(%0) \n\t" /* | p3*v3 | p2*v2 | */ + " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ + " movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */ " add $8, %0 \n\t" MOD_ADD ($4, %5) " dec %2 \n\t" @@ -270,7 +270,7 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi SWAP_16 (%%mm1) VOLUME_32x16 (%%mm1, %%mm0) SWAP_16 (%%mm0) - " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */ + " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ " add $4, %0 \n\t" MOD_ADD ($2, %5) @@ -288,8 +288,8 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi VOLUME_32x16 (%%mm1, %%mm0) VOLUME_32x16 (%%mm3, %%mm2) SWAP_16_2 (%%mm0, %%mm2) - " movd %%mm0, (%0) \n\t" /* | p1*v1 | p0*v0 | */ - " movd %%mm2, 4(%0) \n\t" /* | p3*v3 | p2*v2 | */ + " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ + " movd %%mm2, 4(%0) \n\t" /* .. 
| p3*v3 | p2*v2 | */ " add $8, %0 \n\t" MOD_ADD ($4, %5) " dec %2 \n\t" diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c index 8138c6c1..d95fa9d9 100644 --- a/src/pulsecore/svolume_sse.c +++ b/src/pulsecore/svolume_sse.c @@ -231,12 +231,12 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi " test $1, %2 \n\t" /* check for odd samples */ " je 2f \n\t" - " movd (%1, %3, 4), %%xmm0 \n\t" /* do odd sample */ - " movw (%0), %4 \n\t" + " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */ + " movw (%0), %4 \n\t" /* .. | p0 | */ " rorw $8, %4 \n\t" " movd %4, %%xmm1 \n\t" VOLUME_32x16 (%%xmm1, %%xmm0) - " movd %%xmm0, %4 \n\t" + " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */ " rorw $8, %4 \n\t" " movw %4, (%0) \n\t" " add $2, %0 \n\t" @@ -244,31 +244,34 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi "2: \n\t" " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ - " test $1, %2 \n\t" /* check for odd samples */ + " test $1, %2 \n\t" " je 4f \n\t" - "3: \n\t" /* do samples in pairs of 2 */ - " movq (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ - " movd (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */ + "3: \n\t" /* do samples in groups of 2 */ + " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */ SWAP_16 (%%xmm1) VOLUME_32x16 (%%xmm1, %%xmm0) SWAP_16 (%%xmm0) - " movd %%xmm0, (%0) \n\t" + " movd %%xmm0, (%0) \n\t" /* .. 
| p1*v1 | p0*v0 | */ " add $4, %0 \n\t" MOD_ADD ($2, %5) "4: \n\t" " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ - " test $1, %2 \n\t" /* check for odd samples */ + " test $1, %2 \n\t" " je 6f \n\t" - "5: \n\t" /* do samples in pairs of 4 */ - " movdqu (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ - " movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */ + /* FIXME, we can do aligned access of the volume values if we can guarantee + * that the array is 16 bytes aligned, we probably have to do the odd values + * after this then. */ + "5: \n\t" /* do samples in groups of 4 */ + " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ + " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ SWAP_16 (%%xmm1) VOLUME_32x16 (%%xmm1, %%xmm0) SWAP_16 (%%xmm0) - " movq %%xmm0, (%0) \n\t" + " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ " add $8, %0 \n\t" MOD_ADD ($4, %5) @@ -277,17 +280,17 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi " cmp $0, %2 \n\t" " je 8f \n\t" - "7: \n\t" /* do samples in pairs of 8 */ - " movdqu (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */ - " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */ - " movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */ - " movq 8(%0), %%xmm3 \n\t" /* X | X | p3 | p2 */ + "7: \n\t" /* do samples in groups of 8 */ + " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ + " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */ + " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ + " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */ SWAP_16_2 (%%xmm1, %%xmm3) VOLUME_32x16 (%%xmm1, %%xmm0) VOLUME_32x16 (%%xmm3, %%xmm2) SWAP_16_2 (%%xmm0, %%xmm2) - " movq %%xmm0, (%0) \n\t" - " movq %%xmm2, 8(%0) \n\t" + " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ + " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. 
p4*v4 | */ " add $16, %0 \n\t" MOD_ADD ($8, %5) " dec %2 \n\t" @@ -458,7 +461,7 @@ pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels, } #endif -#define RUN_TEST +#undef RUN_TEST #ifdef RUN_TEST #define CHANNELS 2 -- cgit From 591baacba5913de32e6556a71a8300d25addbec4 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Fri, 14 Aug 2009 15:48:10 +0200 Subject: volume: remove ref functions --- src/pulsecore/svolume_mmx.c | 223 -------------------------------------------- src/pulsecore/svolume_sse.c | 223 -------------------------------------------- 2 files changed, 446 deletions(-) (limited to 'src') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index d4fcedf5..ad539278 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -35,71 +35,6 @@ #include "sample-util.h" #include "endianmacros.h" -#if 0 -static void -pa_volume_u8_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - for (channel = 0; length; length--) { - int32_t t, hi, lo; - - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; - - t = (int32_t) *samples - 0x80; - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); - *samples++ = (uint8_t) (t + 0x80); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_alaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - for (channel = 0; length; length--) { - int32_t t, hi, lo; - - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; - - t = (int32_t) st_alaw2linear16(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_ulaw_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - 
for (channel = 0; length; length--) { - int32_t t, hi, lo; - - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; - - t = (int32_t) st_ulaw2linear16(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} -#endif - /* in s: 2 int16_t samples * in v: 2 int32_t volumes, fixed point 16:16 * out s: contains scaled and clamped int16_t samples. @@ -304,164 +239,6 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi ); } -#if 0 -static void -pa_volume_float32ne_mmx (float *samples, float *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (float); - - for (channel = 0; length; length--) { - *samples++ *= volumes[channel]; - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_float32re_mmx (float *samples, float *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (float); - - for (channel = 0; length; length--) { - float t; - - t = PA_FLOAT32_SWAP(*samples); - t *= volumes[channel]; - *samples++ = PA_FLOAT32_SWAP(t); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s32ne_mmx (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (int32_t); - - for (channel = 0; length; length--) { - int64_t t; - - t = (int64_t)(*samples); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = (int32_t) t; - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s32re_mmx (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (int32_t); - - for (channel = 0; length; length--) { - int64_t t; - - t = (int64_t) 
PA_INT32_SWAP(*samples); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = PA_INT32_SWAP((int32_t) t); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s24ne_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - uint8_t *e; - - e = samples + length; - - for (channel = 0; samples < e; samples += 3) { - int64_t t; - - t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s24re_mmx (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - uint8_t *e; - - e = samples + length; - - for (channel = 0; samples < e; samples += 3) { - int64_t t; - - t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s24_32ne_mmx (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (uint32_t); - - for (channel = 0; length; length--) { - int64_t t; - - t = (int64_t) ((int32_t) (*samples << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = ((uint32_t) ((int32_t) t)) >> 8; - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s24_32re_mmx (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (uint32_t); - - for (channel = 0; length; length--) { - int64_t t; - - t = (int64_t) ((int32_t) 
(PA_UINT32_SWAP(*samples) << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} -#endif - #undef RUN_TEST #ifdef RUN_TEST diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c index d95fa9d9..b0a6e0dd 100644 --- a/src/pulsecore/svolume_sse.c +++ b/src/pulsecore/svolume_sse.c @@ -35,71 +35,6 @@ #include "sample-util.h" #include "endianmacros.h" -#if 0 -static void -pa_volume_u8_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - for (channel = 0; length; length--) { - int32_t t, hi, lo; - - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; - - t = (int32_t) *samples - 0x80; - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); - *samples++ = (uint8_t) (t + 0x80); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_alaw_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - for (channel = 0; length; length--) { - int32_t t, hi, lo; - - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; - - t = (int32_t) st_alaw2linear16(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_ulaw_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - for (channel = 0; length; length--) { - int32_t t, hi, lo; - - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; - - t = (int32_t) st_ulaw2linear16(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); - - if 
(PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} -#endif - #define VOLUME_32x16(s,v) /* .. | vh | vl | */ \ " pxor %%xmm4, %%xmm4 \n\t" /* .. | 0 | 0 | */ \ " punpcklwd %%xmm4, "#s" \n\t" /* .. | 0 | p0 | */ \ @@ -303,164 +238,6 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi ); } -#if 0 -static void -pa_volume_float32ne_sse (float *samples, float *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (float); - - for (channel = 0; length; length--) { - *samples++ *= volumes[channel]; - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_float32re_sse (float *samples, float *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (float); - - for (channel = 0; length; length--) { - float t; - - t = PA_FLOAT32_SWAP(*samples); - t *= volumes[channel]; - *samples++ = PA_FLOAT32_SWAP(t); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s32ne_sse (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (int32_t); - - for (channel = 0; length; length--) { - int64_t t; - - t = (int64_t)(*samples); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = (int32_t) t; - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s32re_sse (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (int32_t); - - for (channel = 0; length; length--) { - int64_t t; - - t = (int64_t) PA_INT32_SWAP(*samples); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = PA_INT32_SWAP((int32_t) t); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s24ne_sse (uint8_t *samples, int32_t *volumes, unsigned 
channels, unsigned length) -{ - unsigned channel; - uint8_t *e; - - e = samples + length; - - for (channel = 0; samples < e; samples += 3) { - int64_t t; - - t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s24re_sse (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - uint8_t *e; - - e = samples + length; - - for (channel = 0; samples < e; samples += 3) { - int64_t t; - - t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s24_32ne_sse (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (uint32_t); - - for (channel = 0; length; length--) { - int64_t t; - - t = (int64_t) ((int32_t) (*samples << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = ((uint32_t) ((int32_t) t)) >> 8; - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} - -static void -pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) -{ - unsigned channel; - - length /= sizeof (uint32_t); - - for (channel = 0; length; length--) { - int64_t t; - - t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); - - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } -} -#endif - #undef RUN_TEST #ifdef RUN_TEST -- cgit From 
25724cdd40283a00e6edd9449d0f3cf16823b41b Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Fri, 14 Aug 2009 19:45:39 +0200 Subject: Get rid of liboil Get rid of the liboil dependency and reimplement the liboil functions with an equivalent C implementation. Note that most of these functions are deprecated in liboil and that none of them had any optimisations. We can further specialize our handrolled versions for some extra speedups. --- src/daemon/main.c | 4 --- src/pulsecore/resampler.c | 69 ++++++++++++++++++++++++--------------------- src/pulsecore/sconv-s16le.c | 42 +++++++++++---------------- src/pulsecore/sconv.c | 28 ++++++++---------- src/tests/envelope-test.c | 3 -- src/tests/mix-test.c | 3 -- src/tests/remix-test.c | 3 -- src/tests/resampler-test.c | 3 -- 8 files changed, 65 insertions(+), 90 deletions(-) (limited to 'src') diff --git a/src/daemon/main.c b/src/daemon/main.c index 774b4e90..31e434d9 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -39,8 +39,6 @@ #include #include -#include - #ifdef HAVE_SYS_MMAN_H #include #endif @@ -863,8 +861,6 @@ int main(int argc, char *argv[]) { win32_timer = pa_mainloop_get_api(mainloop)->rtclock_time_new(pa_mainloop_get_api(mainloop), pa_gettimeofday(&win32_tv), message_cb, NULL); #endif - oil_init(); - if (!conf->no_cpu_limit) pa_assert_se(pa_cpu_limit_init(pa_mainloop_get_api(mainloop)) == 0); diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c index 59e0a0c1..a3c17f8c 100644 --- a/src/pulsecore/resampler.c +++ b/src/pulsecore/resampler.c @@ -31,9 +31,6 @@ #include -#include -#include - #include #include #include @@ -1045,33 +1042,46 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input) return &r->buf1; } -static void vectoradd_s16_with_fraction( - int16_t *d, int dstr, - const int16_t *s1, int sstr1, - const int16_t *s2, int sstr2, - int n, - float s3, float s4) { +static void vectoradd_f32( + float *d, int dstr, + const float *s, int sstr, + int n, float s4) { - 
int32_t i3, i4; + for (; n > 0; n--) { + *d = (float) (*d + (s4 * *s)); - i3 = (int32_t) (s3 * 0x10000); - i4 = (int32_t) (s4 * 0x10000); + s = (const float*) ((const uint8_t*) s + sstr); + d = (float*) ((uint8_t*) d + dstr); + } +} + +static void vectoradd_s16( + int16_t *d, int dstr, + const int16_t *s, int sstr, + int n) { for (; n > 0; n--) { - int32_t a, b; + *d = (int16_t) (*d + *s); - a = *s1; - b = *s2; + s = (const int16_t*) ((const uint8_t*) s + sstr); + d = (int16_t*) ((uint8_t*) d + dstr); + } +} - a = (a * i3) / 0x10000; - b = (b * i4) / 0x10000; +static void vectoradd_s16_with_fraction( + int16_t *d, int dstr, + const int16_t *s, int sstr, + int n, float s4) { - *d = (int16_t) (a + b); + int32_t i4; - s1 = (const int16_t*) ((const uint8_t*) s1 + sstr1); - s2 = (const int16_t*) ((const uint8_t*) s2 + sstr2); - d = (int16_t*) ((uint8_t*) d + dstr); + i4 = (int32_t) (s4 * 0x10000); + + for (; n > 0; n--) { + *d = (int16_t) (*d + (((int32_t)*s * i4) >> 16)); + s = (const int16_t*) ((const uint8_t*) s + sstr); + d = (int16_t*) ((uint8_t*) d + dstr); } } @@ -1125,12 +1135,11 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) { if (r->map_table[oc][ic] <= 0.0) continue; - oil_vectoradd_f32( - (float*) dst + oc, o_skip, + vectoradd_f32( (float*) dst + oc, o_skip, (float*) src + ic, i_skip, (int) n_frames, - &one, &r->map_table[oc][ic]); + r->map_table[oc][ic]); } } @@ -1147,23 +1156,19 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) { continue; if (r->map_table[oc][ic] >= 1.0) { - static const int16_t one = 1; - oil_vectoradd_s16( - (int16_t*) dst + oc, o_skip, + vectoradd_s16( (int16_t*) dst + oc, o_skip, (int16_t*) src + ic, i_skip, - (int) n_frames, - &one, &one); + (int) n_frames); } else vectoradd_s16_with_fraction( - (int16_t*) dst + oc, o_skip, (int16_t*) dst + oc, o_skip, (int16_t*) src + ic, i_skip, (int) n_frames, - 1.0f, r->map_table[oc][ic]); + r->map_table[oc][ic]); } } @@ -1469,7 +1474,7 @@ 
static void trivial_resample(pa_resampler *r, const pa_memchunk *input, unsigned pa_assert(o_index * fz < pa_memblock_get_length(output->memblock)); - oil_memcpy((uint8_t*) dst + fz * o_index, + memcpy((uint8_t*) dst + fz * o_index, (uint8_t*) src + fz * j, (int) fz); } diff --git a/src/pulsecore/sconv-s16le.c b/src/pulsecore/sconv-s16le.c index 43b8cb3e..0fefdf1c 100644 --- a/src/pulsecore/sconv-s16le.c +++ b/src/pulsecore/sconv-s16le.c @@ -28,8 +28,6 @@ #include #include -#include - #include #include #include @@ -86,17 +84,13 @@ void pa_sconv_s16le_to_float32ne(unsigned n, const int16_t *a, float *b) { pa_assert(b); #if SWAP_WORDS == 1 - for (; n > 0; n--) { int16_t s = *(a++); *(b++) = ((float) INT16_FROM(s))/(float) 0x7FFF; } - #else -{ - static const double add = 0, factor = 1.0/0x7FFF; - oil_scaleconv_f32_s16(b, a, (int) n, &add, &factor); -} + for (; n > 0; n--) + *(b++) = ((float) (*(a++)))/(float) 0x7FFF; #endif } @@ -105,17 +99,13 @@ void pa_sconv_s32le_to_float32ne(unsigned n, const int32_t *a, float *b) { pa_assert(b); #if SWAP_WORDS == 1 - for (; n > 0; n--) { int32_t s = *(a++); *(b++) = (float) (((double) INT32_FROM(s))/0x7FFFFFFF); } - #else -{ - static const double add = 0, factor = 1.0/0x7FFFFFFF; - oil_scaleconv_f32_s32(b, a, (int) n, &add, &factor); -} + for (; n > 0; n--) + *(b++) = (float) (((double) (*(a++)))/0x7FFFFFFF); #endif } @@ -124,7 +114,6 @@ void pa_sconv_s16le_from_float32ne(unsigned n, const float *a, int16_t *b) { pa_assert(b); #if SWAP_WORDS == 1 - for (; n > 0; n--) { int16_t s; float v = *(a++); @@ -133,12 +122,13 @@ void pa_sconv_s16le_from_float32ne(unsigned n, const float *a, int16_t *b) { s = (int16_t) lrintf(v * 0x7FFF); *(b++) = INT16_TO(s); } - #else -{ - static const double add = 0, factor = 0x7FFF; - oil_scaleconv_s16_f32(b, a, (int) n, &add, &factor); -} + for (; n > 0; n--) { + float v = *(a++); + + v = PA_CLAMP_UNLIKELY(v, -1.0f, 1.f); + *(b++) = (int16_t) lrintf(v * 0x7FFF); + } #endif } @@ -147,7 +137,6 @@ void 
pa_sconv_s32le_from_float32ne(unsigned n, const float *a, int32_t *b) { pa_assert(b); #if SWAP_WORDS == 1 - for (; n > 0; n--) { int32_t s; float v = *(a++); @@ -156,12 +145,13 @@ void pa_sconv_s32le_from_float32ne(unsigned n, const float *a, int32_t *b) { s = (int32_t) lrint((double) v * (double) 0x7FFFFFFF); *(b++) = INT32_TO(s); } - #else -{ - static const double add = 0, factor = 0x7FFFFFFF; - oil_scaleconv_s32_f32(b, a, (int) n, &add, &factor); -} + for (; n > 0; n--) { + float v = *(a++); + + v = PA_CLAMP_UNLIKELY(v, -1.0f, 1.0f); + *(b++) = (int32_t) lrint((double) v * (double) 0x7FFFFFFF); + } #endif } diff --git a/src/pulsecore/sconv.c b/src/pulsecore/sconv.c index d89f4283..937bf5d1 100644 --- a/src/pulsecore/sconv.c +++ b/src/pulsecore/sconv.c @@ -27,9 +27,6 @@ #include #include -#include -#include - #include #include @@ -41,32 +38,31 @@ /* u8 */ static void u8_to_float32ne(unsigned n, const uint8_t *a, float *b) { - static const double add = -1, factor = 1.0/128.0; - pa_assert(a); pa_assert(b); - oil_scaleconv_f32_u8(b, a, (int) n, &add, &factor); + for (; n > 0; n--, a++, b++) + *b = (*a * 1.0/128.0) - 1.0; } static void u8_from_float32ne(unsigned n, const float *a, uint8_t *b) { - static const double add = 128, factor = 127.0; - pa_assert(a); pa_assert(b); - oil_scaleconv_u8_f32(b, a, (int) n, &add, &factor); + for (; n > 0; n--, a++, b++) { + float v; + v = (*a * 127.0) + 128.0; + v = PA_CLAMP_UNLIKELY (v, 0.0, 255.0); + *b = rint (v); + } } static void u8_to_s16ne(unsigned n, const uint8_t *a, int16_t *b) { - static const int16_t add = -0x80, factor = 0x100; - pa_assert(a); pa_assert(b); - oil_conv_s16_u8(b, 2, a, 1, (int) n); - oil_scalaradd_s16(b, 2, b, 2, &add, (int) n); - oil_scalarmult_s16(b, 2, b, 2, &factor, (int) n); + for (; n > 0; n--, a++, b++) + *b = (((int16_t)*a) - 128) << 8; } static void u8_from_s16ne(unsigned n, const int16_t *a, uint8_t *b) { @@ -84,7 +80,7 @@ static void float32ne_to_float32ne(unsigned n, const float *a, float *b) 
{ pa_assert(a); pa_assert(b); - oil_memcpy(b, a, (int) (sizeof(float) * n)); + memcpy(b, a, (int) (sizeof(float) * n)); } static void float32re_to_float32ne(unsigned n, const float *a, float *b) { @@ -101,7 +97,7 @@ static void s16ne_to_s16ne(unsigned n, const int16_t *a, int16_t *b) { pa_assert(a); pa_assert(b); - oil_memcpy(b, a, (int) (sizeof(int16_t) * n)); + memcpy(b, a, (int) (sizeof(int16_t) * n)); } static void s16re_to_s16ne(unsigned n, const int16_t *a, int16_t *b) { diff --git a/src/tests/envelope-test.c b/src/tests/envelope-test.c index 3af3044e..9382040b 100644 --- a/src/tests/envelope-test.c +++ b/src/tests/envelope-test.c @@ -34,8 +34,6 @@ #include #include -#include - const pa_envelope_def ramp_down = { .n_points = 2, .points_x = { 100*PA_USEC_PER_MSEC, 300*PA_USEC_PER_MSEC }, @@ -202,7 +200,6 @@ int main(int argc, char *argv[]) { .values = { PA_VOLUME_NORM, PA_VOLUME_NORM/2 } }; - oil_init(); pa_log_set_level(PA_LOG_DEBUG); pa_assert_se(pool = pa_mempool_new(FALSE, 0)); diff --git a/src/tests/mix-test.c b/src/tests/mix-test.c index f9f76da3..457c4acd 100644 --- a/src/tests/mix-test.c +++ b/src/tests/mix-test.c @@ -32,8 +32,6 @@ #include #include -#include - static float swap_float(float a) { uint32_t *b = (uint32_t*) &a; *b = PA_UINT32_SWAP(*b); @@ -211,7 +209,6 @@ int main(int argc, char *argv[]) { pa_sample_spec a; pa_cvolume v; - oil_init(); pa_log_set_level(PA_LOG_DEBUG); pa_assert_se(pool = pa_mempool_new(FALSE, 0)); diff --git a/src/tests/remix-test.c b/src/tests/remix-test.c index 9d110d6b..4990bf93 100644 --- a/src/tests/remix-test.c +++ b/src/tests/remix-test.c @@ -32,8 +32,6 @@ #include #include -#include - int main(int argc, char *argv[]) { static const pa_channel_map maps[] = { @@ -55,7 +53,6 @@ int main(int argc, char *argv[]) { unsigned i, j; pa_mempool *pool; - oil_init(); pa_log_set_level(PA_LOG_DEBUG); pa_assert_se(pool = pa_mempool_new(FALSE, 0)); diff --git a/src/tests/resampler-test.c b/src/tests/resampler-test.c index 
7236265a..82198b5e 100644 --- a/src/tests/resampler-test.c +++ b/src/tests/resampler-test.c @@ -32,8 +32,6 @@ #include #include -#include - static void dump_block(const pa_sample_spec *ss, const pa_memchunk *chunk) { void *d; unsigned i; @@ -248,7 +246,6 @@ int main(int argc, char *argv[]) { pa_sample_spec a, b; pa_cvolume v; - oil_init(); pa_log_set_level(PA_LOG_DEBUG); pa_assert_se(pool = pa_mempool_new(FALSE, 0)); -- cgit From 601e5f1867065912e1740e2408a948ca818f6c59 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Mon, 17 Aug 2009 11:35:47 +0200 Subject: resampler: cache integer channel_map Calculate and cache an integer version of the channel map so that we don't have to regenerate it when dealing with s16 samples. --- src/pulsecore/resampler.c | 66 +++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 33 deletions(-) (limited to 'src') diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c index a3c17f8c..e3473ac5 100644 --- a/src/pulsecore/resampler.c +++ b/src/pulsecore/resampler.c @@ -61,7 +61,8 @@ struct pa_resampler { pa_convert_func_t to_work_format_func; pa_convert_func_t from_work_format_func; - float map_table[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; + float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; + int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; pa_bool_t map_required; void (*impl_free)(pa_resampler *r); @@ -587,7 +588,8 @@ static void calc_map_table(pa_resampler *r) { if (!(r->map_required = (r->i_ss.channels != r->o_ss.channels || (!(r->flags & PA_RESAMPLER_NO_REMAP) && !pa_channel_map_equal(&r->i_cm, &r->o_cm))))) return; - memset(r->map_table, 0, sizeof(r->map_table)); + memset(r->map_table_f, 0, sizeof(r->map_table_f)); + memset(r->map_table_i, 0, sizeof(r->map_table_i)); memset(ic_connected, 0, sizeof(ic_connected)); remix = (r->flags & (PA_RESAMPLER_NO_REMAP|PA_RESAMPLER_NO_REMIX)) == 0; @@ -602,7 +604,7 @@ static void calc_map_table(pa_resampler *r) { /* We shall not do any remapping. 
Hence, just check by index */ if (ic == oc) - r->map_table[oc][ic] = 1.0; + r->map_table_f[oc][ic] = 1.0; continue; } @@ -611,7 +613,7 @@ static void calc_map_table(pa_resampler *r) { /* We shall not do any remixing. Hence, just check by name */ if (a == b) - r->map_table[oc][ic] = 1.0; + r->map_table_f[oc][ic] = 1.0; continue; } @@ -686,7 +688,7 @@ static void calc_map_table(pa_resampler *r) { */ if (a == b || a == PA_CHANNEL_POSITION_MONO || b == PA_CHANNEL_POSITION_MONO) { - r->map_table[oc][ic] = 1.0; + r->map_table_f[oc][ic] = 1.0; oc_connected = TRUE; ic_connected[ic] = TRUE; @@ -711,7 +713,7 @@ static void calc_map_table(pa_resampler *r) { if (n > 0) for (ic = 0; ic < r->i_ss.channels; ic++) if (on_left(r->i_cm.map[ic])) { - r->map_table[oc][ic] = 1.0f / (float) n; + r->map_table_f[oc][ic] = 1.0f / (float) n; ic_connected[ic] = TRUE; } @@ -732,7 +734,7 @@ static void calc_map_table(pa_resampler *r) { if (n > 0) for (ic = 0; ic < r->i_ss.channels; ic++) if (on_right(r->i_cm.map[ic])) { - r->map_table[oc][ic] = 1.0f / (float) n; + r->map_table_f[oc][ic] = 1.0f / (float) n; ic_connected[ic] = TRUE; } @@ -753,7 +755,7 @@ static void calc_map_table(pa_resampler *r) { if (n > 0) { for (ic = 0; ic < r->i_ss.channels; ic++) if (on_center(r->i_cm.map[ic])) { - r->map_table[oc][ic] = 1.0f / (float) n; + r->map_table_f[oc][ic] = 1.0f / (float) n; ic_connected[ic] = TRUE; } } else { @@ -770,7 +772,7 @@ static void calc_map_table(pa_resampler *r) { if (n > 0) for (ic = 0; ic < r->i_ss.channels; ic++) if (on_left(r->i_cm.map[ic]) || on_right(r->i_cm.map[ic])) { - r->map_table[oc][ic] = 1.0f / (float) n; + r->map_table_f[oc][ic] = 1.0f / (float) n; ic_connected[ic] = TRUE; } @@ -787,9 +789,9 @@ static void calc_map_table(pa_resampler *r) { for (ic = 0; ic < r->i_ss.channels; ic++) { if (!(r->flags & PA_RESAMPLER_NO_LFE)) - r->map_table[oc][ic] = 1.0f / (float) r->i_ss.channels; + r->map_table_f[oc][ic] = 1.0f / (float) r->i_ss.channels; else - r->map_table[oc][ic] = 0; + 
r->map_table_f[oc][ic] = 0; /* Please note that a channel connected to LFE * doesn't really count as connected. */ @@ -836,12 +838,12 @@ static void calc_map_table(pa_resampler *r) { for (ic = 0; ic < r->i_ss.channels; ic++) { if (ic_connected[ic]) { - r->map_table[oc][ic] *= .9f; + r->map_table_f[oc][ic] *= .9f; continue; } if (on_left(r->i_cm.map[ic])) - r->map_table[oc][ic] = .1f / (float) ic_unconnected_left; + r->map_table_f[oc][ic] = .1f / (float) ic_unconnected_left; } } } @@ -861,12 +863,12 @@ static void calc_map_table(pa_resampler *r) { for (ic = 0; ic < r->i_ss.channels; ic++) { if (ic_connected[ic]) { - r->map_table[oc][ic] *= .9f; + r->map_table_f[oc][ic] *= .9f; continue; } if (on_right(r->i_cm.map[ic])) - r->map_table[oc][ic] = .1f / (float) ic_unconnected_right; + r->map_table_f[oc][ic] = .1f / (float) ic_unconnected_right; } } } @@ -887,12 +889,12 @@ static void calc_map_table(pa_resampler *r) { for (ic = 0; ic < r->i_ss.channels; ic++) { if (ic_connected[ic]) { - r->map_table[oc][ic] *= .9f; + r->map_table_f[oc][ic] *= .9f; continue; } if (on_center(r->i_cm.map[ic])) { - r->map_table[oc][ic] = .1f / (float) ic_unconnected_center; + r->map_table_f[oc][ic] = .1f / (float) ic_unconnected_center; mixed_in = TRUE; } } @@ -950,7 +952,7 @@ static void calc_map_table(pa_resampler *r) { for (ic = 0; ic < r->i_ss.channels; ic++) { if (ic_connected[ic]) { - r->map_table[oc][ic] *= .75f; + r->map_table_f[oc][ic] *= .75f; continue; } @@ -958,7 +960,7 @@ static void calc_map_table(pa_resampler *r) { continue; if (!found_frs[ic] || front_rear_side(r->i_cm.map[ic]) == front_rear_side(r->o_cm.map[oc])) - r->map_table[oc][ic] = .375f / (float) ncenter[oc]; + r->map_table_f[oc][ic] = .375f / (float) ncenter[oc]; } } } @@ -975,11 +977,14 @@ static void calc_map_table(pa_resampler *r) { continue; for (oc = 0; oc < r->o_ss.channels; oc++) - r->map_table[oc][ic] = 0.375f / (float) ic_unconnected_lfe; + r->map_table_f[oc][ic] = 0.375f / (float) ic_unconnected_lfe; } } } 
- + /* make an 16:16 int version of the matrix */ + for (oc = 0; oc < r->o_ss.channels; oc++) + for (ic = 0; ic < r->i_ss.channels; ic++) + r->map_table_i[oc][ic] = (int32_t) (r->map_table_f[oc][ic] * 0x10000); s = pa_strbuf_new(); @@ -996,7 +1001,7 @@ static void calc_map_table(pa_resampler *r) { pa_strbuf_printf(s, "O%02u |", oc); for (ic = 0; ic < r->i_ss.channels; ic++) - pa_strbuf_printf(s, " %1.3f", r->map_table[oc][ic]); + pa_strbuf_printf(s, " %1.3f", r->map_table_f[oc][ic]); pa_strbuf_puts(s, "\n"); } @@ -1071,11 +1076,7 @@ static void vectoradd_s16( static void vectoradd_s16_with_fraction( int16_t *d, int dstr, const int16_t *s, int sstr, - int n, float s4) { - - int32_t i4; - - i4 = (int32_t) (s4 * 0x10000); + int n, int32_t i4) { for (; n > 0; n--) { *d = (int16_t) (*d + (((int32_t)*s * i4) >> 16)); @@ -1128,18 +1129,17 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) { for (oc = 0; oc < r->o_ss.channels; oc++) { unsigned ic; - static const float one = 1.0; for (ic = 0; ic < r->i_ss.channels; ic++) { - if (r->map_table[oc][ic] <= 0.0) + if (r->map_table_f[oc][ic] <= 0.0) continue; vectoradd_f32( (float*) dst + oc, o_skip, (float*) src + ic, i_skip, (int) n_frames, - r->map_table[oc][ic]); + r->map_table_f[oc][ic]); } } @@ -1152,10 +1152,10 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) { for (ic = 0; ic < r->i_ss.channels; ic++) { - if (r->map_table[oc][ic] <= 0.0) + if (r->map_table_f[oc][ic] <= 0.0) continue; - if (r->map_table[oc][ic] >= 1.0) { + if (r->map_table_f[oc][ic] >= 1.0) { vectoradd_s16( (int16_t*) dst + oc, o_skip, @@ -1168,7 +1168,7 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) { (int16_t*) dst + oc, o_skip, (int16_t*) src + ic, i_skip, (int) n_frames, - r->map_table[oc][ic]); + r->map_table_i[oc][ic]); } } -- cgit From a98fa950d2f04e2ba4d4a470296a081e1050f76d Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 19 Aug 2009 15:56:44 +0200 Subject: svolume: 
remove unneeded compare We don't need the compare because the sub operation already set the right flags for us. --- src/pulsecore/svolume_mmx.c | 1 - src/pulsecore/svolume_sse.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'src') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index ad539278..5243b447 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -72,7 +72,6 @@ " add "#a", %3 \n\t" \ " mov %3, %4 \n\t" \ " sub "#b", %4 \n\t" \ - " cmp "#b", %3 \n\t" \ " cmovae %4, %3 \n\t" /* swap 16 bits */ diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c index b0a6e0dd..98f828c0 100644 --- a/src/pulsecore/svolume_sse.c +++ b/src/pulsecore/svolume_sse.c @@ -52,8 +52,7 @@ " add "#a", %3 \n\t" /* channel += inc */ \ " mov %3, %4 \n\t" \ " sub "#b", %4 \n\t" /* tmp = channel - channels */ \ - " cmp "#b", %3 \n\t" /* if (channel >= channels) */ \ - " cmovae %4, %3 \n\t" /* channel = tmp */ + " cmovae %4, %3 \n\t" /* if (tmp >= 0) channel = tmp */ /* swap 16 bits */ #define SWAP_16(s) \ -- cgit From 951bf1b28d25a93b99cbe074a46b8313a9e5f9f0 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 19 Aug 2009 16:09:14 +0200 Subject: svolume: add ARM optimized volume scaling --- src/Makefile.am | 4 +- src/daemon/main.c | 2 + src/pulsecore/cpu-arm.c | 43 ++++++++++ src/pulsecore/cpu-arm.h | 39 +++++++++ src/pulsecore/svolume_arm.c | 195 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 281 insertions(+), 2 deletions(-) create mode 100644 src/pulsecore/cpu-arm.c create mode 100644 src/pulsecore/cpu-arm.h create mode 100644 src/pulsecore/svolume_arm.c (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index 4e90d793..eca68b16 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -825,8 +825,8 @@ libpulsecore_@PA_MAJORMINORMICRO@_la_SOURCES = \ pulsecore/resampler.c pulsecore/resampler.h \ pulsecore/rtpoll.c pulsecore/rtpoll.h \ pulsecore/sample-util.c 
pulsecore/sample-util.h \ - pulsecore/cpu-x86.c \ - pulsecore/svolume_c.c \ + pulsecore/cpu-arm.c pulsecore/cpu-x86.c \ + pulsecore/svolume_c.c pulsecore/svolume_arm.c\ pulsecore/svolume_mmx.c pulsecore/svolume_sse.c \ pulsecore/sconv-s16be.c pulsecore/sconv-s16be.h \ pulsecore/sconv-s16le.c pulsecore/sconv-s16le.h \ diff --git a/src/daemon/main.c b/src/daemon/main.c index 31e434d9..ec8ff400 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -93,6 +93,7 @@ #ifdef HAVE_DBUS #include #endif +#include #include #include "cmdline.h" @@ -821,6 +822,7 @@ int main(int argc, char *argv[]) { pa_memtrap_install(); pa_cpu_init_x86(); + pa_cpu_init_arm(); pa_assert_se(mainloop = pa_mainloop_new()); diff --git a/src/pulsecore/cpu-arm.c b/src/pulsecore/cpu-arm.c new file mode 100644 index 00000000..75646fe4 --- /dev/null +++ b/src/pulsecore/cpu-arm.c @@ -0,0 +1,43 @@ +/*** + This file is part of PulseAudio. + + Copyright 2004-2006 Lennart Poettering + Copyright 2009 Wim Taymans + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. 
+***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include + +#include "cpu-arm.h" + +static pa_cpu_arm_flag_t pa_cpu_arm_flags; + +void pa_cpu_init_arm (void) { +#if defined (__arm__) + pa_cpu_arm_flags = 0; + + pa_log ("ARM init\n"); + + pa_volume_func_init_arm (pa_cpu_arm_flags); +#endif /* defined (__arm__) */ +} diff --git a/src/pulsecore/cpu-arm.h b/src/pulsecore/cpu-arm.h new file mode 100644 index 00000000..1a0ac273 --- /dev/null +++ b/src/pulsecore/cpu-arm.h @@ -0,0 +1,39 @@ +#ifndef foocpuarmhfoo +#define foocpuarmhfoo + +/*** + This file is part of PulseAudio. + + Copyright 2004-2006 Lennart Poettering + Copyright 2009 Wim Taymans + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#include + +typedef enum pa_cpu_arm_flag { + PA_CPU_ARM_V6 = (1 << 0), + PA_CPU_ARM_NEON = (1 << 1), + PA_CPU_ARM_VFP = (1 << 2) +} pa_cpu_arm_flag_t; + +void pa_cpu_init_arm (void); + +/* some optimized functions */ +void pa_volume_func_init_arm(pa_cpu_arm_flag_t flags); + +#endif /* foocpuarmhfoo */ diff --git a/src/pulsecore/svolume_arm.c b/src/pulsecore/svolume_arm.c new file mode 100644 index 00000000..7e25a13c --- /dev/null +++ b/src/pulsecore/svolume_arm.c @@ -0,0 +1,195 @@ +/*** + This file is part of PulseAudio. 
+ + Copyright 2004-2006 Lennart Poettering + Copyright 2009 Wim Taymans + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include + +#include "cpu-arm.h" + +#include "sample-util.h" +#include "endianmacros.h" + +#if defined (__arm__) + +#define MOD_INC() \ + " subs r0, r6, %2 \n\t" \ + " addcs r0, %1 \n\t" \ + " movcs r6, r0 \n\t" + +static void +pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) +{ + int32_t *ve; + + channels = MAX (4, channels); + ve = volumes + channels; + + __asm__ __volatile__ ( + " mov r6, %1 \n\t" + " mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t) */ + " tst %3, #1 \n\t" /* check for odd samples */ + " beq 2f \n\t" + + "1: \n\t" + " ldr r0, [r6], #4 \n\t" /* odd samples volumes */ + " ldrh r2, [%0] \n\t" + + " smulwb r0, r0, r2 \n\t" + " ssat r0, #16, r0 \n\t" + + " strh r0, [%0], #2 \n\t" + + MOD_INC() + + "2: \n\t" + " mov %3, %3, LSR #1 \n\t" + " tst %3, #1 \n\t" /* check for odd samples */ + " beq 4f \n\t" + + "3: \n\t" + " ldrd r2, [r6], #8 \n\t" /* 2 samples at a time */ + " ldr r0, [%0] \n\t" + + " smulwt r2, r2, r0 \n\t" + " smulwb r3, r3, r0 \n\t" + + " ssat r2, #16, r2 \n\t" + " ssat r3, #16, r3 \n\t" + + " pkhbt r0, r3, r2, LSL #16 \n\t" + " 
str r0, [%0], #4 \n\t" + + MOD_INC() + + "4: \n\t" + " movs %3, %3, LSR #1 \n\t" + " beq 6f \n\t" + + "5: \n\t" + " ldrd r2, [r6], #8 \n\t" /* 4 samples at a time */ + " ldrd r4, [r6], #8 \n\t" + " ldrd r0, [%0] \n\t" + + " smulwt r2, r2, r0 \n\t" + " smulwb r3, r3, r0 \n\t" + " smulwt r4, r4, r1 \n\t" + " smulwb r5, r5, r1 \n\t" + + " ssat r2, #16, r2 \n\t" + " ssat r3, #16, r3 \n\t" + " ssat r4, #16, r4 \n\t" + " ssat r5, #16, r5 \n\t" + + " pkhbt r0, r3, r2, LSL #16 \n\t" + " pkhbt r1, r5, r4, LSL #16 \n\t" + " strd r0, [%0], #8 \n\t" + + MOD_INC() + + " subs %3, %3, #1 \n\t" + " bne 5b \n\t" + "6: \n\t" + + : "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length) + : + : "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc" + ); +} + +#undef RUN_TEST + +#ifdef RUN_TEST +#define CHANNELS 2 +#define SAMPLES 1023 +#define TIMES 1000 +#define PADDING 16 + +static void run_test (void) { + int16_t samples[SAMPLES]; + int16_t samples_ref[SAMPLES]; + int16_t samples_orig[SAMPLES]; + int32_t volumes[CHANNELS + PADDING]; + int i, j, padding; + pa_do_volume_func_t func; + struct timeval start, stop; + + func = pa_get_volume_func (PA_SAMPLE_S16NE); + + printf ("checking ARM %zd\n", sizeof (samples)); + + pa_random (samples, sizeof (samples)); + memcpy (samples_ref, samples, sizeof (samples)); + memcpy (samples_orig, samples, sizeof (samples)); + + for (i = 0; i < CHANNELS; i++) + volumes[i] = rand() >> 1; + for (padding = 0; padding < PADDING; padding++, i++) + volumes[i] = volumes[padding]; + + func (samples_ref, volumes, CHANNELS, sizeof (samples)); + pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples)); + for (i = 0; i < SAMPLES; i++) { + if (samples[i] != samples_ref[i]) { + printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], + samples_orig[i], volumes[i % CHANNELS]); + } + } + + pa_gettimeofday(&start); + for (j = 0; j < TIMES; j++) { + memcpy (samples, samples_orig, sizeof (samples)); + pa_volume_s16ne_arm (samples, volumes, CHANNELS, 
sizeof (samples)); + } + pa_gettimeofday(&stop); + pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + + pa_gettimeofday(&start); + for (j = 0; j < TIMES; j++) { + memcpy (samples_ref, samples_orig, sizeof (samples)); + func (samples_ref, volumes, CHANNELS, sizeof (samples)); + } + pa_gettimeofday(&stop); + pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); +} +#endif + +#endif /* defined (__arm__) */ + + +void pa_volume_func_init_arm (pa_cpu_arm_flag_t flags) { +#if defined (__arm__) + pa_log_info("Initialising ARM optimized functions."); + +#ifdef RUN_TEST + run_test (); +#endif + + pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_arm); +#endif /* defined (__arm__) */ +} -- cgit From bd49d43bd387758f151c56b7ed1643ecb72c0258 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 19 Aug 2009 16:09:48 +0200 Subject: svolume: add CPU guards around code Mark code that should only be compiled on x86 CPUs with proper defines. --- src/pulsecore/svolume_mmx.c | 6 ++++++ src/pulsecore/svolume_sse.c | 5 +++++ 2 files changed, 11 insertions(+) (limited to 'src') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index 5243b447..fb4c82c6 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -35,6 +35,7 @@ #include "sample-util.h" #include "endianmacros.h" +#if defined (__i386__) || defined (__amd64__) /* in s: 2 int16_t samples * in v: 2 int32_t volumes, fixed point 16:16 * out s: contains scaled and clamped int16_t samples. 
@@ -295,7 +296,11 @@ static void run_test (void) { } #endif +#endif /* defined (__i386__) || defined (__amd64__) */ + + void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) { +#if defined (__i386__) || defined (__amd64__) pa_log_info("Initialising MMX optimized functions."); #ifdef RUN_TEST @@ -304,4 +309,5 @@ void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) { pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx); pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx); +#endif /* defined (__i386__) || defined (__amd64__) */ } diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c index 98f828c0..141c466e 100644 --- a/src/pulsecore/svolume_sse.c +++ b/src/pulsecore/svolume_sse.c @@ -35,6 +35,8 @@ #include "sample-util.h" #include "endianmacros.h" +#if defined (__i386__) || defined (__amd64__) + #define VOLUME_32x16(s,v) /* .. | vh | vl | */ \ " pxor %%xmm4, %%xmm4 \n\t" /* .. | 0 | 0 | */ \ " punpcklwd %%xmm4, "#s" \n\t" /* .. | 0 | p0 | */ \ @@ -293,8 +295,10 @@ static void run_test (void) { pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); } #endif +#endif /* defined (__i386__) || defined (__amd64__) */ void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) { +#if defined (__i386__) || defined (__amd64__) pa_log_info("Initialising SSE optimized functions."); #ifdef RUN_TEST @@ -303,4 +307,5 @@ void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) { pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse); pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse); +#endif /* defined (__i386__) || defined (__amd64__) */ } -- cgit From b4e9942c2f3929b4baf4b53b0561102af7845269 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 19 Aug 2009 16:15:18 +0200 Subject: resample: refactor the channel remapping a little Factor out the channel remap matrix code into a separate function. 
Keep a pointer to the channel remapping function so we can install custom functions. Catch the common mono->stereo remapping case and install a custom, more optimized function. --- src/pulsecore/resampler.c | 197 +++++++++++++++++++++++++--------------------- 1 file changed, 109 insertions(+), 88 deletions(-) (limited to 'src') diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c index e3473ac5..4fb03ce7 100644 --- a/src/pulsecore/resampler.c +++ b/src/pulsecore/resampler.c @@ -44,6 +44,11 @@ /* Number of samples of extra space we allow the resamplers to return */ #define EXTRA_FRAMES 128 +typedef void (*pa_do_remap_func_t) (pa_resampler *r, void *d, const void *s, unsigned n); + +static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, unsigned n); +static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n); + struct pa_resampler { pa_resample_method_t method; pa_resample_flags_t flags; @@ -64,6 +69,7 @@ struct pa_resampler { float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; pa_bool_t map_required; + pa_do_remap_func_t do_remap; void (*impl_free)(pa_resampler *r); void (*impl_update_rates)(pa_resampler *r); @@ -1008,6 +1014,17 @@ static void calc_map_table(pa_resampler *r) { pa_log_debug("Channel matrix:\n%s", t = pa_strbuf_tostring_free(s)); pa_xfree(t); + + /* find some common channel remappings, fall back to full matrix operation. 
*/ + if (r->i_ss.channels == 1 && r->o_ss.channels == 2 && + r->map_table_i[0][0] == 1.0 && r->map_table_i[1][0] == 1.0) { + r->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo;; + pa_log_debug("Using mono to stereo remapping"); + } else { + r->do_remap = (pa_do_remap_func_t) remap_channels_matrix; + pa_log_debug("Using generic matrix remapping"); + } + } static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input) { @@ -1047,49 +1064,111 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input) return &r->buf1; } -static void vectoradd_f32( - float *d, int dstr, - const float *s, int sstr, - int n, float s4) { +static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n) { + + switch (r->work_format) { + case PA_SAMPLE_FLOAT32NE: + { + float *d, *s; + + d = (float *) dst; + s = (float *) src; + + for (; n > 0; n--) { + *d++ = *s; + *d++ = *s++; + } + break; + } + case PA_SAMPLE_S16NE: + { + int16_t *d, *s; - for (; n > 0; n--) { - *d = (float) (*d + (s4 * *s)); + d = (int16_t *) dst; + s = (int16_t *) src; - s = (const float*) ((const uint8_t*) s + sstr); - d = (float*) ((uint8_t*) d + dstr); + for (; n > 0; n--) { + *d++ = *s; + *d++ = *s++; + } + break; + } + default: + pa_assert_not_reached(); } } -static void vectoradd_s16( - int16_t *d, int dstr, - const int16_t *s, int sstr, - int n) { +static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, unsigned n) { + unsigned oc; + unsigned n_ic, n_oc; - for (; n > 0; n--) { - *d = (int16_t) (*d + *s); + n_ic = r->i_ss.channels; + n_oc = r->o_ss.channels; - s = (const int16_t*) ((const uint8_t*) s + sstr); - d = (int16_t*) ((uint8_t*) d + dstr); - } -} + memset(dst, 0, r->buf2.length); -static void vectoradd_s16_with_fraction( - int16_t *d, int dstr, - const int16_t *s, int sstr, - int n, int32_t i4) { + switch (r->work_format) { + case PA_SAMPLE_FLOAT32NE: + { + float *d, *s; - for (; n > 0; n--) { - *d = (int16_t) 
(*d + (((int32_t)*s * i4) >> 16)); + for (oc = 0; oc < n_oc; oc++) { + unsigned ic; - s = (const int16_t*) ((const uint8_t*) s + sstr); - d = (int16_t*) ((uint8_t*) d + dstr); + for (ic = 0; ic < n_ic; ic++) { + float vol; + + vol = r->map_table_f[oc][ic]; + + if (vol <= 0.0) + continue; + + d = (float *)dst + oc; + s = (float *)src + ic; + + for (; n > 0; n--, s += n_ic, d += n_oc) + *d += *s * vol; + } + } + + break; + } + case PA_SAMPLE_S16NE: + { + int16_t *d, *s; + + for (oc = 0; oc < n_oc; oc++) { + unsigned ic; + + for (ic = 0; ic < n_ic; ic++) { + int32_t vol; + + vol = r->map_table_i[oc][ic]; + + if (vol <= 0) + continue; + + d = (int16_t *)dst + oc; + s = (int16_t *)src + ic; + + if (vol >= 0x10000) { + for (; n > 0; n--, s += n_ic, d += n_oc) + *d += *s; + } else { + for (; n > 0; n--, s += n_ic, d += n_oc) + *d = (int16_t) (*d + (((int32_t)*s * vol) >> 16)); + } + } + } + break; + } + default: + pa_assert_not_reached(); } } static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) { unsigned in_n_samples, out_n_samples, n_frames; - int i_skip, o_skip; - unsigned oc; void *src, *dst; pa_assert(r); @@ -1119,70 +1198,12 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) { src = ((uint8_t*) pa_memblock_acquire(input->memblock) + input->index); dst = pa_memblock_acquire(r->buf2.memblock); - memset(dst, 0, r->buf2.length); - - o_skip = (int) (r->w_sz * r->o_ss.channels); - i_skip = (int) (r->w_sz * r->i_ss.channels); - - switch (r->work_format) { - case PA_SAMPLE_FLOAT32NE: - - for (oc = 0; oc < r->o_ss.channels; oc++) { - unsigned ic; - - for (ic = 0; ic < r->i_ss.channels; ic++) { - - if (r->map_table_f[oc][ic] <= 0.0) - continue; - - vectoradd_f32( - (float*) dst + oc, o_skip, - (float*) src + ic, i_skip, - (int) n_frames, - r->map_table_f[oc][ic]); - } - } - - break; - - case PA_SAMPLE_S16NE: - - for (oc = 0; oc < r->o_ss.channels; oc++) { - unsigned ic; - - for (ic = 0; ic < r->i_ss.channels; ic++) { - - if 
(r->map_table_f[oc][ic] <= 0.0) - continue; - - if (r->map_table_f[oc][ic] >= 1.0) { - - vectoradd_s16( - (int16_t*) dst + oc, o_skip, - (int16_t*) src + ic, i_skip, - (int) n_frames); - - } else - - vectoradd_s16_with_fraction( - (int16_t*) dst + oc, o_skip, - (int16_t*) src + ic, i_skip, - (int) n_frames, - r->map_table_i[oc][ic]); - } - } - - break; - - default: - pa_assert_not_reached(); - } + pa_assert (r->do_remap); + r->do_remap (r, dst, src, n_frames); pa_memblock_release(input->memblock); pa_memblock_release(r->buf2.memblock); - r->buf2.length = out_n_samples * r->w_sz; - return &r->buf2; } -- cgit From d04a6e935f8352a4ffd93cb1aeddac8f605a099a Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 19 Aug 2009 16:23:55 +0200 Subject: resample: fix counters --- src/pulsecore/resampler.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c index 4fb03ce7..cc57b54e 100644 --- a/src/pulsecore/resampler.c +++ b/src/pulsecore/resampler.c @@ -1099,7 +1099,7 @@ static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, un } static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, unsigned n) { - unsigned oc; + unsigned oc, i; unsigned n_ic, n_oc; n_ic = r->i_ss.channels; @@ -1126,7 +1126,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, d = (float *)dst + oc; s = (float *)src + ic; - for (; n > 0; n--, s += n_ic, d += n_oc) + for (i = n; i > 0; i--, s += n_ic, d += n_oc) *d += *s * vol; } } @@ -1152,10 +1152,10 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, s = (int16_t *)src + ic; if (vol >= 0x10000) { - for (; n > 0; n--, s += n_ic, d += n_oc) + for (i = n; i > 0; i--, s += n_ic, d += n_oc) *d += *s; } else { - for (; n > 0; n--, s += n_ic, d += n_oc) + for (i = n; i > 0; i--, s += n_ic, d += n_oc) *d = (int16_t) (*d + (((int32_t)*s * vol) >> 16)); } } -- cgit 
From 548b735ccf8474ebe60137375cdda4e58582efc3 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 19 Aug 2009 17:24:23 +0200 Subject: resampler: fix identity check Fix the identity matrix check for mono to stereo. Help the compiler generate better code for the C implementation of the channel remapping code. --- src/pulsecore/resampler.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c index cc57b54e..2256516e 100644 --- a/src/pulsecore/resampler.c +++ b/src/pulsecore/resampler.c @@ -1017,7 +1017,7 @@ static void calc_map_table(pa_resampler *r) { /* find some common channel remappings, fall back to full matrix operation. */ if (r->i_ss.channels == 1 && r->o_ss.channels == 2 && - r->map_table_i[0][0] == 1.0 && r->map_table_i[1][0] == 1.0) { + r->map_table_f[0][0] >= 1.0 && r->map_table_f[1][0] >= 1.0) { r->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo;; pa_log_debug("Using mono to stereo remapping"); } else { @@ -1074,10 +1074,8 @@ static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, un d = (float *) dst; s = (float *) src; - for (; n > 0; n--) { - *d++ = *s; - *d++ = *s++; - } + for (; n > 0; n--, s++, d += 2) + d[0] = d[1] = *s; break; } case PA_SAMPLE_S16NE: @@ -1087,10 +1085,8 @@ static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, un d = (int16_t *) dst; s = (int16_t *) src; - for (; n > 0; n--) { - *d++ = *s; - *d++ = *s++; - } + for (; n > 0; n--, s++, d += 2) + d[0] = d[1] = *s; break; } default: @@ -1156,7 +1152,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, *d += *s; } else { for (i = n; i > 0; i--, s += n_ic, d += n_oc) - *d = (int16_t) (*d + (((int32_t)*s * vol) >> 16)); + *d += (int16_t) (((int32_t)*s * vol) >> 16); } } } -- cgit From d2389ef96e21825bb4e945f6c71b5bd27c5fa2b4 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 19 Aug 2009 17:27:17 
+0200 Subject: sample: manually inline table lookups Manually inline some table lookups to avoid excessive calls to pa_sample_spec_valid(). --- src/pulse/sample.c | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/pulse/sample.c b/src/pulse/sample.c index d5d38eda..9698d8a5 100644 --- a/src/pulse/sample.c +++ b/src/pulse/sample.c @@ -36,28 +36,27 @@ #include "sample.h" -size_t pa_sample_size_of_format(pa_sample_format_t f) { - - static const size_t table[] = { - [PA_SAMPLE_U8] = 1, - [PA_SAMPLE_ULAW] = 1, - [PA_SAMPLE_ALAW] = 1, - [PA_SAMPLE_S16LE] = 2, - [PA_SAMPLE_S16BE] = 2, - [PA_SAMPLE_FLOAT32LE] = 4, - [PA_SAMPLE_FLOAT32BE] = 4, - [PA_SAMPLE_S32LE] = 4, - [PA_SAMPLE_S32BE] = 4, - [PA_SAMPLE_S24LE] = 3, - [PA_SAMPLE_S24BE] = 3, - [PA_SAMPLE_S24_32LE] = 4, - [PA_SAMPLE_S24_32BE] = 4 - }; +static const size_t size_table[] = { + [PA_SAMPLE_U8] = 1, + [PA_SAMPLE_ULAW] = 1, + [PA_SAMPLE_ALAW] = 1, + [PA_SAMPLE_S16LE] = 2, + [PA_SAMPLE_S16BE] = 2, + [PA_SAMPLE_FLOAT32LE] = 4, + [PA_SAMPLE_FLOAT32BE] = 4, + [PA_SAMPLE_S32LE] = 4, + [PA_SAMPLE_S32BE] = 4, + [PA_SAMPLE_S24LE] = 3, + [PA_SAMPLE_S24BE] = 3, + [PA_SAMPLE_S24_32LE] = 4, + [PA_SAMPLE_S24_32BE] = 4 +}; +size_t pa_sample_size_of_format(pa_sample_format_t f) { pa_assert(f >= 0); pa_assert(f < PA_SAMPLE_MAX); - return table[f]; + return size_table[f]; } size_t pa_sample_size(const pa_sample_spec *spec) { @@ -65,35 +64,35 @@ size_t pa_sample_size(const pa_sample_spec *spec) { pa_assert(spec); pa_return_val_if_fail(pa_sample_spec_valid(spec), 0); - return pa_sample_size_of_format(spec->format); + return size_table[spec->format]; } size_t pa_frame_size(const pa_sample_spec *spec) { pa_assert(spec); pa_return_val_if_fail(pa_sample_spec_valid(spec), 0); - return pa_sample_size(spec) * spec->channels; + return size_table[spec->format] * spec->channels; } size_t pa_bytes_per_second(const pa_sample_spec *spec) { pa_assert(spec); 
pa_return_val_if_fail(pa_sample_spec_valid(spec), 0); - return spec->rate*pa_frame_size(spec); + return spec->rate * size_table[spec->format] * spec->channels; } pa_usec_t pa_bytes_to_usec(uint64_t length, const pa_sample_spec *spec) { pa_assert(spec); pa_return_val_if_fail(pa_sample_spec_valid(spec), 0); - return (((pa_usec_t) (length / pa_frame_size(spec)) * PA_USEC_PER_SEC) / spec->rate); + return (((pa_usec_t) (length / (size_table[spec->format] * spec->channels)) * PA_USEC_PER_SEC) / spec->rate); } size_t pa_usec_to_bytes(pa_usec_t t, const pa_sample_spec *spec) { pa_assert(spec); pa_return_val_if_fail(pa_sample_spec_valid(spec), 0); - return (size_t) (((t * spec->rate) / PA_USEC_PER_SEC)) * pa_frame_size(spec); + return (size_t) (((t * spec->rate) / PA_USEC_PER_SEC)) * (size_table[spec->format] * spec->channels); } pa_sample_spec* pa_sample_spec_init(pa_sample_spec *spec) { @@ -109,12 +108,12 @@ pa_sample_spec* pa_sample_spec_init(pa_sample_spec *spec) { int pa_sample_spec_valid(const pa_sample_spec *spec) { pa_assert(spec); - if (spec->rate <= 0 || + if (PA_UNLIKELY (spec->rate <= 0 || spec->rate > PA_RATE_MAX || spec->channels <= 0 || spec->channels > PA_CHANNELS_MAX || spec->format >= PA_SAMPLE_MAX || - spec->format < 0) + spec->format < 0)) return 0; return 1; -- cgit From 370016c0e73236830513a9ea9c16366c15bd30a2 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 19 Aug 2009 18:52:28 +0200 Subject: svolume: fix compilation in 32bits --- src/pulsecore/svolume_mmx.c | 12 ++++++------ src/pulsecore/svolume_sse.c | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index fb4c82c6..86af76d3 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -114,11 +114,11 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi " je 2f \n\t" " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ - " movw (%0), %4 \n\t" 
/* .. | p0 | */ + " movw (%0), %w4 \n\t" /* .. | p0 | */ " movd %4, %%mm1 \n\t" VOLUME_32x16 (%%mm1, %%mm0) " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */ - " movw %4, (%0) \n\t" + " movw %w4, (%0) \n\t" " add $2, %0 \n\t" MOD_ADD ($1, %5) @@ -184,13 +184,13 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi " je 2f \n\t" " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ - " movw (%0), %4 \n\t" /* .. | p0 | */ - " rorw $8, %4 \n\t" + " movw (%0), %w4 \n\t" /* .. | p0 | */ + " rorw $8, %w4 \n\t" " movd %4, %%mm1 \n\t" VOLUME_32x16 (%%mm1, %%mm0) " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */ - " rorw $8, %4 \n\t" - " movw %4, (%0) \n\t" + " rorw $8, %w4 \n\t" + " movw %w4, (%0) \n\t" " add $2, %0 \n\t" MOD_ADD ($1, %5) diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c index 141c466e..0054d301 100644 --- a/src/pulsecore/svolume_sse.c +++ b/src/pulsecore/svolume_sse.c @@ -91,11 +91,11 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi " je 2f \n\t" " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */ - " movw (%0), %4 \n\t" /* .. | p0 | */ + " movw (%0), %w4 \n\t" /* .. | p0 | */ " movd %4, %%xmm1 \n\t" VOLUME_32x16 (%%xmm1, %%xmm0) " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */ - " movw %4, (%0) \n\t" + " movw %w4, (%0) \n\t" " add $2, %0 \n\t" MOD_ADD ($1, %5) @@ -168,13 +168,13 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi " je 2f \n\t" " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */ - " movw (%0), %4 \n\t" /* .. | p0 | */ - " rorw $8, %4 \n\t" + " movw (%0), %w4 \n\t" /* .. | p0 | */ + " rorw $8, %w4 \n\t" " movd %4, %%xmm1 \n\t" VOLUME_32x16 (%%xmm1, %%xmm0) " movd %%xmm0, %4 \n\t" /* .. 
| p0*v0 | */ - " rorw $8, %4 \n\t" - " movw %4, (%0) \n\t" + " rorw $8, %w4 \n\t" + " movw %w4, (%0) \n\t" " add $2, %0 \n\t" MOD_ADD ($1, %5) -- cgit From 078bde1b49a11f6c76e47fea19f9d920a45ce3f1 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 19 Aug 2009 19:50:42 +0200 Subject: x86: keep the cpu flags local --- src/pulsecore/cpu-x86.c | 57 +++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/pulsecore/cpu-x86.c b/src/pulsecore/cpu-x86.c index 2da31c92..453ecf5b 100644 --- a/src/pulsecore/cpu-x86.c +++ b/src/pulsecore/cpu-x86.c @@ -45,12 +45,11 @@ get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) } #endif -static pa_cpu_x86_flag_t pa_cpu_x86_flags; - void pa_cpu_init_x86 (void) { #if defined (__i386__) || defined (__amd64__) uint32_t eax, ebx, ecx, edx; uint32_t level; + pa_cpu_x86_flag_t flags = 0; /* get standard level */ get_cpuid (0x00000000, &level, &ebx, &ecx, &edx); @@ -58,25 +57,25 @@ void pa_cpu_init_x86 (void) { get_cpuid (0x00000001, &eax, &ebx, &ecx, &edx); if (edx & (1<<23)) - pa_cpu_x86_flags |= PA_CPU_X86_MMX; + flags |= PA_CPU_X86_MMX; if (edx & (1<<25)) - pa_cpu_x86_flags |= PA_CPU_X86_SSE; + flags |= PA_CPU_X86_SSE; if (edx & (1<<26)) - pa_cpu_x86_flags |= PA_CPU_X86_SSE2; + flags |= PA_CPU_X86_SSE2; if (ecx & (1<<0)) - pa_cpu_x86_flags |= PA_CPU_X86_SSE3; + flags |= PA_CPU_X86_SSE3; if (ecx & (1<<9)) - pa_cpu_x86_flags |= PA_CPU_X86_SSSE3; + flags |= PA_CPU_X86_SSSE3; if (ecx & (1<<19)) - pa_cpu_x86_flags |= PA_CPU_X86_SSE4_1; + flags |= PA_CPU_X86_SSE4_1; if (ecx & (1<<20)) - pa_cpu_x86_flags |= PA_CPU_X86_SSE4_2; + flags |= PA_CPU_X86_SSE4_2; } /* get extended level */ @@ -85,38 +84,36 @@ void pa_cpu_init_x86 (void) { get_cpuid (0x80000001, &eax, &ebx, &ecx, &edx); if (edx & (1<<22)) - pa_cpu_x86_flags |= PA_CPU_X86_MMXEXT; + flags |= PA_CPU_X86_MMXEXT; if (edx & (1<<23)) - pa_cpu_x86_flags |= PA_CPU_X86_MMX; + flags |= 
PA_CPU_X86_MMX; if (edx & (1<<30)) - pa_cpu_x86_flags |= PA_CPU_X86_3DNOWEXT; + flags |= PA_CPU_X86_3DNOWEXT; if (edx & (1<<31)) - pa_cpu_x86_flags |= PA_CPU_X86_3DNOW; + flags |= PA_CPU_X86_3DNOW; } pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s", - (pa_cpu_x86_flags & PA_CPU_X86_MMX) ? "MMX " : "", - (pa_cpu_x86_flags & PA_CPU_X86_SSE) ? "SSE " : "", - (pa_cpu_x86_flags & PA_CPU_X86_SSE2) ? "SSE2 " : "", - (pa_cpu_x86_flags & PA_CPU_X86_SSE3) ? "SSE3 " : "", - (pa_cpu_x86_flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "", - (pa_cpu_x86_flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "", - (pa_cpu_x86_flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "", - (pa_cpu_x86_flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "", - (pa_cpu_x86_flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "", - (pa_cpu_x86_flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : ""); + (flags & PA_CPU_X86_MMX) ? "MMX " : "", + (flags & PA_CPU_X86_SSE) ? "SSE " : "", + (flags & PA_CPU_X86_SSE2) ? "SSE2 " : "", + (flags & PA_CPU_X86_SSE3) ? "SSE3 " : "", + (flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "", + (flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "", + (flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "", + (flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "", + (flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "", + (flags & PA_CPU_X86_3DNOWEXT) ? 
"3DNOWEXT " : ""); /* activate various optimisations */ - if (pa_cpu_x86_flags & PA_CPU_X86_MMX) { - pa_volume_func_init_mmx (pa_cpu_x86_flags); + if (flags & PA_CPU_X86_MMX) { + pa_volume_func_init_mmx (flags); } - if (pa_cpu_x86_flags & PA_CPU_X86_SSE) { - pa_volume_func_init_sse (pa_cpu_x86_flags); + if (flags & PA_CPU_X86_SSE) { + pa_volume_func_init_sse (flags); } -#else - pa_cpu_x86_flags = 0; -#endif +#endif /* defined (__i386__) || defined (__amd64__) */ } -- cgit From 8aa86f5247103432faf660cba33f5ce80fbbc2c7 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 19 Aug 2009 19:51:11 +0200 Subject: arm: implement ARM cpu detection --- src/pulsecore/cpu-arm.c | 107 +++++++++++++++++++++++++++++++++++++++++++++--- src/pulsecore/cpu-arm.h | 7 +++- 2 files changed, 107 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/pulsecore/cpu-arm.c b/src/pulsecore/cpu-arm.c index 75646fe4..93ad3891 100644 --- a/src/pulsecore/cpu-arm.c +++ b/src/pulsecore/cpu-arm.c @@ -25,19 +25,116 @@ #endif #include +#include +#include +#include +#include #include #include "cpu-arm.h" -static pa_cpu_arm_flag_t pa_cpu_arm_flags; +#if defined (__arm__) && defined (__linux__) + +#define MAX_BUFFER 4096 +static char * +get_cpuinfo_line (char *cpuinfo, const char *tag) { + char *line, *end, *colon; + + if (!(line = strstr (cpuinfo, tag))) + return NULL; + + if (!(end = strchr (line, '\n'))) + return NULL; + + if (!(colon = strchr (line, ':'))) + return NULL; + + if (++colon >= end) + return NULL; + + return pa_xstrndup (colon, end - colon); +} + +static char *get_cpuinfo(void) { + char *cpuinfo; + int n, fd; + + if (!(cpuinfo = malloc(MAX_BUFFER))) + return NULL; + + if ((fd = open("/proc/cpuinfo", O_RDONLY)) < 0) { + free (cpuinfo); + return NULL; + } + + if ((n = read(fd, cpuinfo, MAX_BUFFER-1)) < 0) { + free (cpuinfo); + close (fd); + return NULL; + } + cpuinfo[n] = 0; + close (fd); + + return cpuinfo; +} +#endif /* defined (__arm__) && defined (__linux__) */ void 
pa_cpu_init_arm (void) { #if defined (__arm__) - pa_cpu_arm_flags = 0; - - pa_log ("ARM init\n"); +#if defined (__linux__) + char *cpuinfo, *line; + int arch; + pa_cpu_arm_flag_t flags = 0; + + /* We need to read the CPU flags from /proc/cpuinfo because there is no user + * space support to get the CPU features. This only works on linux AFAIK. */ + if (!(cpuinfo = get_cpuinfo ())) { + pa_log ("Can't read cpuinfo"); + return; + } + + /* get the CPU architecture */ + if ((line = get_cpuinfo_line (cpuinfo, "CPU architecture"))) { + arch = strtoul (line, NULL, 0); + if (arch >= 6) + flags |= PA_CPU_ARM_V6; + if (arch >= 7) + flags |= PA_CPU_ARM_V7; + + free (line); + } + /* get the CPU features */ + if ((line = get_cpuinfo_line (cpuinfo, "Features"))) { + char *state = NULL, *current; + + while ((current = pa_split_spaces (line, &state))) { + if (!strcmp (current, "vfp")) + flags |= PA_CPU_ARM_VFP; + else if (!strcmp (current, "edsp")) + flags |= PA_CPU_ARM_EDSP; + else if (!strcmp (current, "neon")) + flags |= PA_CPU_ARM_NEON; + else if (!strcmp (current, "vfpv3")) + flags |= PA_CPU_ARM_VFPV3; + + free (current); + } + } + free (cpuinfo); + + pa_log_info ("CPU flags: %s%s%s%s%s%s", + (flags & PA_CPU_ARM_V6) ? "V6 " : "", + (flags & PA_CPU_ARM_V7) ? "V7 " : "", + (flags & PA_CPU_ARM_VFP) ? "VFP " : "", + (flags & PA_CPU_ARM_EDSP) ? "EDSP " : "", + (flags & PA_CPU_ARM_NEON) ? "NEON " : "", + (flags & PA_CPU_ARM_VFPV3) ? 
"VFPV3 " : ""); +#else /* defined (__linux__) */ + pa_log ("ARM cpu features not yet supported on this OS"); +#endif /* defined (__linux__) */ - pa_volume_func_init_arm (pa_cpu_arm_flags); + if (flags & PA_CPU_ARM_V6) + pa_volume_func_init_arm (flags); #endif /* defined (__arm__) */ } diff --git a/src/pulsecore/cpu-arm.h b/src/pulsecore/cpu-arm.h index 1a0ac273..3ccd0708 100644 --- a/src/pulsecore/cpu-arm.h +++ b/src/pulsecore/cpu-arm.h @@ -27,8 +27,11 @@ typedef enum pa_cpu_arm_flag { PA_CPU_ARM_V6 = (1 << 0), - PA_CPU_ARM_NEON = (1 << 1), - PA_CPU_ARM_VFP = (1 << 2) + PA_CPU_ARM_V7 = (1 << 1), + PA_CPU_ARM_VFP = (1 << 2), + PA_CPU_ARM_EDSP = (1 << 3), + PA_CPU_ARM_NEON = (1 << 4), + PA_CPU_ARM_VFPV3 = (1 << 5) } pa_cpu_arm_flag_t; void pa_cpu_init_arm (void); -- cgit From aeae567f8861d2f068ebd0f054cd9d0aa6a7fe95 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 19 Aug 2009 20:00:28 +0200 Subject: svolume: add comment --- src/pulsecore/svolume_sse.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src') diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c index 0054d301..5979f7c2 100644 --- a/src/pulsecore/svolume_sse.c +++ b/src/pulsecore/svolume_sse.c @@ -117,6 +117,9 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi " test $1, %2 \n\t" " je 6f \n\t" + /* FIXME, we can do aligned access of the volume values if we can guarantee + * that the array is 16 bytes aligned, we probably have to do the odd values + * after this then. */ "5: \n\t" /* do samples in groups of 4 */ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. 
p0 | */ -- cgit From 3cc1278dcf44c9fb93bfd2725a2f75de1958cf23 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 19 Aug 2009 20:47:48 +0200 Subject: resampler: avoid some multiplies when we can --- src/pulsecore/resampler.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c index 2256516e..43771dc8 100644 --- a/src/pulsecore/resampler.c +++ b/src/pulsecore/resampler.c @@ -1122,8 +1122,13 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, d = (float *)dst + oc; s = (float *)src + ic; - for (i = n; i > 0; i--, s += n_ic, d += n_oc) - *d += *s * vol; + if (vol >= 1.0) { + for (i = n; i > 0; i--, s += n_ic, d += n_oc) + *d += *s; + } else { + for (i = n; i > 0; i--, s += n_ic, d += n_oc) + *d += *s * vol; + } } } -- cgit From f09b51198f43d79b22cb92b5223d01a7ab339d9f Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 20 Aug 2009 10:56:20 +0200 Subject: whitespace fixes --- src/pulsecore/cpu-arm.c | 32 ++-- src/pulsecore/cpu-x86.c | 49 +++--- src/pulsecore/resampler.c | 65 ++++--- src/pulsecore/sample-util.c | 11 +- src/pulsecore/svolume_arm.c | 242 +++++++++++++------------- src/pulsecore/svolume_c.c | 330 +++++++++++++++++------------------ src/pulsecore/svolume_mmx.c | 366 +++++++++++++++++++-------------------- src/pulsecore/svolume_sse.c | 410 ++++++++++++++++++++++---------------------- 8 files changed, 765 insertions(+), 740 deletions(-) (limited to 'src') diff --git a/src/pulsecore/cpu-arm.c b/src/pulsecore/cpu-arm.c index 93ad3891..5a994b71 100644 --- a/src/pulsecore/cpu-arm.c +++ b/src/pulsecore/cpu-arm.c @@ -36,14 +36,14 @@ #if defined (__arm__) && defined (__linux__) -#define MAX_BUFFER 4096 +#define MAX_BUFFER 4096 static char * get_cpuinfo_line (char *cpuinfo, const char *tag) { char *line, *end, *colon; if (!(line = strstr (cpuinfo, tag))) return NULL; - + if (!(end = strchr (line, '\n'))) return NULL; @@ -106,20 +106,20 @@ 
void pa_cpu_init_arm (void) { } /* get the CPU features */ if ((line = get_cpuinfo_line (cpuinfo, "Features"))) { - char *state = NULL, *current; - - while ((current = pa_split_spaces (line, &state))) { - if (!strcmp (current, "vfp")) - flags |= PA_CPU_ARM_VFP; - else if (!strcmp (current, "edsp")) - flags |= PA_CPU_ARM_EDSP; - else if (!strcmp (current, "neon")) - flags |= PA_CPU_ARM_NEON; - else if (!strcmp (current, "vfpv3")) - flags |= PA_CPU_ARM_VFPV3; - - free (current); - } + char *state = NULL, *current; + + while ((current = pa_split_spaces (line, &state))) { + if (!strcmp (current, "vfp")) + flags |= PA_CPU_ARM_VFP; + else if (!strcmp (current, "edsp")) + flags |= PA_CPU_ARM_EDSP; + else if (!strcmp (current, "neon")) + flags |= PA_CPU_ARM_NEON; + else if (!strcmp (current, "vfpv3")) + flags |= PA_CPU_ARM_VFPV3; + + free (current); + } } free (cpuinfo); diff --git a/src/pulsecore/cpu-x86.c b/src/pulsecore/cpu-x86.c index 453ecf5b..0457199d 100644 --- a/src/pulsecore/cpu-x86.c +++ b/src/pulsecore/cpu-x86.c @@ -2,7 +2,7 @@ This file is part of PulseAudio. Copyright 2004-2006 Lennart Poettering - Copyright 2009 Wim Taymans + Copyright 2009 Wim Taymans PulseAudio is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published @@ -34,14 +34,15 @@ static void get_cpuid (uint32_t op, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) { - __asm__ __volatile__ ( - " push %%"PA_REG_b" \n\t" - " cpuid \n\t" - " mov %%ebx, %%esi \n\t" - " pop %%"PA_REG_b" \n\t" - - : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d) - : "0" (op)); + __asm__ __volatile__ ( + " push %%"PA_REG_b" \n\t" + " cpuid \n\t" + " mov %%ebx, %%esi \n\t" + " pop %%"PA_REG_b" \n\t" + + : "=a" (*a), "=S" (*b), "=c" (*c), "=d" (*d) + : "0" (op) + ); } #endif @@ -97,23 +98,23 @@ void pa_cpu_init_x86 (void) { } pa_log_info ("CPU flags: %s%s%s%s%s%s%s%s%s%s", - (flags & PA_CPU_X86_MMX) ? "MMX " : "", - (flags & PA_CPU_X86_SSE) ? 
"SSE " : "", - (flags & PA_CPU_X86_SSE2) ? "SSE2 " : "", - (flags & PA_CPU_X86_SSE3) ? "SSE3 " : "", - (flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "", - (flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "", - (flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "", - (flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "", - (flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "", - (flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : ""); + (flags & PA_CPU_X86_MMX) ? "MMX " : "", + (flags & PA_CPU_X86_SSE) ? "SSE " : "", + (flags & PA_CPU_X86_SSE2) ? "SSE2 " : "", + (flags & PA_CPU_X86_SSE3) ? "SSE3 " : "", + (flags & PA_CPU_X86_SSSE3) ? "SSSE3 " : "", + (flags & PA_CPU_X86_SSE4_1) ? "SSE4_1 " : "", + (flags & PA_CPU_X86_SSE4_2) ? "SSE4_2 " : "", + (flags & PA_CPU_X86_MMXEXT) ? "MMXEXT " : "", + (flags & PA_CPU_X86_3DNOW) ? "3DNOW " : "", + (flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : ""); /* activate various optimisations */ - if (flags & PA_CPU_X86_MMX) { + if (flags & PA_CPU_X86_MMX) pa_volume_func_init_mmx (flags); - } - if (flags & PA_CPU_X86_SSE) { - pa_volume_func_init_sse (flags); - } + + if (flags & PA_CPU_X86_SSE) + pa_volume_func_init_sse (flags); + #endif /* defined (__i386__) || defined (__amd64__) */ } diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c index 43771dc8..5a6c398e 100644 --- a/src/pulsecore/resampler.c +++ b/src/pulsecore/resampler.c @@ -1065,30 +1065,53 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input) } static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n) { - + unsigned i; + switch (r->work_format) { case PA_SAMPLE_FLOAT32NE: { float *d, *s; - d = (float *) dst; - s = (float *) src; + d = (float *) dst; + s = (float *) src; - for (; n > 0; n--, s++, d += 2) - d[0] = d[1] = *s; - break; - } + for (i = n >> 2; i; i--) { + d[0] = d[1] = s[0]; + d[2] = d[3] = s[1]; + d[4] = d[5] = s[2]; + d[6] = d[7] = s[3]; + s += 4; + d += 8; + } + for (i = n & 3; i; i--) { + d[0] = d[1] = s[0]; + s++; + d += 2; + } + 
break; + } case PA_SAMPLE_S16NE: { int16_t *d, *s; - d = (int16_t *) dst; - s = (int16_t *) src; + d = (int16_t *) dst; + s = (int16_t *) src; - for (; n > 0; n--, s++, d += 2) - d[0] = d[1] = *s; - break; - } + for (i = n >> 2; i; i--) { + d[0] = d[1] = s[0]; + d[2] = d[3] = s[1]; + d[4] = d[5] = s[2]; + d[6] = d[7] = s[3]; + s += 4; + d += 8; + } + for (i = n & 3; i; i--) { + d[0] = d[1] = s[0]; + s++; + d += 2; + } + break; + } default: pa_assert_not_reached(); } @@ -1114,7 +1137,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, for (ic = 0; ic < n_ic; ic++) { float vol; - vol = r->map_table_f[oc][ic]; + vol = r->map_table_f[oc][ic]; if (vol <= 0.0) continue; @@ -1122,18 +1145,18 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, d = (float *)dst + oc; s = (float *)src + ic; - if (vol >= 1.0) { + if (vol >= 1.0) { for (i = n; i > 0; i--, s += n_ic, d += n_oc) *d += *s; - } else { + } else { for (i = n; i > 0; i--, s += n_ic, d += n_oc) *d += *s * vol; - } + } } } break; - } + } case PA_SAMPLE_S16NE: { int16_t *d, *s; @@ -1144,7 +1167,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, for (ic = 0; ic < n_ic; ic++) { int32_t vol; - vol = r->map_table_i[oc][ic]; + vol = r->map_table_i[oc][ic]; if (vol <= 0) continue; @@ -1158,11 +1181,11 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, } else { for (i = n; i > 0; i--, s += n_ic, d += n_oc) *d += (int16_t) (((int32_t)*s * vol) >> 16); - } + } } } break; - } + } default: pa_assert_not_reached(); } diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c index 677f914a..6e97e5a9 100644 --- a/src/pulsecore/sample-util.c +++ b/src/pulsecore/sample-util.c @@ -752,12 +752,13 @@ void pa_volume_memchunk( return; } - ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index; - do_volume = pa_get_volume_func (spec->format); pa_assert(do_volume); - + 
calc_volume_table[spec->format] ((void *)linear, volume); + + ptr = (uint8_t*) pa_memblock_acquire(c->memblock) + c->index; + do_volume (ptr, (void *)linear, spec->channels, c->length); pa_memblock_release(c->memblock); @@ -944,12 +945,12 @@ void pa_sample_clamp(pa_sample_format_t format, void *dst, size_t dstr, const vo for (; n > 0; n--) { float f; - f = *s; + f = *s; *d = PA_CLAMP_UNLIKELY(f, -1.0f, 1.0f); s = (const float*) ((const uint8_t*) s + sstr); d = (float*) ((uint8_t*) d + dstr); - } + } } else { pa_assert(format == PA_SAMPLE_FLOAT32RE); diff --git a/src/pulsecore/svolume_arm.c b/src/pulsecore/svolume_arm.c index 7e25a13c..0d39d105 100644 --- a/src/pulsecore/svolume_arm.c +++ b/src/pulsecore/svolume_arm.c @@ -40,86 +40,86 @@ #define MOD_INC() \ " subs r0, r6, %2 \n\t" \ " addcs r0, %1 \n\t" \ - " movcs r6, r0 \n\t" + " movcs r6, r0 \n\t" static void pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - int32_t *ve; - - channels = MAX (4, channels); - ve = volumes + channels; - - __asm__ __volatile__ ( - " mov r6, %1 \n\t" - " mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t) */ - " tst %3, #1 \n\t" /* check for odd samples */ - " beq 2f \n\t" - - "1: \n\t" - " ldr r0, [r6], #4 \n\t" /* odd samples volumes */ - " ldrh r2, [%0] \n\t" - - " smulwb r0, r0, r2 \n\t" - " ssat r0, #16, r0 \n\t" - - " strh r0, [%0], #2 \n\t" - - MOD_INC() - - "2: \n\t" - " mov %3, %3, LSR #1 \n\t" - " tst %3, #1 \n\t" /* check for odd samples */ - " beq 4f \n\t" - - "3: \n\t" - " ldrd r2, [r6], #8 \n\t" /* 2 samples at a time */ - " ldr r0, [%0] \n\t" - - " smulwt r2, r2, r0 \n\t" - " smulwb r3, r3, r0 \n\t" - - " ssat r2, #16, r2 \n\t" - " ssat r3, #16, r3 \n\t" - - " pkhbt r0, r3, r2, LSL #16 \n\t" - " str r0, [%0], #4 \n\t" - - MOD_INC() - - "4: \n\t" - " movs %3, %3, LSR #1 \n\t" - " beq 6f \n\t" - - "5: \n\t" - " ldrd r2, [r6], #8 \n\t" /* 4 samples at a time */ - " ldrd r4, [r6], #8 \n\t" - " ldrd r0, [%0] \n\t" - - " smulwt 
r2, r2, r0 \n\t" - " smulwb r3, r3, r0 \n\t" - " smulwt r4, r4, r1 \n\t" - " smulwb r5, r5, r1 \n\t" - - " ssat r2, #16, r2 \n\t" - " ssat r3, #16, r3 \n\t" - " ssat r4, #16, r4 \n\t" - " ssat r5, #16, r5 \n\t" - - " pkhbt r0, r3, r2, LSL #16 \n\t" - " pkhbt r1, r5, r4, LSL #16 \n\t" - " strd r0, [%0], #8 \n\t" - - MOD_INC() - - " subs %3, %3, #1 \n\t" - " bne 5b \n\t" - "6: \n\t" - - : "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length) - : - : "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc" - ); + int32_t *ve; + + channels = MAX (4, channels); + ve = volumes + channels; + + __asm__ __volatile__ ( + " mov r6, %1 \n\t" + " mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t) */ + " tst %3, #1 \n\t" /* check for odd samples */ + " beq 2f \n\t" + + "1: \n\t" + " ldr r0, [r6], #4 \n\t" /* odd samples volumes */ + " ldrh r2, [%0] \n\t" + + " smulwb r0, r0, r2 \n\t" + " ssat r0, #16, r0 \n\t" + + " strh r0, [%0], #2 \n\t" + + MOD_INC() + + "2: \n\t" + " mov %3, %3, LSR #1 \n\t" + " tst %3, #1 \n\t" /* check for odd samples */ + " beq 4f \n\t" + + "3: \n\t" + " ldrd r2, [r6], #8 \n\t" /* 2 samples at a time */ + " ldr r0, [%0] \n\t" + + " smulwt r2, r2, r0 \n\t" + " smulwb r3, r3, r0 \n\t" + + " ssat r2, #16, r2 \n\t" + " ssat r3, #16, r3 \n\t" + + " pkhbt r0, r3, r2, LSL #16 \n\t" + " str r0, [%0], #4 \n\t" + + MOD_INC() + + "4: \n\t" + " movs %3, %3, LSR #1 \n\t" + " beq 6f \n\t" + + "5: \n\t" + " ldrd r2, [r6], #8 \n\t" /* 4 samples at a time */ + " ldrd r4, [r6], #8 \n\t" + " ldrd r0, [%0] \n\t" + + " smulwt r2, r2, r0 \n\t" + " smulwb r3, r3, r0 \n\t" + " smulwt r4, r4, r1 \n\t" + " smulwb r5, r5, r1 \n\t" + + " ssat r2, #16, r2 \n\t" + " ssat r3, #16, r3 \n\t" + " ssat r4, #16, r4 \n\t" + " ssat r5, #16, r5 \n\t" + + " pkhbt r0, r3, r2, LSL #16 \n\t" + " pkhbt r1, r5, r4, LSL #16 \n\t" + " strd r0, [%0], #8 \n\t" + + MOD_INC() + + " subs %3, %3, #1 \n\t" + " bne 5b \n\t" + "6: \n\t" + + : "+r" (samples), "+r" (volumes), "+r" (ve), "+r" (length) + : + : "r6", 
"r5", "r4", "r3", "r2", "r1", "r0", "cc" + ); } #undef RUN_TEST @@ -131,51 +131,51 @@ pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsi #define PADDING 16 static void run_test (void) { - int16_t samples[SAMPLES]; - int16_t samples_ref[SAMPLES]; - int16_t samples_orig[SAMPLES]; - int32_t volumes[CHANNELS + PADDING]; - int i, j, padding; - pa_do_volume_func_t func; - struct timeval start, stop; - - func = pa_get_volume_func (PA_SAMPLE_S16NE); - - printf ("checking ARM %zd\n", sizeof (samples)); - - pa_random (samples, sizeof (samples)); - memcpy (samples_ref, samples, sizeof (samples)); - memcpy (samples_orig, samples, sizeof (samples)); - - for (i = 0; i < CHANNELS; i++) - volumes[i] = rand() >> 1; - for (padding = 0; padding < PADDING; padding++, i++) - volumes[i] = volumes[padding]; - - func (samples_ref, volumes, CHANNELS, sizeof (samples)); - pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples)); - for (i = 0; i < SAMPLES; i++) { - if (samples[i] != samples_ref[i]) { - printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], - samples_orig[i], volumes[i % CHANNELS]); - } - } + int16_t samples[SAMPLES]; + int16_t samples_ref[SAMPLES]; + int16_t samples_orig[SAMPLES]; + int32_t volumes[CHANNELS + PADDING]; + int i, j, padding; + pa_do_volume_func_t func; + struct timeval start, stop; - pa_gettimeofday(&start); - for (j = 0; j < TIMES; j++) { - memcpy (samples, samples_orig, sizeof (samples)); - pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples)); - } - pa_gettimeofday(&stop); - pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + func = pa_get_volume_func (PA_SAMPLE_S16NE); + + printf ("checking ARM %zd\n", sizeof (samples)); + + pa_random (samples, sizeof (samples)); + memcpy (samples_ref, samples, sizeof (samples)); + memcpy (samples_orig, samples, sizeof (samples)); + + for (i = 0; i < CHANNELS; i++) + volumes[i] = rand() >> 1; + for (padding = 0; 
padding < PADDING; padding++, i++) + volumes[i] = volumes[padding]; - pa_gettimeofday(&start); - for (j = 0; j < TIMES; j++) { - memcpy (samples_ref, samples_orig, sizeof (samples)); func (samples_ref, volumes, CHANNELS, sizeof (samples)); - } - pa_gettimeofday(&stop); - pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples)); + for (i = 0; i < SAMPLES; i++) { + if (samples[i] != samples_ref[i]) { + printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], + samples_orig[i], volumes[i % CHANNELS]); + } + } + + pa_gettimeofday(&start); + for (j = 0; j < TIMES; j++) { + memcpy (samples, samples_orig, sizeof (samples)); + pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples)); + } + pa_gettimeofday(&stop); + pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + + pa_gettimeofday(&start); + for (j = 0; j < TIMES; j++) { + memcpy (samples_ref, samples_orig, sizeof (samples)); + func (samples_ref, volumes, CHANNELS, sizeof (samples)); + } + pa_gettimeofday(&stop); + pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); } #endif @@ -184,12 +184,12 @@ static void run_test (void) { void pa_volume_func_init_arm (pa_cpu_arm_flag_t flags) { #if defined (__arm__) - pa_log_info("Initialising ARM optimized functions."); + pa_log_info("Initialising ARM optimized functions."); #ifdef RUN_TEST - run_test (); + run_test (); #endif - pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_arm); + pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_arm); #endif /* defined (__arm__) */ } diff --git a/src/pulsecore/svolume_c.c b/src/pulsecore/svolume_c.c index 2148a573..5fc052b8 100644 --- a/src/pulsecore/svolume_c.c +++ b/src/pulsecore/svolume_c.c @@ -35,289 +35,289 @@ static void pa_volume_u8_c (uint8_t *samples, int32_t *volumes, unsigned 
channels, unsigned length) { - unsigned channel; + unsigned channel; - for (channel = 0; length; length--) { - int32_t t, hi, lo; + for (channel = 0; length; length--) { + int32_t t, hi, lo; - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; - t = (int32_t) *samples - 0x80; - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); - *samples++ = (uint8_t) (t + 0x80); + t = (int32_t) *samples - 0x80; + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F); + *samples++ = (uint8_t) (t + 0x80); - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static void pa_volume_alaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - unsigned channel; + unsigned channel; - for (channel = 0; length; length--) { - int32_t t, hi, lo; + for (channel = 0; length; length--) { + int32_t t, hi, lo; - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; - t = (int32_t) st_alaw2linear16(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3); + t = (int32_t) st_alaw2linear16(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (uint8_t) st_13linear2alaw((int16_t) t >> 3); - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static void pa_volume_ulaw_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - unsigned channel; + unsigned channel; - for (channel = 0; length; length--) { - int32_t t, hi, lo; + for (channel = 0; length; length--) { + int32_t t, hi, lo; - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; + hi = volumes[channel] >> 16; + lo = 
volumes[channel] & 0xFFFF; - t = (int32_t) st_ulaw2linear16(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); + t = (int32_t) st_ulaw2linear16(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (uint8_t) st_14linear2ulaw((int16_t) t >> 2); - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static void pa_volume_s16ne_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - unsigned channel; + unsigned channel; - length /= sizeof (int16_t); + length /= sizeof (int16_t); - for (channel = 0; length; length--) { - int32_t t, hi, lo; + for (channel = 0; length; length--) { + int32_t t, hi, lo; - /* Multiplying the 32bit volume factor with the 16bit - * sample might result in an 48bit value. We want to - * do without 64 bit integers and hence do the - * multiplication independantly for the HI and LO part - * of the volume. */ + /* Multiplying the 32bit volume factor with the 16bit + * sample might result in an 48bit value. We want to + * do without 64 bit integers and hence do the + * multiplication independantly for the HI and LO part + * of the volume. 
*/ - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; - t = (int32_t)(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = (int16_t) t; + t = (int32_t)(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = (int16_t) t; - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static void pa_volume_s16re_c (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - unsigned channel; + unsigned channel; - length /= sizeof (int16_t); + length /= sizeof (int16_t); - for (channel = 0; length; length--) { - int32_t t, hi, lo; + for (channel = 0; length; length--) { + int32_t t, hi, lo; - hi = volumes[channel] >> 16; - lo = volumes[channel] & 0xFFFF; + hi = volumes[channel] >> 16; + lo = volumes[channel] & 0xFFFF; - t = (int32_t) PA_INT16_SWAP(*samples); - t = ((t * lo) >> 16) + (t * hi); - t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); - *samples++ = PA_INT16_SWAP((int16_t) t); + t = (int32_t) PA_INT16_SWAP(*samples); + t = ((t * lo) >> 16) + (t * hi); + t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF); + *samples++ = PA_INT16_SWAP((int16_t) t); - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static void pa_volume_float32ne_c (float *samples, float *volumes, unsigned channels, unsigned length) { - unsigned channel; + unsigned channel; - length /= sizeof (float); + length /= sizeof (float); - for (channel = 0; length; length--) { - *samples++ *= volumes[channel]; + for (channel = 0; length; length--) { + *samples++ *= volumes[channel]; - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static void pa_volume_float32re_c (float *samples, float *volumes, unsigned channels, 
unsigned length) { - unsigned channel; + unsigned channel; - length /= sizeof (float); + length /= sizeof (float); - for (channel = 0; length; length--) { - float t; + for (channel = 0; length; length--) { + float t; - t = PA_FLOAT32_SWAP(*samples); - t *= volumes[channel]; - *samples++ = PA_FLOAT32_SWAP(t); + t = PA_FLOAT32_SWAP(*samples); + t *= volumes[channel]; + *samples++ = PA_FLOAT32_SWAP(t); - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static void pa_volume_s32ne_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - unsigned channel; + unsigned channel; - length /= sizeof (int32_t); + length /= sizeof (int32_t); - for (channel = 0; length; length--) { - int64_t t; + for (channel = 0; length; length--) { + int64_t t; - t = (int64_t)(*samples); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = (int32_t) t; + t = (int64_t)(*samples); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = (int32_t) t; - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static void pa_volume_s32re_c (int32_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - unsigned channel; + unsigned channel; - length /= sizeof (int32_t); + length /= sizeof (int32_t); - for (channel = 0; length; length--) { - int64_t t; + for (channel = 0; length; length--) { + int64_t t; - t = (int64_t) PA_INT32_SWAP(*samples); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = PA_INT32_SWAP((int32_t) t); + t = (int64_t) PA_INT32_SWAP(*samples); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = PA_INT32_SWAP((int32_t) t); - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if 
(PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static void pa_volume_s24ne_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - unsigned channel; - uint8_t *e; + unsigned channel; + uint8_t *e; - e = samples + length; + e = samples + length; - for (channel = 0; samples < e; samples += 3) { - int64_t t; + for (channel = 0; samples < e; samples += 3) { + int64_t t; - t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8); + t = (int64_t)((int32_t) (PA_READ24NE(samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + PA_WRITE24NE(samples, ((uint32_t) (int32_t) t) >> 8); - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static void pa_volume_s24re_c (uint8_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - unsigned channel; - uint8_t *e; + unsigned channel; + uint8_t *e; - e = samples + length; + e = samples + length; - for (channel = 0; samples < e; samples += 3) { - int64_t t; + for (channel = 0; samples < e; samples += 3) { + int64_t t; - t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8); + t = (int64_t)((int32_t) (PA_READ24RE(samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + PA_WRITE24RE(samples, ((uint32_t) (int32_t) t) >> 8); - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static void pa_volume_s24_32ne_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - unsigned channel; + unsigned channel; - length /= sizeof (uint32_t); + 
length /= sizeof (uint32_t); - for (channel = 0; length; length--) { - int64_t t; + for (channel = 0; length; length--) { + int64_t t; - t = (int64_t) ((int32_t) (*samples << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = ((uint32_t) ((int32_t) t)) >> 8; + t = (int64_t) ((int32_t) (*samples << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = ((uint32_t) ((int32_t) t)) >> 8; - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static void pa_volume_s24_32re_c (uint32_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - unsigned channel; + unsigned channel; - length /= sizeof (uint32_t); + length /= sizeof (uint32_t); - for (channel = 0; length; length--) { - int64_t t; + for (channel = 0; length; length--) { + int64_t t; - t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8)); - t = (t * volumes[channel]) >> 16; - t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); - *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); + t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*samples) << 8)); + t = (t * volumes[channel]) >> 16; + t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL); + *samples++ = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8); - if (PA_UNLIKELY(++channel >= channels)) - channel = 0; - } + if (PA_UNLIKELY(++channel >= channels)) + channel = 0; + } } static pa_do_volume_func_t do_volume_table[] = { - [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c, - [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c, - [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c, - [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c, - [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c, - [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c, - [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) 
pa_volume_float32re_c, - [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c, - [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c, - [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c, - [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c, - [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c, - [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c + [PA_SAMPLE_U8] = (pa_do_volume_func_t) pa_volume_u8_c, + [PA_SAMPLE_ALAW] = (pa_do_volume_func_t) pa_volume_alaw_c, + [PA_SAMPLE_ULAW] = (pa_do_volume_func_t) pa_volume_ulaw_c, + [PA_SAMPLE_S16NE] = (pa_do_volume_func_t) pa_volume_s16ne_c, + [PA_SAMPLE_S16RE] = (pa_do_volume_func_t) pa_volume_s16re_c, + [PA_SAMPLE_FLOAT32NE] = (pa_do_volume_func_t) pa_volume_float32ne_c, + [PA_SAMPLE_FLOAT32RE] = (pa_do_volume_func_t) pa_volume_float32re_c, + [PA_SAMPLE_S32NE] = (pa_do_volume_func_t) pa_volume_s32ne_c, + [PA_SAMPLE_S32RE] = (pa_do_volume_func_t) pa_volume_s32re_c, + [PA_SAMPLE_S24NE] = (pa_do_volume_func_t) pa_volume_s24ne_c, + [PA_SAMPLE_S24RE] = (pa_do_volume_func_t) pa_volume_s24re_c, + [PA_SAMPLE_S24_32NE] = (pa_do_volume_func_t) pa_volume_s24_32ne_c, + [PA_SAMPLE_S24_32RE] = (pa_do_volume_func_t) pa_volume_s24_32re_c }; pa_do_volume_func_t pa_get_volume_func(pa_sample_format_t f) { diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index 86af76d3..7e242684 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -73,7 +73,7 @@ " add "#a", %3 \n\t" \ " mov %3, %4 \n\t" \ " sub "#b", %4 \n\t" \ - " cmovae %4, %3 \n\t" + " cmovae %4, %3 \n\t" /* swap 16 bits */ #define SWAP_16(s) \ @@ -96,147 +96,147 @@ static void pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - pa_reg_x86 channel, temp; - - /* the max number of samples we process at a time, this is also the max amount - * we overread the volume array, which should have enough padding. 
*/ - channels = MAX (4, channels); - - __asm__ __volatile__ ( - " xor %3, %3 \n\t" - " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ - " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */ - " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */ - " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */ - " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */ - - " test $1, %2 \n\t" /* check for odd samples */ - " je 2f \n\t" - - " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ - " movw (%0), %w4 \n\t" /* .. | p0 | */ - " movd %4, %%mm1 \n\t" - VOLUME_32x16 (%%mm1, %%mm0) - " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */ - " movw %w4, (%0) \n\t" - " add $2, %0 \n\t" - MOD_ADD ($1, %5) - - "2: \n\t" - " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ - " test $1, %2 \n\t" /* check for odd samples */ - " je 4f \n\t" - - "3: \n\t" /* do samples in groups of 2 */ - " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ - " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ - VOLUME_32x16 (%%mm1, %%mm0) - " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ - " add $4, %0 \n\t" - MOD_ADD ($2, %5) - - "4: \n\t" - " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ - " cmp $0, %2 \n\t" - " je 6f \n\t" - - "5: \n\t" /* do samples in groups of 4 */ - " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ - " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */ - " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ - " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */ - VOLUME_32x16 (%%mm1, %%mm0) - VOLUME_32x16 (%%mm3, %%mm2) - " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ - " movd %%mm2, 4(%0) \n\t" /* .. 
| p3*v3 | p2*v2 | */ - " add $8, %0 \n\t" - MOD_ADD ($4, %5) - " dec %2 \n\t" - " jne 5b \n\t" - - "6: \n\t" - " emms \n\t" - - : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) - : "r" ((pa_reg_x86)channels) - : "cc" - ); + pa_reg_x86 channel, temp; + + /* the max number of samples we process at a time, this is also the max amount + * we overread the volume array, which should have enough padding. */ + channels = MAX (4, channels); + + __asm__ __volatile__ ( + " xor %3, %3 \n\t" + " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ + " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */ + " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */ + " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */ + " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */ + + " test $1, %2 \n\t" /* check for odd samples */ + " je 2f \n\t" + + " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ + " movw (%0), %w4 \n\t" /* .. | p0 | */ + " movd %4, %%mm1 \n\t" + VOLUME_32x16 (%%mm1, %%mm0) + " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */ + " movw %w4, (%0) \n\t" + " add $2, %0 \n\t" + MOD_ADD ($1, %5) + + "2: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ + " test $1, %2 \n\t" /* check for odd samples */ + " je 4f \n\t" + + "3: \n\t" /* do samples in groups of 2 */ + " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ + VOLUME_32x16 (%%mm1, %%mm0) + " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ + " add $4, %0 \n\t" + MOD_ADD ($2, %5) + + "4: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ + " cmp $0, %2 \n\t" + " je 6f \n\t" + + "5: \n\t" /* do samples in groups of 4 */ + " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */ + " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ + " movd 4(%0), %%mm3 \n\t" /* .. 
| p3 | p2 | */ + VOLUME_32x16 (%%mm1, %%mm0) + VOLUME_32x16 (%%mm3, %%mm2) + " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ + " movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */ + " add $8, %0 \n\t" + MOD_ADD ($4, %5) + " dec %2 \n\t" + " jne 5b \n\t" + + "6: \n\t" + " emms \n\t" + + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) + : "r" ((pa_reg_x86)channels) + : "cc" + ); } static void pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - pa_reg_x86 channel, temp; - - /* the max number of samples we process at a time, this is also the max amount - * we overread the volume array, which should have enough padding. */ - channels = MAX (4, channels); - - __asm__ __volatile__ ( - " xor %3, %3 \n\t" - " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ - " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */ - " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */ - " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */ - " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */ - - " test $1, %2 \n\t" /* check for odd samples */ - " je 2f \n\t" - - " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ - " movw (%0), %w4 \n\t" /* .. | p0 | */ - " rorw $8, %w4 \n\t" - " movd %4, %%mm1 \n\t" - VOLUME_32x16 (%%mm1, %%mm0) - " movd %%mm0, %4 \n\t" /* .. | p0*v0 | */ - " rorw $8, %w4 \n\t" - " movw %w4, (%0) \n\t" - " add $2, %0 \n\t" - MOD_ADD ($1, %5) - - "2: \n\t" - " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ - " test $1, %2 \n\t" /* check for odd samples */ - " je 4f \n\t" - - "3: \n\t" /* do samples in groups of 2 */ - " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ - " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ - SWAP_16 (%%mm1) - VOLUME_32x16 (%%mm1, %%mm0) - SWAP_16 (%%mm0) - " movd %%mm0, (%0) \n\t" /* .. 
| p1*v1 | p0*v0 | */ - " add $4, %0 \n\t" - MOD_ADD ($2, %5) - - "4: \n\t" - " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ - " cmp $0, %2 \n\t" - " je 6f \n\t" - - "5: \n\t" /* do samples in groups of 4 */ - " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ - " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */ - " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ - " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */ - SWAP_16_2 (%%mm1, %%mm3) - VOLUME_32x16 (%%mm1, %%mm0) - VOLUME_32x16 (%%mm3, %%mm2) - SWAP_16_2 (%%mm0, %%mm2) - " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ - " movd %%mm2, 4(%0) \n\t" /* .. | p3*v3 | p2*v2 | */ - " add $8, %0 \n\t" - MOD_ADD ($4, %5) - " dec %2 \n\t" - " jne 5b \n\t" - - "6: \n\t" - " emms \n\t" - - : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) - : "r" ((pa_reg_x86)channels) - : "cc" - ); + pa_reg_x86 channel, temp; + + /* the max number of samples we process at a time, this is also the max amount + * we overread the volume array, which should have enough padding. */ + channels = MAX (4, channels); + + __asm__ __volatile__ ( + " xor %3, %3 \n\t" + " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ + " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */ + " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */ + " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */ + " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */ + + " test $1, %2 \n\t" /* check for odd samples */ + " je 2f \n\t" + + " movd (%1, %3, 4), %%mm0 \n\t" /* | v0h | v0l | */ + " movw (%0), %w4 \n\t" /* .. | p0 | */ + " rorw $8, %w4 \n\t" + " movd %4, %%mm1 \n\t" + VOLUME_32x16 (%%mm1, %%mm0) + " movd %%mm0, %4 \n\t" /* .. 
| p0*v0 | */ + " rorw $8, %w4 \n\t" + " movw %w4, (%0) \n\t" + " add $2, %0 \n\t" + MOD_ADD ($1, %5) + + "2: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ + " test $1, %2 \n\t" /* check for odd samples */ + " je 4f \n\t" + + "3: \n\t" /* do samples in groups of 2 */ + " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ + SWAP_16 (%%mm1) + VOLUME_32x16 (%%mm1, %%mm0) + SWAP_16 (%%mm0) + " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ + " add $4, %0 \n\t" + MOD_ADD ($2, %5) + + "4: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ + " cmp $0, %2 \n\t" + " je 6f \n\t" + + "5: \n\t" /* do samples in groups of 4 */ + " movq (%1, %3, 4), %%mm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movq 8(%1, %3, 4), %%mm2 \n\t" /* | v3h | v3l | v2h | v2l | */ + " movd (%0), %%mm1 \n\t" /* .. | p1 | p0 | */ + " movd 4(%0), %%mm3 \n\t" /* .. | p3 | p2 | */ + SWAP_16_2 (%%mm1, %%mm3) + VOLUME_32x16 (%%mm1, %%mm0) + VOLUME_32x16 (%%mm3, %%mm2) + SWAP_16_2 (%%mm0, %%mm2) + " movd %%mm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ + " movd %%mm2, 4(%0) \n\t" /* .. 
| p3*v3 | p2*v2 | */ + " add $8, %0 \n\t" + MOD_ADD ($4, %5) + " dec %2 \n\t" + " jne 5b \n\t" + + "6: \n\t" + " emms \n\t" + + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) + : "r" ((pa_reg_x86)channels) + : "cc" + ); } #undef RUN_TEST @@ -248,51 +248,51 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi #define PADDING 16 static void run_test (void) { - int16_t samples[SAMPLES]; - int16_t samples_ref[SAMPLES]; - int16_t samples_orig[SAMPLES]; - int32_t volumes[CHANNELS + PADDING]; - int i, j, padding; - pa_do_volume_func_t func; - struct timeval start, stop; - - func = pa_get_volume_func (PA_SAMPLE_S16NE); - - printf ("checking MMX %zd\n", sizeof (samples)); - - pa_random (samples, sizeof (samples)); - memcpy (samples_ref, samples, sizeof (samples)); - memcpy (samples_orig, samples, sizeof (samples)); - - for (i = 0; i < CHANNELS; i++) - volumes[i] = rand() >> 1; - for (padding = 0; padding < PADDING; padding++, i++) - volumes[i] = volumes[padding]; - - func (samples_ref, volumes, CHANNELS, sizeof (samples)); - pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); - for (i = 0; i < SAMPLES; i++) { - if (samples[i] != samples_ref[i]) { - printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], - samples_orig[i], volumes[i % CHANNELS]); - } - } + int16_t samples[SAMPLES]; + int16_t samples_ref[SAMPLES]; + int16_t samples_orig[SAMPLES]; + int32_t volumes[CHANNELS + PADDING]; + int i, j, padding; + pa_do_volume_func_t func; + struct timeval start, stop; - pa_gettimeofday(&start); - for (j = 0; j < TIMES; j++) { - memcpy (samples, samples_orig, sizeof (samples)); - pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); - } - pa_gettimeofday(&stop); - pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + func = pa_get_volume_func (PA_SAMPLE_S16NE); + + printf ("checking MMX %zd\n", sizeof (samples)); + + 
pa_random (samples, sizeof (samples)); + memcpy (samples_ref, samples, sizeof (samples)); + memcpy (samples_orig, samples, sizeof (samples)); + + for (i = 0; i < CHANNELS; i++) + volumes[i] = rand() >> 1; + for (padding = 0; padding < PADDING; padding++, i++) + volumes[i] = volumes[padding]; - pa_gettimeofday(&start); - for (j = 0; j < TIMES; j++) { - memcpy (samples_ref, samples_orig, sizeof (samples)); func (samples_ref, volumes, CHANNELS, sizeof (samples)); - } - pa_gettimeofday(&stop); - pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); + for (i = 0; i < SAMPLES; i++) { + if (samples[i] != samples_ref[i]) { + printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], + samples_orig[i], volumes[i % CHANNELS]); + } + } + + pa_gettimeofday(&start); + for (j = 0; j < TIMES; j++) { + memcpy (samples, samples_orig, sizeof (samples)); + pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); + } + pa_gettimeofday(&stop); + pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + + pa_gettimeofday(&start); + for (j = 0; j < TIMES; j++) { + memcpy (samples_ref, samples_orig, sizeof (samples)); + func (samples_ref, volumes, CHANNELS, sizeof (samples)); + } + pa_gettimeofday(&stop); + pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); } #endif @@ -301,13 +301,13 @@ static void run_test (void) { void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) { #if defined (__i386__) || defined (__amd64__) - pa_log_info("Initialising MMX optimized functions."); + pa_log_info("Initialising MMX optimized functions."); #ifdef RUN_TEST - run_test (); + run_test (); #endif - pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx); - pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx); + pa_set_volume_func (PA_SAMPLE_S16NE, 
(pa_do_volume_func_t) pa_volume_s16ne_mmx); + pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx); #endif /* defined (__i386__) || defined (__amd64__) */ } diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c index 5979f7c2..b5e3687f 100644 --- a/src/pulsecore/svolume_sse.c +++ b/src/pulsecore/svolume_sse.c @@ -48,7 +48,7 @@ " psrld $16, "#v" \n\t" /* .. | p0 | 0 | */ \ " pmaddwd %%xmm5, "#v" \n\t" /* .. | p0 * vh | */ \ " paddd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \ - " packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */ + " packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */ #define MOD_ADD(a,b) \ " add "#a", %3 \n\t" /* channel += inc */ \ @@ -77,169 +77,169 @@ static void pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - pa_reg_x86 channel, temp; - - /* the max number of samples we process at a time, this is also the max amount - * we overread the volume array, which should have enough padding. */ - channels = MAX (8, channels); - - __asm__ __volatile__ ( - " xor %3, %3 \n\t" - " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ - - " test $1, %2 \n\t" /* check for odd samples */ - " je 2f \n\t" - - " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */ - " movw (%0), %w4 \n\t" /* .. | p0 | */ - " movd %4, %%xmm1 \n\t" - VOLUME_32x16 (%%xmm1, %%xmm0) - " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */ - " movw %w4, (%0) \n\t" - " add $2, %0 \n\t" - MOD_ADD ($1, %5) - - "2: \n\t" - " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ - " test $1, %2 \n\t" - " je 4f \n\t" - - "3: \n\t" /* do samples in groups of 2 */ - " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */ - " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */ - VOLUME_32x16 (%%xmm1, %%xmm0) - " movd %%xmm0, (%0) \n\t" /* .. 
| p1*v1 | p0*v0 | */ - " add $4, %0 \n\t" - MOD_ADD ($2, %5) - - "4: \n\t" - " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ - " test $1, %2 \n\t" - " je 6f \n\t" - - /* FIXME, we can do aligned access of the volume values if we can guarantee - * that the array is 16 bytes aligned, we probably have to do the odd values - * after this then. */ - "5: \n\t" /* do samples in groups of 4 */ - " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ - " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ - VOLUME_32x16 (%%xmm1, %%xmm0) - " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ - " add $8, %0 \n\t" - MOD_ADD ($4, %5) - - "6: \n\t" - " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */ - " cmp $0, %2 \n\t" - " je 8f \n\t" - - "7: \n\t" /* do samples in groups of 8 */ - " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ - " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */ - " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ - " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */ - VOLUME_32x16 (%%xmm1, %%xmm0) - VOLUME_32x16 (%%xmm3, %%xmm2) - " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ - " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */ - " add $16, %0 \n\t" - MOD_ADD ($8, %5) - " dec %2 \n\t" - " jne 7b \n\t" - "8: \n\t" - - : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) - : "r" ((pa_reg_x86)channels) - : "cc" - ); + pa_reg_x86 channel, temp; + + /* the max number of samples we process at a time, this is also the max amount + * we overread the volume array, which should have enough padding. */ + channels = MAX (8, channels); + + __asm__ __volatile__ ( + " xor %3, %3 \n\t" + " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ + + " test $1, %2 \n\t" /* check for odd samples */ + " je 2f \n\t" + + " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */ + " movw (%0), %w4 \n\t" /* .. 
| p0 | */ + " movd %4, %%xmm1 \n\t" + VOLUME_32x16 (%%xmm1, %%xmm0) + " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */ + " movw %w4, (%0) \n\t" + " add $2, %0 \n\t" + MOD_ADD ($1, %5) + + "2: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ + " test $1, %2 \n\t" + " je 4f \n\t" + + "3: \n\t" /* do samples in groups of 2 */ + " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */ + VOLUME_32x16 (%%xmm1, %%xmm0) + " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ + " add $4, %0 \n\t" + MOD_ADD ($2, %5) + + "4: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ + " test $1, %2 \n\t" + " je 6f \n\t" + + /* FIXME, we can do aligned access of the volume values if we can guarantee + * that the array is 16 bytes aligned, we probably have to do the odd values + * after this then. */ + "5: \n\t" /* do samples in groups of 4 */ + " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ + " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ + VOLUME_32x16 (%%xmm1, %%xmm0) + " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ + " add $8, %0 \n\t" + MOD_ADD ($4, %5) + + "6: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */ + " cmp $0, %2 \n\t" + " je 8f \n\t" + + "7: \n\t" /* do samples in groups of 8 */ + " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ + " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */ + " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ + " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */ + VOLUME_32x16 (%%xmm1, %%xmm0) + VOLUME_32x16 (%%xmm3, %%xmm2) + " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ + " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. 
p4*v4 | */ + " add $16, %0 \n\t" + MOD_ADD ($8, %5) + " dec %2 \n\t" + " jne 7b \n\t" + "8: \n\t" + + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) + : "r" ((pa_reg_x86)channels) + : "cc" + ); } static void pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { - pa_reg_x86 channel, temp; - - /* the max number of samples we process at a time, this is also the max amount - * we overread the volume array, which should have enough padding. */ - channels = MAX (8, channels); - - __asm__ __volatile__ ( - " xor %3, %3 \n\t" - " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ - - " test $1, %2 \n\t" /* check for odd samples */ - " je 2f \n\t" - - " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */ - " movw (%0), %w4 \n\t" /* .. | p0 | */ - " rorw $8, %w4 \n\t" - " movd %4, %%xmm1 \n\t" - VOLUME_32x16 (%%xmm1, %%xmm0) - " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */ - " rorw $8, %w4 \n\t" - " movw %w4, (%0) \n\t" - " add $2, %0 \n\t" - MOD_ADD ($1, %5) - - "2: \n\t" - " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ - " test $1, %2 \n\t" - " je 4f \n\t" - - "3: \n\t" /* do samples in groups of 2 */ - " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */ - " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */ - SWAP_16 (%%xmm1) - VOLUME_32x16 (%%xmm1, %%xmm0) - SWAP_16 (%%xmm0) - " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ - " add $4, %0 \n\t" - MOD_ADD ($2, %5) - - "4: \n\t" - " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ - " test $1, %2 \n\t" - " je 6f \n\t" - - /* FIXME, we can do aligned access of the volume values if we can guarantee - * that the array is 16 bytes aligned, we probably have to do the odd values - * after this then. */ - "5: \n\t" /* do samples in groups of 4 */ - " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ - " movq (%0), %%xmm1 \n\t" /* .. | p3 .. 
p0 | */ - SWAP_16 (%%xmm1) - VOLUME_32x16 (%%xmm1, %%xmm0) - SWAP_16 (%%xmm0) - " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ - " add $8, %0 \n\t" - MOD_ADD ($4, %5) - - "6: \n\t" - " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */ - " cmp $0, %2 \n\t" - " je 8f \n\t" - - "7: \n\t" /* do samples in groups of 8 */ - " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ - " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */ - " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ - " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */ - SWAP_16_2 (%%xmm1, %%xmm3) - VOLUME_32x16 (%%xmm1, %%xmm0) - VOLUME_32x16 (%%xmm3, %%xmm2) - SWAP_16_2 (%%xmm0, %%xmm2) - " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ - " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */ - " add $16, %0 \n\t" - MOD_ADD ($8, %5) - " dec %2 \n\t" - " jne 7b \n\t" - "8: \n\t" - - : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) - : "r" ((pa_reg_x86)channels) - : "cc" - ); + pa_reg_x86 channel, temp; + + /* the max number of samples we process at a time, this is also the max amount + * we overread the volume array, which should have enough padding. */ + channels = MAX (8, channels); + + __asm__ __volatile__ ( + " xor %3, %3 \n\t" + " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ + + " test $1, %2 \n\t" /* check for odd samples */ + " je 2f \n\t" + + " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */ + " movw (%0), %w4 \n\t" /* .. | p0 | */ + " rorw $8, %w4 \n\t" + " movd %4, %%xmm1 \n\t" + VOLUME_32x16 (%%xmm1, %%xmm0) + " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */ + " rorw $8, %w4 \n\t" + " movw %w4, (%0) \n\t" + " add $2, %0 \n\t" + MOD_ADD ($1, %5) + + "2: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */ + " test $1, %2 \n\t" + " je 4f \n\t" + + "3: \n\t" /* do samples in groups of 2 */ + " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */ + " movd (%0), %%xmm1 \n\t" /* .. 
| p1 | p0 | */ + SWAP_16 (%%xmm1) + VOLUME_32x16 (%%xmm1, %%xmm0) + SWAP_16 (%%xmm0) + " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */ + " add $4, %0 \n\t" + MOD_ADD ($2, %5) + + "4: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */ + " test $1, %2 \n\t" + " je 6f \n\t" + + /* FIXME, we can do aligned access of the volume values if we can guarantee + * that the array is 16 bytes aligned, we probably have to do the odd values + * after this then. */ + "5: \n\t" /* do samples in groups of 4 */ + " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ + " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ + SWAP_16 (%%xmm1) + VOLUME_32x16 (%%xmm1, %%xmm0) + SWAP_16 (%%xmm0) + " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ + " add $8, %0 \n\t" + MOD_ADD ($4, %5) + + "6: \n\t" + " sar $1, %2 \n\t" /* prepare for processing 8 samples at a time */ + " cmp $0, %2 \n\t" + " je 8f \n\t" + + "7: \n\t" /* do samples in groups of 8 */ + " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */ + " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */ + " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */ + " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */ + SWAP_16_2 (%%xmm1, %%xmm3) + VOLUME_32x16 (%%xmm1, %%xmm0) + VOLUME_32x16 (%%xmm3, %%xmm2) + SWAP_16_2 (%%xmm0, %%xmm2) + " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */ + " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. 
p4*v4 | */ + " add $16, %0 \n\t" + MOD_ADD ($8, %5) + " dec %2 \n\t" + " jne 7b \n\t" + "8: \n\t" + + : "+r" (samples), "+r" (volumes), "+r" (length), "=D" (channel), "=&r" (temp) + : "r" ((pa_reg_x86)channels) + : "cc" + ); } #undef RUN_TEST @@ -251,64 +251,64 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi #define PADDING 16 static void run_test (void) { - int16_t samples[SAMPLES]; - int16_t samples_ref[SAMPLES]; - int16_t samples_orig[SAMPLES]; - int32_t volumes[CHANNELS + PADDING]; - int i, j, padding; - pa_do_volume_func_t func; - struct timeval start, stop; - - func = pa_get_volume_func (PA_SAMPLE_S16NE); - - printf ("checking SSE %zd\n", sizeof (samples)); - - pa_random (samples, sizeof (samples)); - memcpy (samples_ref, samples, sizeof (samples)); - memcpy (samples_orig, samples, sizeof (samples)); - - for (i = 0; i < CHANNELS; i++) - volumes[i] = rand() >> 1; - for (padding = 0; padding < PADDING; padding++, i++) - volumes[i] = volumes[padding]; - - func (samples_ref, volumes, CHANNELS, sizeof (samples)); - pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples)); - for (i = 0; i < SAMPLES; i++) { - if (samples[i] != samples_ref[i]) { - printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], - samples_orig[i], volumes[i % CHANNELS]); - } - } + int16_t samples[SAMPLES]; + int16_t samples_ref[SAMPLES]; + int16_t samples_orig[SAMPLES]; + int32_t volumes[CHANNELS + PADDING]; + int i, j, padding; + pa_do_volume_func_t func; + struct timeval start, stop; - pa_gettimeofday(&start); - for (j = 0; j < TIMES; j++) { - memcpy (samples, samples_orig, sizeof (samples)); - pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples)); - } - pa_gettimeofday(&stop); - pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + func = pa_get_volume_func (PA_SAMPLE_S16NE); + + printf ("checking SSE %zd\n", sizeof (samples)); + + pa_random (samples, sizeof (samples)); + 
memcpy (samples_ref, samples, sizeof (samples)); + memcpy (samples_orig, samples, sizeof (samples)); + + for (i = 0; i < CHANNELS; i++) + volumes[i] = rand() >> 1; + for (padding = 0; padding < PADDING; padding++, i++) + volumes[i] = volumes[padding]; - pa_gettimeofday(&start); - for (j = 0; j < TIMES; j++) { - memcpy (samples_ref, samples_orig, sizeof (samples)); func (samples_ref, volumes, CHANNELS, sizeof (samples)); - } - pa_gettimeofday(&stop); - pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples)); + for (i = 0; i < SAMPLES; i++) { + if (samples[i] != samples_ref[i]) { + printf ("%d: %04x != %04x (%04x * %04x)\n", i, samples[i], samples_ref[i], + samples_orig[i], volumes[i % CHANNELS]); + } + } + + pa_gettimeofday(&start); + for (j = 0; j < TIMES; j++) { + memcpy (samples, samples_orig, sizeof (samples)); + pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples)); + } + pa_gettimeofday(&stop); + pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + + pa_gettimeofday(&start); + for (j = 0; j < TIMES; j++) { + memcpy (samples_ref, samples_orig, sizeof (samples)); + func (samples_ref, volumes, CHANNELS, sizeof (samples)); + } + pa_gettimeofday(&stop); + pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); } #endif #endif /* defined (__i386__) || defined (__amd64__) */ void pa_volume_func_init_sse (pa_cpu_x86_flag_t flags) { #if defined (__i386__) || defined (__amd64__) - pa_log_info("Initialising SSE optimized functions."); + pa_log_info("Initialising SSE optimized functions."); #ifdef RUN_TEST - run_test (); + run_test (); #endif - pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse); - pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse); + pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_sse); + 
pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_sse); #endif /* defined (__i386__) || defined (__amd64__) */ } -- cgit From f8ffe0dabcedf56437c00feb895d7d7229971ba0 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 20 Aug 2009 12:30:48 +0200 Subject: svolume: cleanups Use PA_MAX Use pa_rtclock_now() for benchmarks --- src/pulsecore/svolume_arm.c | 16 ++++++++-------- src/pulsecore/svolume_mmx.c | 18 +++++++++--------- src/pulsecore/svolume_sse.c | 18 +++++++++--------- 3 files changed, 26 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/pulsecore/svolume_arm.c b/src/pulsecore/svolume_arm.c index 0d39d105..5bd1448f 100644 --- a/src/pulsecore/svolume_arm.c +++ b/src/pulsecore/svolume_arm.c @@ -47,7 +47,7 @@ pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsi { int32_t *ve; - channels = MAX (4, channels); + channels = PA_MAX (4U, channels); ve = volumes + channels; __asm__ __volatile__ ( @@ -137,7 +137,7 @@ static void run_test (void) { int32_t volumes[CHANNELS + PADDING]; int i, j, padding; pa_do_volume_func_t func; - struct timeval start, stop; + pa_usec_t start, stop; func = pa_get_volume_func (PA_SAMPLE_S16NE); @@ -161,21 +161,21 @@ static void run_test (void) { } } - pa_gettimeofday(&start); + start = pa_rtclock_now(); for (j = 0; j < TIMES; j++) { memcpy (samples, samples_orig, sizeof (samples)); pa_volume_s16ne_arm (samples, volumes, CHANNELS, sizeof (samples)); } - pa_gettimeofday(&stop); - pa_log_info("ARM: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + stop = pa_rtclock_now(); + pa_log_info("ARM: %llu usec.", (long long unsigned int) (stop - start)); - pa_gettimeofday(&start); + start = pa_rtclock_now(); for (j = 0; j < TIMES; j++) { memcpy (samples_ref, samples_orig, sizeof (samples)); func (samples_ref, volumes, CHANNELS, sizeof (samples)); } - pa_gettimeofday(&stop); - pa_log_info("ref: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, 
&start)); + stop = pa_rtclock_now(); + pa_log_info("ref: %llu usec.", (long long unsigned int) (stop - start)); } #endif diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index 7e242684..8510b0c4 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -100,7 +100,7 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi /* the max number of samples we process at a time, this is also the max amount * we overread the volume array, which should have enough padding. */ - channels = MAX (4, channels); + channels = PA_MAX (4U, channels); __asm__ __volatile__ ( " xor %3, %3 \n\t" @@ -170,7 +170,7 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi /* the max number of samples we process at a time, this is also the max amount * we overread the volume array, which should have enough padding. */ - channels = MAX (4, channels); + channels = PA_MAX (4U, channels); __asm__ __volatile__ ( " xor %3, %3 \n\t" @@ -254,7 +254,7 @@ static void run_test (void) { int32_t volumes[CHANNELS + PADDING]; int i, j, padding; pa_do_volume_func_t func; - struct timeval start, stop; + pa_usec_t start, stop; func = pa_get_volume_func (PA_SAMPLE_S16NE); @@ -278,21 +278,21 @@ static void run_test (void) { } } - pa_gettimeofday(&start); + start = pa_rtclock_now(); for (j = 0; j < TIMES; j++) { memcpy (samples, samples_orig, sizeof (samples)); pa_volume_s16ne_mmx (samples, volumes, CHANNELS, sizeof (samples)); } - pa_gettimeofday(&stop); - pa_log_info("MMX: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + stop = pa_rtclock_now(); + pa_log_info("MMX: %llu usec.", (long long unsigned int)(stop - start)); - pa_gettimeofday(&start); + start = pa_rtclock_now(); for (j = 0; j < TIMES; j++) { memcpy (samples_ref, samples_orig, sizeof (samples)); func (samples_ref, volumes, CHANNELS, sizeof (samples)); } - pa_gettimeofday(&stop); - pa_log_info("ref: %llu usec.", (long long unsigned 
int)pa_timeval_diff (&stop, &start)); + stop = pa_rtclock_now(); + pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); } #endif diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c index b5e3687f..54af4a57 100644 --- a/src/pulsecore/svolume_sse.c +++ b/src/pulsecore/svolume_sse.c @@ -81,7 +81,7 @@ pa_volume_s16ne_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi /* the max number of samples we process at a time, this is also the max amount * we overread the volume array, which should have enough padding. */ - channels = MAX (8, channels); + channels = PA_MAX (8U, channels); __asm__ __volatile__ ( " xor %3, %3 \n\t" @@ -161,7 +161,7 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi /* the max number of samples we process at a time, this is also the max amount * we overread the volume array, which should have enough padding. */ - channels = MAX (8, channels); + channels = PA_MAX (8U, channels); __asm__ __volatile__ ( " xor %3, %3 \n\t" @@ -257,7 +257,7 @@ static void run_test (void) { int32_t volumes[CHANNELS + PADDING]; int i, j, padding; pa_do_volume_func_t func; - struct timeval start, stop; + pa_usec_t start, stop; func = pa_get_volume_func (PA_SAMPLE_S16NE); @@ -281,21 +281,21 @@ static void run_test (void) { } } - pa_gettimeofday(&start); + start = pa_rtclock_now(); for (j = 0; j < TIMES; j++) { memcpy (samples, samples_orig, sizeof (samples)); pa_volume_s16ne_sse (samples, volumes, CHANNELS, sizeof (samples)); } - pa_gettimeofday(&stop); - pa_log_info("SSE: %llu usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + stop = pa_rtclock_now(); + pa_log_info("SSE: %llu usec.", (long long unsigned int)(stop - start)); - pa_gettimeofday(&start); + start = pa_rtclock_now(); for (j = 0; j < TIMES; j++) { memcpy (samples_ref, samples_orig, sizeof (samples)); func (samples_ref, volumes, CHANNELS, sizeof (samples)); } - pa_gettimeofday(&stop); - pa_log_info("ref: %llu 
usec.", (long long unsigned int)pa_timeval_diff (&stop, &start)); + stop = pa_rtclock_now(); + pa_log_info("ref: %llu usec.", (long long unsigned int)(stop - start)); } #endif #endif /* defined (__i386__) || defined (__amd64__) */ -- cgit From c1b6a87b27b569cda135da05b53cc98aa9ca37cb Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 20 Aug 2009 13:40:27 +0200 Subject: alsa-sink: reduce the amount of smoother updates Exponentially increase the amount of time between smoother updates. We start with a 2ms interval and increase up to 200ms intervals. Smoother updates and the resulting linear regression take a fair amount of CPU so we want to reduce the amount of updates. --- src/modules/alsa/alsa-sink.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/modules/alsa/alsa-sink.c b/src/modules/alsa/alsa-sink.c index e3707ae7..c3694729 100644 --- a/src/modules/alsa/alsa-sink.c +++ b/src/modules/alsa/alsa-sink.c @@ -68,6 +68,9 @@ #define TSCHED_MIN_SLEEP_USEC (10*PA_USEC_PER_MSEC) /* 10ms -- Sleep at least 10ms on each iteration */ #define TSCHED_MIN_WAKEUP_USEC (4*PA_USEC_PER_MSEC) /* 4ms -- Wakeup at least this long before the buffer runs empty*/ +#define SMOOTHER_MIN_INTERVAL (2*PA_USEC_PER_MSEC) /* 2ms -- min smoother update interval */ +#define SMOOTHER_MAX_INTERVAL (200*PA_USEC_PER_MSEC) /* 200ms -- max smoother update interval */ + #define VOLUME_ACCURACY (PA_VOLUME_NORM/100) /* don't require volume adjustments to be perfectly correct. 
don't necessarily extend granularity in software unless the differences get greater than this level */ struct userdata { @@ -115,6 +118,8 @@ struct userdata { pa_smoother *smoother; uint64_t write_count; uint64_t since_start; + pa_usec_t smoother_interval; + pa_usec_t last_smoother_update; pa_reserve_wrapper *reserve; pa_hook_slot *reserve_slot; @@ -723,17 +728,27 @@ static void update_smoother(struct userdata *u) { now1 = pa_timespec_load(&htstamp); } + /* Hmm, if the timestamp is 0, then it wasn't set and we take the current time */ + if (now1 <= 0) + now1 = pa_rtclock_now(); + + /* check if the time since the last update is bigger than the interval */ + if (u->last_smoother_update > 0) { + if (u->last_smoother_update + u->smoother_interval > now1) + return; + } + position = (int64_t) u->write_count - ((int64_t) delay * (int64_t) u->frame_size); if (PA_UNLIKELY(position < 0)) position = 0; - /* Hmm, if the timestamp is 0, then it wasn't set and we take the current time */ - if (now1 <= 0) - now1 = pa_rtclock_now(); - now2 = pa_bytes_to_usec((uint64_t) position, &u->sink->sample_spec); + u->last_smoother_update = now1; + /* exponentially increase the update interval up to the MAX limit */ + u->smoother_interval = PA_MIN (u->smoother_interval * 2, SMOOTHER_MAX_INTERVAL); + pa_smoother_put(u->smoother, now1, now2); } @@ -906,6 +921,8 @@ static int unsuspend(struct userdata *u) { u->write_count = 0; pa_smoother_reset(u->smoother, pa_rtclock_now(), TRUE); + u->smoother_interval = SMOOTHER_MIN_INTERVAL; + u->last_smoother_update = 0; u->first = TRUE; u->since_start = 0; @@ -1622,6 +1639,7 @@ pa_sink *pa_alsa_sink_new(pa_module *m, pa_modargs *ma, const char*driver, pa_ca 5, pa_rtclock_now(), TRUE); + u->smoother_interval = SMOOTHER_MIN_INTERVAL; dev_id = pa_modargs_get_value( ma, "device_id", -- cgit From 05fef5f551ac7f295d2f2cb74642cb359be1b12d Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 20 Aug 2009 15:50:02 +0200 Subject: sconv: allow for setting custom 
functions Add methods to override the default conversion functions. --- src/pulsecore/sconv.c | 160 ++++++++++++++++++++++++++++++-------------------- src/pulsecore/sconv.h | 6 ++ 2 files changed, 102 insertions(+), 64 deletions(-) (limited to 'src') diff --git a/src/pulsecore/sconv.c b/src/pulsecore/sconv.c index 937bf5d1..d06d6985 100644 --- a/src/pulsecore/sconv.c +++ b/src/pulsecore/sconv.c @@ -184,98 +184,130 @@ static void alaw_from_s16ne(unsigned n, const int16_t *a, uint8_t *b) { *b = st_13linear2alaw(*a >> 3); } +static pa_convert_func_t to_float32ne_table[] = { + [PA_SAMPLE_U8] = (pa_convert_func_t) u8_to_float32ne, + [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_to_float32ne, + [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_to_float32ne, + [PA_SAMPLE_S16LE] = (pa_convert_func_t) pa_sconv_s16le_to_float32ne, + [PA_SAMPLE_S16BE] = (pa_convert_func_t) pa_sconv_s16be_to_float32ne, + [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_to_float32ne, + [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_to_float32ne, + [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_to_float32ne, + [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_to_float32ne, + [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_to_float32ne, + [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_to_float32ne, + [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne, + [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne, +}; + pa_convert_func_t pa_get_convert_to_float32ne_function(pa_sample_format_t f) { - static const pa_convert_func_t table[] = { - [PA_SAMPLE_U8] = (pa_convert_func_t) u8_to_float32ne, - [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_to_float32ne, - [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_to_float32ne, - [PA_SAMPLE_S16LE] = (pa_convert_func_t) pa_sconv_s16le_to_float32ne, - [PA_SAMPLE_S16BE] = (pa_convert_func_t) pa_sconv_s16be_to_float32ne, - [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_to_float32ne, - 
[PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_to_float32ne, - [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_to_float32ne, - [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_to_float32ne, - [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_to_float32ne, - [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_to_float32ne, - [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne, - [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne, - }; + pa_assert(f >= 0); + pa_assert(f < PA_SAMPLE_MAX); + + return to_float32ne_table[f]; +} + +void pa_set_convert_to_float32ne_function(pa_sample_format_t f, pa_convert_func_t func) { pa_assert(f >= 0); pa_assert(f < PA_SAMPLE_MAX); - return table[f]; + to_float32ne_table[f] = func; } +static pa_convert_func_t from_float32ne_table[] = { + [PA_SAMPLE_U8] = (pa_convert_func_t) u8_from_float32ne, + [PA_SAMPLE_S16LE] = (pa_convert_func_t) pa_sconv_s16le_from_float32ne, + [PA_SAMPLE_S16BE] = (pa_convert_func_t) pa_sconv_s16be_from_float32ne, + [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_from_float32ne, + [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_from_float32ne, + [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_from_float32ne, + [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_from_float32ne, + [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_from_float32ne, + [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_from_float32ne, + [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne, + [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne, + [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_from_float32ne, + [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_from_float32ne +}; + pa_convert_func_t pa_get_convert_from_float32ne_function(pa_sample_format_t f) { - static const pa_convert_func_t table[] = { - [PA_SAMPLE_U8] = (pa_convert_func_t) u8_from_float32ne, - [PA_SAMPLE_S16LE] = (pa_convert_func_t) 
pa_sconv_s16le_from_float32ne, - [PA_SAMPLE_S16BE] = (pa_convert_func_t) pa_sconv_s16be_from_float32ne, - [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_from_float32ne, - [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_from_float32ne, - [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_from_float32ne, - [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_from_float32ne, - [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_from_float32ne, - [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_from_float32ne, - [PA_SAMPLE_FLOAT32NE] = (pa_convert_func_t) float32ne_to_float32ne, - [PA_SAMPLE_FLOAT32RE] = (pa_convert_func_t) float32re_to_float32ne, - [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_from_float32ne, - [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_from_float32ne - }; + pa_assert(f >= 0); + pa_assert(f < PA_SAMPLE_MAX); + + return from_float32ne_table[f]; +} + +void pa_set_convert_from_float32ne_function(pa_sample_format_t f, pa_convert_func_t func) { pa_assert(f >= 0); pa_assert(f < PA_SAMPLE_MAX); - return table[f]; + from_float32ne_table[f] = func; } +static pa_convert_func_t to_s16ne_table[] = { + [PA_SAMPLE_U8] = (pa_convert_func_t) u8_to_s16ne, + [PA_SAMPLE_S16NE] = (pa_convert_func_t) s16ne_to_s16ne, + [PA_SAMPLE_S16RE] = (pa_convert_func_t) s16re_to_s16ne, + [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_to_s16ne, + [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_to_s16ne, + [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_to_s16ne, + [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_to_s16ne, + [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_to_s16ne, + [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_to_s16ne, + [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_to_s16ne, + [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_to_s16ne, + [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_to_s16ne, + [PA_SAMPLE_ULAW] = (pa_convert_func_t) 
ulaw_to_s16ne +}; + pa_convert_func_t pa_get_convert_to_s16ne_function(pa_sample_format_t f) { - static const pa_convert_func_t table[] = { - [PA_SAMPLE_U8] = (pa_convert_func_t) u8_to_s16ne, - [PA_SAMPLE_S16NE] = (pa_convert_func_t) s16ne_to_s16ne, - [PA_SAMPLE_S16RE] = (pa_convert_func_t) s16re_to_s16ne, - [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_to_s16ne, - [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_to_s16ne, - [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_to_s16ne, - [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_to_s16ne, - [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_to_s16ne, - [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_to_s16ne, - [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_to_s16ne, - [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_to_s16ne, - [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_to_s16ne, - [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_to_s16ne - }; + pa_assert(f >= 0); + pa_assert(f < PA_SAMPLE_MAX); + + return to_s16ne_table[f]; +} + +void pa_set_convert_to_s16ne_function(pa_sample_format_t f, pa_convert_func_t func) { pa_assert(f >= 0); pa_assert(f < PA_SAMPLE_MAX); - return table[f]; + to_s16ne_table[f] = func; } +static pa_convert_func_t from_s16ne_table[] = { + [PA_SAMPLE_U8] = (pa_convert_func_t) u8_from_s16ne, + [PA_SAMPLE_S16NE] = (pa_convert_func_t) s16ne_to_s16ne, + [PA_SAMPLE_S16RE] = (pa_convert_func_t) s16re_to_s16ne, + [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_from_s16ne, + [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_from_s16ne, + [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_from_s16ne, + [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_from_s16ne, + [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_from_s16ne, + [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_from_s16ne, + [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_from_s16ne, + 
[PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_from_s16ne, + [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_from_s16ne, + [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_from_s16ne, +}; + pa_convert_func_t pa_get_convert_from_s16ne_function(pa_sample_format_t f) { - static const pa_convert_func_t table[] = { - [PA_SAMPLE_U8] = (pa_convert_func_t) u8_from_s16ne, - [PA_SAMPLE_S16NE] = (pa_convert_func_t) s16ne_to_s16ne, - [PA_SAMPLE_S16RE] = (pa_convert_func_t) s16re_to_s16ne, - [PA_SAMPLE_FLOAT32BE] = (pa_convert_func_t) pa_sconv_float32be_from_s16ne, - [PA_SAMPLE_FLOAT32LE] = (pa_convert_func_t) pa_sconv_float32le_from_s16ne, - [PA_SAMPLE_S32BE] = (pa_convert_func_t) pa_sconv_s32be_from_s16ne, - [PA_SAMPLE_S32LE] = (pa_convert_func_t) pa_sconv_s32le_from_s16ne, - [PA_SAMPLE_S24BE] = (pa_convert_func_t) pa_sconv_s24be_from_s16ne, - [PA_SAMPLE_S24LE] = (pa_convert_func_t) pa_sconv_s24le_from_s16ne, - [PA_SAMPLE_S24_32BE] = (pa_convert_func_t) pa_sconv_s24_32be_from_s16ne, - [PA_SAMPLE_S24_32LE] = (pa_convert_func_t) pa_sconv_s24_32le_from_s16ne, - [PA_SAMPLE_ALAW] = (pa_convert_func_t) alaw_from_s16ne, - [PA_SAMPLE_ULAW] = (pa_convert_func_t) ulaw_from_s16ne, - }; + pa_assert(f >= 0); + pa_assert(f < PA_SAMPLE_MAX); + + return from_s16ne_table[f]; +} + +void pa_set_convert_from_s16ne_function(pa_sample_format_t f, pa_convert_func_t func) { pa_assert(f >= 0); pa_assert(f < PA_SAMPLE_MAX); - return table[f]; + from_s16ne_table[f] = func; } diff --git a/src/pulsecore/sconv.h b/src/pulsecore/sconv.h index b00a16a4..cd937559 100644 --- a/src/pulsecore/sconv.h +++ b/src/pulsecore/sconv.h @@ -33,4 +33,10 @@ pa_convert_func_t pa_get_convert_from_float32ne_function(pa_sample_format_t f) P pa_convert_func_t pa_get_convert_to_s16ne_function(pa_sample_format_t f) PA_GCC_PURE; pa_convert_func_t pa_get_convert_from_s16ne_function(pa_sample_format_t f) PA_GCC_PURE; +void pa_set_convert_to_float32ne_function(pa_sample_format_t f, pa_convert_func_t func); +void 
pa_set_convert_from_float32ne_function(pa_sample_format_t f, pa_convert_func_t func); + +void pa_set_convert_to_s16ne_function(pa_sample_format_t f, pa_convert_func_t func); +void pa_set_convert_from_s16ne_function(pa_sample_format_t f, pa_convert_func_t func); + #endif -- cgit From a3f4a4f6ba741a996442d7a80cc3e267fab705fb Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 20 Aug 2009 17:54:45 +0200 Subject: resampler: refactor the channel remapping bits Move the channel remapping bits into a separate structure. We'll make this structure global so that optimized versions can use it to perform the channel remapping. --- src/pulsecore/resampler.c | 186 ++++++++++++++++++++++++++-------------------- 1 file changed, 105 insertions(+), 81 deletions(-) (limited to 'src') diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c index 5a6c398e..0d8ca010 100644 --- a/src/pulsecore/resampler.c +++ b/src/pulsecore/resampler.c @@ -44,10 +44,20 @@ /* Number of samples of extra space we allow the resamplers to return */ #define EXTRA_FRAMES 128 -typedef void (*pa_do_remap_func_t) (pa_resampler *r, void *d, const void *s, unsigned n); +typedef struct pa_remap pa_remap_t; -static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, unsigned n); -static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n); +typedef void (*pa_do_remap_func_t) (pa_remap_t *m, void *d, const void *s, unsigned n); + +struct pa_remap { + pa_sample_format_t *format; + pa_sample_spec *i_ss, *o_ss; + float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; + int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; + pa_do_remap_func_t do_remap; +}; + +static void remap_channels_matrix (pa_remap_t *m, void *dst, const void *src, unsigned n); +static void remap_mono_to_stereo(pa_remap_t *m, void *dst, const void *src, unsigned n); struct pa_resampler { pa_resample_method_t method; @@ -66,10 +76,8 @@ struct pa_resampler { pa_convert_func_t 
to_work_format_func; pa_convert_func_t from_work_format_func; - float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; - int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; + pa_remap_t remap; pa_bool_t map_required; - pa_do_remap_func_t do_remap; void (*impl_free)(pa_resampler *r); void (*impl_update_rates)(pa_resampler *r); @@ -218,6 +226,11 @@ pa_resampler* pa_resampler_new( r->i_ss = *a; r->o_ss = *b; + /* set up the remap structure */ + r->remap.i_ss = &r->i_ss; + r->remap.o_ss = &r->o_ss; + r->remap.format = &r->work_format; + if (am) r->i_cm = *am; else if (!pa_channel_map_init_auto(&r->i_cm, r->i_ss.channels, PA_CHANNEL_MAP_DEFAULT)) @@ -584,33 +597,41 @@ static int front_rear_side(pa_channel_position_t p) { static void calc_map_table(pa_resampler *r) { unsigned oc, ic; + unsigned n_oc, n_ic; pa_bool_t ic_connected[PA_CHANNELS_MAX]; pa_bool_t remix; pa_strbuf *s; char *t; + pa_remap_t *m; pa_assert(r); if (!(r->map_required = (r->i_ss.channels != r->o_ss.channels || (!(r->flags & PA_RESAMPLER_NO_REMAP) && !pa_channel_map_equal(&r->i_cm, &r->o_cm))))) return; - memset(r->map_table_f, 0, sizeof(r->map_table_f)); - memset(r->map_table_i, 0, sizeof(r->map_table_i)); + m = &r->remap; + + n_oc = r->o_ss.channels; + n_ic = r->i_ss.channels; + + memset(m->map_table_f, 0, sizeof(m->map_table_f)); + memset(m->map_table_i, 0, sizeof(m->map_table_i)); + memset(ic_connected, 0, sizeof(ic_connected)); remix = (r->flags & (PA_RESAMPLER_NO_REMAP|PA_RESAMPLER_NO_REMIX)) == 0; - for (oc = 0; oc < r->o_ss.channels; oc++) { + for (oc = 0; oc < n_oc; oc++) { pa_bool_t oc_connected = FALSE; pa_channel_position_t b = r->o_cm.map[oc]; - for (ic = 0; ic < r->i_ss.channels; ic++) { + for (ic = 0; ic < n_ic; ic++) { pa_channel_position_t a = r->i_cm.map[ic]; if (r->flags & PA_RESAMPLER_NO_REMAP) { /* We shall not do any remapping. 
Hence, just check by index */ if (ic == oc) - r->map_table_f[oc][ic] = 1.0; + m->map_table_f[oc][ic] = 1.0; continue; } @@ -619,7 +640,7 @@ static void calc_map_table(pa_resampler *r) { /* We shall not do any remixing. Hence, just check by name */ if (a == b) - r->map_table_f[oc][ic] = 1.0; + m->map_table_f[oc][ic] = 1.0; continue; } @@ -694,7 +715,7 @@ static void calc_map_table(pa_resampler *r) { */ if (a == b || a == PA_CHANNEL_POSITION_MONO || b == PA_CHANNEL_POSITION_MONO) { - r->map_table_f[oc][ic] = 1.0; + m->map_table_f[oc][ic] = 1.0; oc_connected = TRUE; ic_connected[ic] = TRUE; @@ -712,14 +733,14 @@ static void calc_map_table(pa_resampler *r) { /* We are not connected and on the left side, let's * average all left side input channels. */ - for (ic = 0; ic < r->i_ss.channels; ic++) + for (ic = 0; ic < n_ic; ic++) if (on_left(r->i_cm.map[ic])) n++; if (n > 0) - for (ic = 0; ic < r->i_ss.channels; ic++) + for (ic = 0; ic < n_ic; ic++) if (on_left(r->i_cm.map[ic])) { - r->map_table_f[oc][ic] = 1.0f / (float) n; + m->map_table_f[oc][ic] = 1.0f / (float) n; ic_connected[ic] = TRUE; } @@ -733,14 +754,14 @@ static void calc_map_table(pa_resampler *r) { /* We are not connected and on the right side, let's * average all right side input channels. */ - for (ic = 0; ic < r->i_ss.channels; ic++) + for (ic = 0; ic < n_ic; ic++) if (on_right(r->i_cm.map[ic])) n++; if (n > 0) - for (ic = 0; ic < r->i_ss.channels; ic++) + for (ic = 0; ic < n_ic; ic++) if (on_right(r->i_cm.map[ic])) { - r->map_table_f[oc][ic] = 1.0f / (float) n; + m->map_table_f[oc][ic] = 1.0f / (float) n; ic_connected[ic] = TRUE; } @@ -754,14 +775,14 @@ static void calc_map_table(pa_resampler *r) { /* We are not connected and at the center. Let's * average all center input channels. 
*/ - for (ic = 0; ic < r->i_ss.channels; ic++) + for (ic = 0; ic < n_ic; ic++) if (on_center(r->i_cm.map[ic])) n++; if (n > 0) { - for (ic = 0; ic < r->i_ss.channels; ic++) + for (ic = 0; ic < n_ic; ic++) if (on_center(r->i_cm.map[ic])) { - r->map_table_f[oc][ic] = 1.0f / (float) n; + m->map_table_f[oc][ic] = 1.0f / (float) n; ic_connected[ic] = TRUE; } } else { @@ -771,14 +792,14 @@ static void calc_map_table(pa_resampler *r) { n = 0; - for (ic = 0; ic < r->i_ss.channels; ic++) + for (ic = 0; ic < n_ic; ic++) if (on_left(r->i_cm.map[ic]) || on_right(r->i_cm.map[ic])) n++; if (n > 0) - for (ic = 0; ic < r->i_ss.channels; ic++) + for (ic = 0; ic < n_ic; ic++) if (on_left(r->i_cm.map[ic]) || on_right(r->i_cm.map[ic])) { - r->map_table_f[oc][ic] = 1.0f / (float) n; + m->map_table_f[oc][ic] = 1.0f / (float) n; ic_connected[ic] = TRUE; } @@ -792,12 +813,12 @@ static void calc_map_table(pa_resampler *r) { /* We are not connected and an LFE. Let's average all * channels for LFE. */ - for (ic = 0; ic < r->i_ss.channels; ic++) { + for (ic = 0; ic < n_ic; ic++) { if (!(r->flags & PA_RESAMPLER_NO_LFE)) - r->map_table_f[oc][ic] = 1.0f / (float) r->i_ss.channels; + m->map_table_f[oc][ic] = 1.0f / (float) n_ic; else - r->map_table_f[oc][ic] = 0; + m->map_table_f[oc][ic] = 0; /* Please note that a channel connected to LFE * doesn't really count as connected. 
*/ @@ -813,7 +834,7 @@ static void calc_map_table(pa_resampler *r) { ic_unconnected_center = 0, ic_unconnected_lfe = 0; - for (ic = 0; ic < r->i_ss.channels; ic++) { + for (ic = 0; ic < n_ic; ic++) { pa_channel_position_t a = r->i_cm.map[ic]; if (ic_connected[ic]) @@ -836,20 +857,20 @@ static void calc_map_table(pa_resampler *r) { * the left side by .9 and add in our averaged unconnected * channels multplied by .1 */ - for (oc = 0; oc < r->o_ss.channels; oc++) { + for (oc = 0; oc < n_oc; oc++) { if (!on_left(r->o_cm.map[oc])) continue; - for (ic = 0; ic < r->i_ss.channels; ic++) { + for (ic = 0; ic < n_ic; ic++) { if (ic_connected[ic]) { - r->map_table_f[oc][ic] *= .9f; + m->map_table_f[oc][ic] *= .9f; continue; } if (on_left(r->i_cm.map[ic])) - r->map_table_f[oc][ic] = .1f / (float) ic_unconnected_left; + m->map_table_f[oc][ic] = .1f / (float) ic_unconnected_left; } } } @@ -861,20 +882,20 @@ static void calc_map_table(pa_resampler *r) { * the right side by .9 and add in our averaged unconnected * channels multplied by .1 */ - for (oc = 0; oc < r->o_ss.channels; oc++) { + for (oc = 0; oc < n_oc; oc++) { if (!on_right(r->o_cm.map[oc])) continue; - for (ic = 0; ic < r->i_ss.channels; ic++) { + for (ic = 0; ic < n_ic; ic++) { if (ic_connected[ic]) { - r->map_table_f[oc][ic] *= .9f; + m->map_table_f[oc][ic] *= .9f; continue; } if (on_right(r->i_cm.map[ic])) - r->map_table_f[oc][ic] = .1f / (float) ic_unconnected_right; + m->map_table_f[oc][ic] = .1f / (float) ic_unconnected_right; } } } @@ -887,20 +908,20 @@ static void calc_map_table(pa_resampler *r) { * the center side by .9 and add in our averaged unconnected * channels multplied by .1 */ - for (oc = 0; oc < r->o_ss.channels; oc++) { + for (oc = 0; oc < n_oc; oc++) { if (!on_center(r->o_cm.map[oc])) continue; - for (ic = 0; ic < r->i_ss.channels; ic++) { + for (ic = 0; ic < n_ic; ic++) { if (ic_connected[ic]) { - r->map_table_f[oc][ic] *= .9f; + m->map_table_f[oc][ic] *= .9f; continue; } if 
(on_center(r->i_cm.map[ic])) { - r->map_table_f[oc][ic] = .1f / (float) ic_unconnected_center; + m->map_table_f[oc][ic] = .1f / (float) ic_unconnected_center; mixed_in = TRUE; } } @@ -918,7 +939,7 @@ static void calc_map_table(pa_resampler *r) { it into left and right. Using .375 and 0.75 as factors. */ - for (ic = 0; ic < r->i_ss.channels; ic++) { + for (ic = 0; ic < n_ic; ic++) { if (ic_connected[ic]) continue; @@ -926,7 +947,7 @@ static void calc_map_table(pa_resampler *r) { if (!on_center(r->i_cm.map[ic])) continue; - for (oc = 0; oc < r->o_ss.channels; oc++) { + for (oc = 0; oc < n_oc; oc++) { if (!on_left(r->o_cm.map[oc]) && !on_right(r->o_cm.map[oc])) continue; @@ -937,7 +958,7 @@ static void calc_map_table(pa_resampler *r) { } } - for (oc = 0; oc < r->o_ss.channels; oc++) { + for (oc = 0; oc < n_oc; oc++) { if (!on_left(r->o_cm.map[oc]) && !on_right(r->o_cm.map[oc])) continue; @@ -947,7 +968,7 @@ static void calc_map_table(pa_resampler *r) { } } - for (oc = 0; oc < r->o_ss.channels; oc++) { + for (oc = 0; oc < n_oc; oc++) { if (!on_left(r->o_cm.map[oc]) && !on_right(r->o_cm.map[oc])) continue; @@ -955,10 +976,10 @@ static void calc_map_table(pa_resampler *r) { if (ncenter[oc] <= 0) continue; - for (ic = 0; ic < r->i_ss.channels; ic++) { + for (ic = 0; ic < n_ic; ic++) { if (ic_connected[ic]) { - r->map_table_f[oc][ic] *= .75f; + m->map_table_f[oc][ic] *= .75f; continue; } @@ -966,7 +987,7 @@ static void calc_map_table(pa_resampler *r) { continue; if (!found_frs[ic] || front_rear_side(r->i_cm.map[ic]) == front_rear_side(r->o_cm.map[oc])) - r->map_table_f[oc][ic] = .375f / (float) ncenter[oc]; + m->map_table_f[oc][ic] = .375f / (float) ncenter[oc]; } } } @@ -977,37 +998,37 @@ static void calc_map_table(pa_resampler *r) { /* OK, so there is an unconnected LFE channel. 
Let's mix * it into all channels, with factor 0.375 */ - for (ic = 0; ic < r->i_ss.channels; ic++) { + for (ic = 0; ic < n_ic; ic++) { if (!on_lfe(r->i_cm.map[ic])) continue; - for (oc = 0; oc < r->o_ss.channels; oc++) - r->map_table_f[oc][ic] = 0.375f / (float) ic_unconnected_lfe; + for (oc = 0; oc < n_oc; oc++) + m->map_table_f[oc][ic] = 0.375f / (float) ic_unconnected_lfe; } } } /* make an 16:16 int version of the matrix */ - for (oc = 0; oc < r->o_ss.channels; oc++) - for (ic = 0; ic < r->i_ss.channels; ic++) - r->map_table_i[oc][ic] = (int32_t) (r->map_table_f[oc][ic] * 0x10000); + for (oc = 0; oc < n_oc; oc++) + for (ic = 0; ic < n_ic; ic++) + m->map_table_i[oc][ic] = (int32_t) (m->map_table_f[oc][ic] * 0x10000); s = pa_strbuf_new(); pa_strbuf_printf(s, " "); - for (ic = 0; ic < r->i_ss.channels; ic++) + for (ic = 0; ic < n_ic; ic++) pa_strbuf_printf(s, " I%02u ", ic); pa_strbuf_puts(s, "\n +"); - for (ic = 0; ic < r->i_ss.channels; ic++) + for (ic = 0; ic < n_ic; ic++) pa_strbuf_printf(s, "------"); pa_strbuf_puts(s, "\n"); - for (oc = 0; oc < r->o_ss.channels; oc++) { + for (oc = 0; oc < n_oc; oc++) { pa_strbuf_printf(s, "O%02u |", oc); - for (ic = 0; ic < r->i_ss.channels; ic++) - pa_strbuf_printf(s, " %1.3f", r->map_table_f[oc][ic]); + for (ic = 0; ic < n_ic; ic++) + pa_strbuf_printf(s, " %1.3f", m->map_table_f[oc][ic]); pa_strbuf_puts(s, "\n"); } @@ -1016,13 +1037,13 @@ static void calc_map_table(pa_resampler *r) { pa_xfree(t); /* find some common channel remappings, fall back to full matrix operation. 
*/ - if (r->i_ss.channels == 1 && r->o_ss.channels == 2 && - r->map_table_f[0][0] >= 1.0 && r->map_table_f[1][0] >= 1.0) { - r->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo;; - pa_log_debug("Using mono to stereo remapping"); + if (n_ic == 1 && n_oc == 2 && + m->map_table_f[0][0] >= 1.0 && m->map_table_f[1][0] >= 1.0) { + m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo;; + pa_log_info("Using mono to stereo remapping"); } else { - r->do_remap = (pa_do_remap_func_t) remap_channels_matrix; - pa_log_debug("Using generic matrix remapping"); + m->do_remap = (pa_do_remap_func_t) remap_channels_matrix; + pa_log_info("Using generic matrix remapping"); } } @@ -1064,10 +1085,10 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input) return &r->buf1; } -static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, unsigned n) { +static void remap_mono_to_stereo(pa_remap_t *m, void *dst, const void *src, unsigned n) { unsigned i; - switch (r->work_format) { + switch (*m->format) { case PA_SAMPLE_FLOAT32NE: { float *d, *s; @@ -1117,27 +1138,26 @@ static void remap_mono_to_stereo(pa_resampler *r, void *dst, const void *src, un } } -static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, unsigned n) { - unsigned oc, i; +static void remap_channels_matrix (pa_remap_t *m, void *dst, const void *src, unsigned n) { + unsigned oc, ic, i; unsigned n_ic, n_oc; - n_ic = r->i_ss.channels; - n_oc = r->o_ss.channels; + n_ic = m->i_ss->channels; + n_oc = m->o_ss->channels; - memset(dst, 0, r->buf2.length); - - switch (r->work_format) { + switch (*m->format) { case PA_SAMPLE_FLOAT32NE: { float *d, *s; + memset(dst, 0, n * sizeof (float) * n_oc); + for (oc = 0; oc < n_oc; oc++) { - unsigned ic; for (ic = 0; ic < n_ic; ic++) { float vol; - vol = r->map_table_f[oc][ic]; + vol = m->map_table_f[oc][ic]; if (vol <= 0.0) continue; @@ -1161,13 +1181,14 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, 
const void *src, { int16_t *d, *s; + memset(dst, 0, n * sizeof (int16_t) * n_oc); + for (oc = 0; oc < n_oc; oc++) { - unsigned ic; for (ic = 0; ic < n_ic; ic++) { int32_t vol; - vol = r->map_table_i[oc][ic]; + vol = m->map_table_i[oc][ic]; if (vol <= 0) continue; @@ -1181,7 +1202,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, } else { for (i = n; i > 0; i--, s += n_ic, d += n_oc) *d += (int16_t) (((int32_t)*s * vol) >> 16); - } + } } } break; @@ -1194,6 +1215,7 @@ static void remap_channels_matrix (pa_resampler *r, void *dst, const void *src, static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) { unsigned in_n_samples, out_n_samples, n_frames; void *src, *dst; + pa_remap_t *remap; pa_assert(r); pa_assert(input); @@ -1222,8 +1244,10 @@ static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) { src = ((uint8_t*) pa_memblock_acquire(input->memblock) + input->index); dst = pa_memblock_acquire(r->buf2.memblock); - pa_assert (r->do_remap); - r->do_remap (r, dst, src, n_frames); + remap = &r->remap; + + pa_assert (remap->do_remap); + remap->do_remap (remap, dst, src, n_frames); pa_memblock_release(input->memblock); pa_memblock_release(r->buf2.memblock); -- cgit From ac1f2e0a2e0707636aabd48baa57c124a877f834 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 20 Aug 2009 18:23:42 +0200 Subject: remap: move remapping code in separate file Move the remapping code into a separate file. Have functions to install custom init functions that can install optimized versions, when they want. 
--- src/Makefile.am | 1 + src/pulsecore/remap.c | 197 ++++++++++++++++++++++++++++++++++++++++++++++ src/pulsecore/remap.h | 48 +++++++++++ src/pulsecore/resampler.c | 155 +----------------------------------- 4 files changed, 249 insertions(+), 152 deletions(-) create mode 100644 src/pulsecore/remap.c create mode 100644 src/pulsecore/remap.h (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index eca68b16..b818c3e7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -822,6 +822,7 @@ libpulsecore_@PA_MAJORMINORMICRO@_la_SOURCES = \ pulsecore/object.c pulsecore/object.h \ pulsecore/play-memblockq.c pulsecore/play-memblockq.h \ pulsecore/play-memchunk.c pulsecore/play-memchunk.h \ + pulsecore/remap.c pulsecore/remap.h \ pulsecore/resampler.c pulsecore/resampler.h \ pulsecore/rtpoll.c pulsecore/rtpoll.h \ pulsecore/sample-util.c pulsecore/sample-util.h \ diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c new file mode 100644 index 00000000..2e93afce --- /dev/null +++ b/src/pulsecore/remap.c @@ -0,0 +1,197 @@ +/*** + This file is part of PulseAudio. + + Copyright 2004-2006 Lennart Poettering + Copyright 2009 Wim Taymans + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. 
+***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include +#include +#include + +#include "remap.h" + +static void remap_mono_to_stereo_c (pa_remap_t *m, void *dst, const void *src, unsigned n) { + unsigned i; + + switch (*m->format) { + case PA_SAMPLE_FLOAT32NE: + { + float *d, *s; + + d = (float *) dst; + s = (float *) src; + + for (i = n >> 2; i; i--) { + d[0] = d[1] = s[0]; + d[2] = d[3] = s[1]; + d[4] = d[5] = s[2]; + d[6] = d[7] = s[3]; + s += 4; + d += 8; + } + for (i = n & 3; i; i--) { + d[0] = d[1] = s[0]; + s++; + d += 2; + } + break; + } + case PA_SAMPLE_S16NE: + { + int16_t *d, *s; + + d = (int16_t *) dst; + s = (int16_t *) src; + + for (i = n >> 2; i; i--) { + d[0] = d[1] = s[0]; + d[2] = d[3] = s[1]; + d[4] = d[5] = s[2]; + d[6] = d[7] = s[3]; + s += 4; + d += 8; + } + for (i = n & 3; i; i--) { + d[0] = d[1] = s[0]; + s++; + d += 2; + } + break; + } + default: + pa_assert_not_reached(); + } +} + +static void remap_channels_matrix_c (pa_remap_t *m, void *dst, const void *src, unsigned n) { + unsigned oc, ic, i; + unsigned n_ic, n_oc; + + n_ic = m->i_ss->channels; + n_oc = m->o_ss->channels; + + switch (*m->format) { + case PA_SAMPLE_FLOAT32NE: + { + float *d, *s; + + memset(dst, 0, n * sizeof (float) * n_oc); + + for (oc = 0; oc < n_oc; oc++) { + + for (ic = 0; ic < n_ic; ic++) { + float vol; + + vol = m->map_table_f[oc][ic]; + + if (vol <= 0.0) + continue; + + d = (float *)dst + oc; + s = (float *)src + ic; + + if (vol >= 1.0) { + for (i = n; i > 0; i--, s += n_ic, d += n_oc) + *d += *s; + } else { + for (i = n; i > 0; i--, s += n_ic, d += n_oc) + *d += *s * vol; + } + } + } + + break; + } + case PA_SAMPLE_S16NE: + { + int16_t *d, *s; + + memset(dst, 0, n * sizeof (int16_t) * n_oc); + + for (oc = 0; oc < n_oc; oc++) { + + for (ic = 0; ic < n_ic; ic++) { + int32_t vol; + + vol = m->map_table_i[oc][ic]; + + if (vol <= 0) + continue; + + d = (int16_t *)dst + oc; + s = (int16_t *)src + ic; + + if (vol >= 0x10000) { + for (i = n; i > 0; i--, 
s += n_ic, d += n_oc) + *d += *s; + } else { + for (i = n; i > 0; i--, s += n_ic, d += n_oc) + *d += (int16_t) (((int32_t)*s * vol) >> 16); + } + } + } + break; + } + default: + pa_assert_not_reached(); + } +} + +/* set the function that will execute the remapping based on the matrices */ +static void init_remap_c (pa_remap_t *m) { + unsigned n_oc, n_ic; + + n_oc = m->o_ss->channels; + n_ic = m->i_ss->channels; + + /* find some common channel remappings, fall back to full matrix operation. */ + if (n_ic == 1 && n_oc == 2 && + m->map_table_f[0][0] >= 1.0 && m->map_table_f[1][0] >= 1.0) { + m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_c; + pa_log_info("Using mono to stereo remapping"); + } else { + m->do_remap = (pa_do_remap_func_t) remap_channels_matrix_c; + pa_log_info("Using generic matrix remapping"); + } +} + + +/* default C implementation */ +static pa_init_remap_func_t remap_func = init_remap_c; + +void pa_init_remap (pa_remap_t *m) { + pa_assert (remap_func); + + /* just call the installed remap init functions */ + remap_func (m); +} + +pa_init_remap_func_t pa_get_init_remap_func(void) { + return remap_func; +} + +void pa_set_init_remap_func(pa_init_remap_func_t func) { + remap_func = func; +} diff --git a/src/pulsecore/remap.h b/src/pulsecore/remap.h new file mode 100644 index 00000000..32a67cdd --- /dev/null +++ b/src/pulsecore/remap.h @@ -0,0 +1,48 @@ +#ifndef fooremapfoo +#define fooremapfoo + +/*** + This file is part of PulseAudio. + + Copyright 2004-2006 Lennart Poettering + Copyright 2009 Wim Taymans + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#include + +typedef struct pa_remap pa_remap_t; + +typedef void (*pa_do_remap_func_t) (pa_remap_t *m, void *d, const void *s, unsigned n); + +struct pa_remap { + pa_sample_format_t *format; + pa_sample_spec *i_ss, *o_ss; + float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; + int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; + pa_do_remap_func_t do_remap; +}; + +void pa_init_remap (pa_remap_t *m); + +/* custom installation of init functions */ +typedef void (*pa_init_remap_func_t) (pa_remap_t *m); + +pa_init_remap_func_t pa_get_init_remap_func(void); +void pa_set_init_remap_func(pa_init_remap_func_t func); + +#endif /* fooremapfoo */ diff --git a/src/pulsecore/resampler.c b/src/pulsecore/resampler.c index 0d8ca010..f1bfa156 100644 --- a/src/pulsecore/resampler.c +++ b/src/pulsecore/resampler.c @@ -40,25 +40,11 @@ #include "ffmpeg/avcodec.h" #include "resampler.h" +#include "remap.h" /* Number of samples of extra space we allow the resamplers to return */ #define EXTRA_FRAMES 128 -typedef struct pa_remap pa_remap_t; - -typedef void (*pa_do_remap_func_t) (pa_remap_t *m, void *d, const void *s, unsigned n); - -struct pa_remap { - pa_sample_format_t *format; - pa_sample_spec *i_ss, *o_ss; - float map_table_f[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; - int32_t map_table_i[PA_CHANNELS_MAX][PA_CHANNELS_MAX]; - pa_do_remap_func_t do_remap; -}; - -static void remap_channels_matrix (pa_remap_t *m, void *dst, const void *src, unsigned n); -static void remap_mono_to_stereo(pa_remap_t *m, void *dst, const void *src, unsigned n); - struct pa_resampler { pa_resample_method_t method; pa_resample_flags_t flags; @@ -1036,16 +1022,8 @@ static void calc_map_table(pa_resampler *r) { pa_log_debug("Channel 
matrix:\n%s", t = pa_strbuf_tostring_free(s)); pa_xfree(t); - /* find some common channel remappings, fall back to full matrix operation. */ - if (n_ic == 1 && n_oc == 2 && - m->map_table_f[0][0] >= 1.0 && m->map_table_f[1][0] >= 1.0) { - m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo;; - pa_log_info("Using mono to stereo remapping"); - } else { - m->do_remap = (pa_do_remap_func_t) remap_channels_matrix; - pa_log_info("Using generic matrix remapping"); - } - + /* initialize the remapping function */ + pa_init_remap (m); } static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input) { @@ -1085,133 +1063,6 @@ static pa_memchunk* convert_to_work_format(pa_resampler *r, pa_memchunk *input) return &r->buf1; } -static void remap_mono_to_stereo(pa_remap_t *m, void *dst, const void *src, unsigned n) { - unsigned i; - - switch (*m->format) { - case PA_SAMPLE_FLOAT32NE: - { - float *d, *s; - - d = (float *) dst; - s = (float *) src; - - for (i = n >> 2; i; i--) { - d[0] = d[1] = s[0]; - d[2] = d[3] = s[1]; - d[4] = d[5] = s[2]; - d[6] = d[7] = s[3]; - s += 4; - d += 8; - } - for (i = n & 3; i; i--) { - d[0] = d[1] = s[0]; - s++; - d += 2; - } - break; - } - case PA_SAMPLE_S16NE: - { - int16_t *d, *s; - - d = (int16_t *) dst; - s = (int16_t *) src; - - for (i = n >> 2; i; i--) { - d[0] = d[1] = s[0]; - d[2] = d[3] = s[1]; - d[4] = d[5] = s[2]; - d[6] = d[7] = s[3]; - s += 4; - d += 8; - } - for (i = n & 3; i; i--) { - d[0] = d[1] = s[0]; - s++; - d += 2; - } - break; - } - default: - pa_assert_not_reached(); - } -} - -static void remap_channels_matrix (pa_remap_t *m, void *dst, const void *src, unsigned n) { - unsigned oc, ic, i; - unsigned n_ic, n_oc; - - n_ic = m->i_ss->channels; - n_oc = m->o_ss->channels; - - switch (*m->format) { - case PA_SAMPLE_FLOAT32NE: - { - float *d, *s; - - memset(dst, 0, n * sizeof (float) * n_oc); - - for (oc = 0; oc < n_oc; oc++) { - - for (ic = 0; ic < n_ic; ic++) { - float vol; - - vol = m->map_table_f[oc][ic]; - - 
if (vol <= 0.0) - continue; - - d = (float *)dst + oc; - s = (float *)src + ic; - - if (vol >= 1.0) { - for (i = n; i > 0; i--, s += n_ic, d += n_oc) - *d += *s; - } else { - for (i = n; i > 0; i--, s += n_ic, d += n_oc) - *d += *s * vol; - } - } - } - - break; - } - case PA_SAMPLE_S16NE: - { - int16_t *d, *s; - - memset(dst, 0, n * sizeof (int16_t) * n_oc); - - for (oc = 0; oc < n_oc; oc++) { - - for (ic = 0; ic < n_ic; ic++) { - int32_t vol; - - vol = m->map_table_i[oc][ic]; - - if (vol <= 0) - continue; - - d = (int16_t *)dst + oc; - s = (int16_t *)src + ic; - - if (vol >= 0x10000) { - for (i = n; i > 0; i--, s += n_ic, d += n_oc) - *d += *s; - } else { - for (i = n; i > 0; i--, s += n_ic, d += n_oc) - *d += (int16_t) (((int32_t)*s * vol) >> 16); - } - } - } - break; - } - default: - pa_assert_not_reached(); - } -} - static pa_memchunk *remap_channels(pa_resampler *r, pa_memchunk *input) { unsigned in_n_samples, out_n_samples, n_frames; void *src, *dst; -- cgit From 28baa53d55fa51d5fbbb1be54db3581fc3d151dd Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 20 Aug 2009 18:29:02 +0200 Subject: remap: allow specialisations to install NULL Fallback to the default C implementation when the remap init function did not set a function. 
--- src/pulsecore/remap.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src') diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c index 2e93afce..108df900 100644 --- a/src/pulsecore/remap.c +++ b/src/pulsecore/remap.c @@ -186,6 +186,11 @@ void pa_init_remap (pa_remap_t *m) { /* just call the installed remap init functions */ remap_func (m); + + if (m->do_remap == NULL) { + /* nothing was installed, fallback to C versions */ + init_remap_c (m); + } } pa_init_remap_func_t pa_get_init_remap_func(void) { -- cgit From e961efc130481ff4c5a053eb03dd3ec4d513c615 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 20 Aug 2009 18:32:51 +0200 Subject: remap: init the do_remap function to NULL --- src/pulsecore/remap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/pulsecore/remap.c b/src/pulsecore/remap.c index 108df900..a0fc85b9 100644 --- a/src/pulsecore/remap.c +++ b/src/pulsecore/remap.c @@ -184,11 +184,13 @@ static pa_init_remap_func_t remap_func = init_remap_c; void pa_init_remap (pa_remap_t *m) { pa_assert (remap_func); - /* just call the installed remap init functions */ + m->do_remap = NULL; + + /* call the installed remap init function */ remap_func (m); if (m->do_remap == NULL) { - /* nothing was installed, fallback to C versions */ + /* nothing was installed, fallback to C version */ init_remap_c (m); } } -- cgit From 6e5dbed51ee508759ed8b5adabc998ba8faf4774 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 20 Aug 2009 19:46:06 +0200 Subject: remap: add MMX mono to stereo --- src/Makefile.am | 1 + src/pulsecore/cpu-x86.c | 4 +- src/pulsecore/cpu-x86.h | 2 + src/pulsecore/remap_mmx.c | 174 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 src/pulsecore/remap_mmx.c (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index b818c3e7..ab91be83 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -823,6 +823,7 @@ 
libpulsecore_@PA_MAJORMINORMICRO@_la_SOURCES = \ pulsecore/play-memblockq.c pulsecore/play-memblockq.h \ pulsecore/play-memchunk.c pulsecore/play-memchunk.h \ pulsecore/remap.c pulsecore/remap.h \ + pulsecore/remap_mmx.c \ pulsecore/resampler.c pulsecore/resampler.h \ pulsecore/rtpoll.c pulsecore/rtpoll.h \ pulsecore/sample-util.c pulsecore/sample-util.h \ diff --git a/src/pulsecore/cpu-x86.c b/src/pulsecore/cpu-x86.c index 0457199d..bc093ec0 100644 --- a/src/pulsecore/cpu-x86.c +++ b/src/pulsecore/cpu-x86.c @@ -110,8 +110,10 @@ void pa_cpu_init_x86 (void) { (flags & PA_CPU_X86_3DNOWEXT) ? "3DNOWEXT " : ""); /* activate various optimisations */ - if (flags & PA_CPU_X86_MMX) + if (flags & PA_CPU_X86_MMX) { pa_volume_func_init_mmx (flags); + pa_remap_func_init_mmx (flags); + } if (flags & PA_CPU_X86_SSE) pa_volume_func_init_sse (flags); diff --git a/src/pulsecore/cpu-x86.h b/src/pulsecore/cpu-x86.h index 07e630ea..b11ef6ea 100644 --- a/src/pulsecore/cpu-x86.h +++ b/src/pulsecore/cpu-x86.h @@ -63,4 +63,6 @@ typedef int64_t pa_reg_x86; void pa_volume_func_init_mmx(pa_cpu_x86_flag_t flags); void pa_volume_func_init_sse(pa_cpu_x86_flag_t flags); +void pa_remap_func_init_mmx(pa_cpu_x86_flag_t flags); + #endif /* foocpux86hfoo */ diff --git a/src/pulsecore/remap_mmx.c b/src/pulsecore/remap_mmx.c new file mode 100644 index 00000000..6690cfa4 --- /dev/null +++ b/src/pulsecore/remap_mmx.c @@ -0,0 +1,174 @@ +/*** + This file is part of PulseAudio. + + Copyright 2004-2006 Lennart Poettering + Copyright 2009 Wim Taymans + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2.1 of the License, + or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include +#include +#include + +#include "cpu-x86.h" +#include "remap.h" + +#define LOAD_SAMPLES \ + " movq (%1), %%mm0 \n\t" \ + " movq 8(%1), %%mm2 \n\t" \ + " movq 16(%1), %%mm4 \n\t" \ + " movq 24(%1), %%mm6 \n\t" \ + " movq %%mm0, %%mm1 \n\t" \ + " movq %%mm2, %%mm3 \n\t" \ + " movq %%mm4, %%mm5 \n\t" \ + " movq %%mm6, %%mm7 \n\t" + +#define UNPACK_SAMPLES(s) \ + " punpckl"#s" %%mm0, %%mm0 \n\t" \ + " punpckh"#s" %%mm1, %%mm1 \n\t" \ + " punpckl"#s" %%mm2, %%mm2 \n\t" \ + " punpckh"#s" %%mm3, %%mm3 \n\t" \ + " punpckl"#s" %%mm4, %%mm4 \n\t" \ + " punpckh"#s" %%mm5, %%mm5 \n\t" \ + " punpckl"#s" %%mm6, %%mm6 \n\t" \ + " punpckh"#s" %%mm7, %%mm7 \n\t" \ + +#define STORE_SAMPLES \ + " movq %%mm0, (%0) \n\t" \ + " movq %%mm1, 8(%0) \n\t" \ + " movq %%mm2, 16(%0) \n\t" \ + " movq %%mm3, 24(%0) \n\t" \ + " movq %%mm4, 32(%0) \n\t" \ + " movq %%mm5, 40(%0) \n\t" \ + " movq %%mm6, 48(%0) \n\t" \ + " movq %%mm7, 56(%0) \n\t" \ + " add $32, %1 \n\t" \ + " add $64, %0 \n\t" + +#define HANDLE_SINGLE(s) \ + " movd (%1), %%mm0 \n\t" \ + " movq %%mm0, %%mm1 \n\t" \ + " punpckl"#s" %%mm0, %%mm0 \n\t" \ + " movq %%mm0, (%0) \n\t" \ + " add $4, %1 \n\t" \ + " add $8, %0 \n\t" + +static void remap_mono_to_stereo_mmx (pa_remap_t *m, void *dst, const void *src, unsigned n) { + pa_reg_x86 temp; + + switch (*m->format) { + case PA_SAMPLE_FLOAT32NE: + { + __asm__ __volatile__ ( + " mov %3, %2 \n\t" + " sar $3, %2 \n\t" /* prepare for processing 8 samples at a time */ + " cmp $0, %2 \n\t" + " je 2f \n\t" + + "1: \n\t" /* do samples in groups of 8 */ + LOAD_SAMPLES + UNPACK_SAMPLES(dq) + STORE_SAMPLES + " dec %2 \n\t" + " jne 1b \n\t" + + "2: \n\t" + " mov %3, 
%2 \n\t" + " and $7, %2 \n\t" /* prepare for processing the remaining samples */ + " je 4f \n\t" + + "3: \n\t" + HANDLE_SINGLE(dq) + " dec %2 \n\t" + " jne 3b \n\t" + + "4: \n\t" + " emms \n\t" + + : "+r" (dst), "+r" (src), "=&r" (temp) + : "r" ((pa_reg_x86)n) + : "cc" + ); + break; + } + case PA_SAMPLE_S16NE: + { + __asm__ __volatile__ ( + " mov %3, %2 \n\t" + " sar $3, %2 \n\t" /* prepare for processing 8 samples at a time */ + " cmp $0, %2 \n\t" + " je 2f \n\t" + + "1: \n\t" /* do samples in groups of 16 */ + LOAD_SAMPLES + UNPACK_SAMPLES(wd) + STORE_SAMPLES + " dec %2 \n\t" + " jne 1b \n\t" + + "2: \n\t" + " mov %3, %2 \n\t" + " and $7, %2 \n\t" /* prepare for processing the remaining samples */ + " je 4f \n\t" + + "3: \n\t" + HANDLE_SINGLE(wd) + " dec %2 \n\t" + " jne 3b \n\t" + + "4: \n\t" + " emms \n\t" + + : "+r" (dst), "+r" (src), "=&r" (temp) + : "r" ((pa_reg_x86)n) + : "cc" + ); + break; + } + default: + pa_assert_not_reached(); + } +} + +/* set the function that will execute the remapping based on the matrices */ +static void init_remap_mmx (pa_remap_t *m) { + unsigned n_oc, n_ic; + + n_oc = m->o_ss->channels; + n_ic = m->i_ss->channels; + + /* find some common channel remappings, fall back to full matrix operation. 
*/ + if (n_ic == 1 && n_oc == 2 && + m->map_table_f[0][0] >= 1.0 && m->map_table_f[1][0] >= 1.0) { + m->do_remap = (pa_do_remap_func_t) remap_mono_to_stereo_mmx; + pa_log_info("Using MMX mono to stereo remapping"); + } +} + +void pa_remap_func_init_mmx (pa_cpu_x86_flag_t flags) { +#if defined (__i386__) || defined (__amd64__) + pa_log_info("Initialising MMX optimized remappers."); + + pa_set_init_remap_func ((pa_init_remap_func_t) init_remap_mmx); +#endif /* defined (__i386__) || defined (__amd64__) */ +} -- cgit From 6076cef2092391d8b46aa84f86857cffebce4583 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Thu, 20 Aug 2009 20:00:50 +0200 Subject: remap: make the MMX code pretier --- src/pulsecore/remap_mmx.c | 74 +++++++++++++++-------------------------------- 1 file changed, 24 insertions(+), 50 deletions(-) (limited to 'src') diff --git a/src/pulsecore/remap_mmx.c b/src/pulsecore/remap_mmx.c index 6690cfa4..bfcae6c5 100644 --- a/src/pulsecore/remap_mmx.c +++ b/src/pulsecore/remap_mmx.c @@ -73,6 +73,28 @@ " add $4, %1 \n\t" \ " add $8, %0 \n\t" +#define MONO_TO_STEREO(s) \ + " mov %3, %2 \n\t" \ + " sar $3, %2 \n\t" \ + " cmp $0, %2 \n\t" \ + " je 2f \n\t" \ + "1: \n\t" \ + LOAD_SAMPLES \ + UNPACK_SAMPLES(s) \ + STORE_SAMPLES \ + " dec %2 \n\t" \ + " jne 1b \n\t" \ + "2: \n\t" \ + " mov %3, %2 \n\t" \ + " and $7, %2 \n\t" \ + " je 4f \n\t" \ + "3: \n\t" \ + HANDLE_SINGLE(s) \ + " dec %2 \n\t" \ + " jne 3b \n\t" \ + "4: \n\t" \ + " emms \n\t" + static void remap_mono_to_stereo_mmx (pa_remap_t *m, void *dst, const void *src, unsigned n) { pa_reg_x86 temp; @@ -80,31 +102,7 @@ static void remap_mono_to_stereo_mmx (pa_remap_t *m, void *dst, const void *src, case PA_SAMPLE_FLOAT32NE: { __asm__ __volatile__ ( - " mov %3, %2 \n\t" - " sar $3, %2 \n\t" /* prepare for processing 8 samples at a time */ - " cmp $0, %2 \n\t" - " je 2f \n\t" - - "1: \n\t" /* do samples in groups of 8 */ - LOAD_SAMPLES - UNPACK_SAMPLES(dq) - STORE_SAMPLES - " dec %2 \n\t" - " jne 1b \n\t" - - 
"2: \n\t" - " mov %3, %2 \n\t" - " and $7, %2 \n\t" /* prepare for processing the remaining samples */ - " je 4f \n\t" - - "3: \n\t" - HANDLE_SINGLE(dq) - " dec %2 \n\t" - " jne 3b \n\t" - - "4: \n\t" - " emms \n\t" - + MONO_TO_STEREO(dq) /* do doubles to quads */ : "+r" (dst), "+r" (src), "=&r" (temp) : "r" ((pa_reg_x86)n) : "cc" @@ -114,31 +112,7 @@ static void remap_mono_to_stereo_mmx (pa_remap_t *m, void *dst, const void *src, case PA_SAMPLE_S16NE: { __asm__ __volatile__ ( - " mov %3, %2 \n\t" - " sar $3, %2 \n\t" /* prepare for processing 8 samples at a time */ - " cmp $0, %2 \n\t" - " je 2f \n\t" - - "1: \n\t" /* do samples in groups of 16 */ - LOAD_SAMPLES - UNPACK_SAMPLES(wd) - STORE_SAMPLES - " dec %2 \n\t" - " jne 1b \n\t" - - "2: \n\t" - " mov %3, %2 \n\t" - " and $7, %2 \n\t" /* prepare for processing the remaining samples */ - " je 4f \n\t" - - "3: \n\t" - HANDLE_SINGLE(wd) - " dec %2 \n\t" - " jne 3b \n\t" - - "4: \n\t" - " emms \n\t" - + MONO_TO_STEREO(wd) /* do words to doubles */ : "+r" (dst), "+r" (src), "=&r" (temp) : "r" ((pa_reg_x86)n) : "cc" -- cgit From 9f97b7cbe13b3a4f0fefd8588a3dec95f0d14e58 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Aug 2009 02:56:17 +0200 Subject: sink-input: add callbacks that are called whenever the mute/volume changes --- src/pulsecore/sink-input.c | 16 +++++++++++++++- src/pulsecore/sink-input.h | 12 ++++++++++-- src/pulsecore/sink.c | 16 +++++++++++++--- 3 files changed, 38 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/pulsecore/sink-input.c b/src/pulsecore/sink-input.c index a29334f9..975fda01 100644 --- a/src/pulsecore/sink-input.c +++ b/src/pulsecore/sink-input.c @@ -126,6 +126,8 @@ static void reset_callbacks(pa_sink_input *i) { i->state_change = NULL; i->may_move_to = NULL; i->send_event = NULL; + i->volume_changed = NULL; + i->mute_changed = NULL; } /* Called from main context */ @@ -968,7 +970,10 @@ void pa_sink_input_set_volume(pa_sink_input *i, const pa_cvolume 
*volume, pa_boo pa_assert_se(pa_asyncmsgq_send(i->sink->asyncmsgq, PA_MSGOBJECT(i), PA_SINK_INPUT_MESSAGE_SET_SOFT_VOLUME, NULL, 0, NULL) == 0); } - /* The virtual volume changed, let's tell people so */ + /* The volume changed, let's tell people so */ + if (i->volume_changed) + i->volume_changed(i); + pa_subscription_post(i->core, PA_SUBSCRIPTION_EVENT_SINK_INPUT|PA_SUBSCRIPTION_EVENT_CHANGE, i->index); } @@ -999,6 +1004,11 @@ void pa_sink_input_set_mute(pa_sink_input *i, pa_bool_t mute, pa_bool_t save) { i->save_muted = save; pa_assert_se(pa_asyncmsgq_send(i->sink->asyncmsgq, PA_MSGOBJECT(i), PA_SINK_INPUT_MESSAGE_SET_SOFT_MUTE, NULL, 0, NULL) == 0); + + /* The mute status changed, let's tell people so */ + if (i->mute_changed) + i->mute_changed(i); + pa_subscription_post(i->core, PA_SUBSCRIPTION_EVENT_SINK_INPUT|PA_SUBSCRIPTION_EVENT_CHANGE, i->index); } @@ -1263,6 +1273,10 @@ int pa_sink_input_finish_move(pa_sink_input *i, pa_sink *dest, pa_bool_t save) { /* Notify everyone */ pa_hook_fire(&i->core->hooks[PA_CORE_HOOK_SINK_INPUT_MOVE_FINISH], i); + + if (i->volume_changed) + i->volume_changed(i); + pa_subscription_post(i->core, PA_SUBSCRIPTION_EVENT_SINK_INPUT|PA_SUBSCRIPTION_EVENT_CHANGE, i->index); return 0; diff --git a/src/pulsecore/sink-input.h b/src/pulsecore/sink-input.h index ea0f8c0e..5285e618 100644 --- a/src/pulsecore/sink-input.h +++ b/src/pulsecore/sink-input.h @@ -192,8 +192,16 @@ struct pa_sink_input { pa_bool_t (*may_move_to) (pa_sink_input *i, pa_sink *s); /* may be NULL */ /* If non-NULL this function is used to dispatch asynchronous - * control events. */ - void (*send_event)(pa_sink_input *i, const char *event, pa_proplist* data); + * control events. Called from main context. */ + void (*send_event)(pa_sink_input *i, const char *event, pa_proplist* data); /* may be NULL */ + + /* If non-NULL this function is called whenever the sink input + * volume changes. 
Called from main context */ + void (*volume_changed)(pa_sink_input *i); /* may be NULL */ + + /* If non-NULL this function is called whenever the sink input + * mute status changes. Called from main context */ + void (*mute_changed)(pa_sink_input *i); /* may be NULL */ struct { pa_sink_input_state_t state; diff --git a/src/pulsecore/sink.c b/src/pulsecore/sink.c index 1cce8e6b..fab88755 100644 --- a/src/pulsecore/sink.c +++ b/src/pulsecore/sink.c @@ -1380,9 +1380,14 @@ static void propagate_reference_volume(pa_sink *s) { pa_cvolume_remap(&remapped, &s->channel_map, &i->channel_map); pa_sw_cvolume_multiply(&i->volume, &remapped, &i->reference_ratio); - /* The reference volume changed, let's tell people so */ - if (!pa_cvolume_equal(&old_volume, &i->volume)) + /* The volume changed, let's tell people so */ + if (!pa_cvolume_equal(&old_volume, &i->volume)) { + + if (i->volume_changed) + i->volume_changed(i); + pa_subscription_post(i->core, PA_SUBSCRIPTION_EVENT_SINK_INPUT|PA_SUBSCRIPTION_EVENT_CHANGE, i->index); + } } } @@ -1522,8 +1527,13 @@ static void propagate_real_volume(pa_sink *s, const pa_cvolume *old_real_volume) pa_sw_cvolume_multiply(&i->volume, &remapped, &i->reference_ratio); /* Notify if something changed */ - if (!pa_cvolume_equal(&old_volume, &i->volume)) + if (!pa_cvolume_equal(&old_volume, &i->volume)) { + + if (i->volume_changed) + i->volume_changed(i); + pa_subscription_post(i->core, PA_SUBSCRIPTION_EVENT_SINK_INPUT|PA_SUBSCRIPTION_EVENT_CHANGE, i->index); + } } } -- cgit From a562978509674f37f3cc7d5d5d1002f52a59654d Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Aug 2009 02:59:26 +0200 Subject: ladspa: forward volume changes from ladspa sink to stream and hence via flat volume logic to master sink --- src/modules/module-ladspa-sink.c | 72 +++++++++++++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/modules/module-ladspa-sink.c b/src/modules/module-ladspa-sink.c index 
f2d53d00..233f90c4 100644 --- a/src/modules/module-ladspa-sink.c +++ b/src/modules/module-ladspa-sink.c @@ -99,7 +99,7 @@ static const char* const valid_modargs[] = { }; /* Called from I/O thread context */ -static int sink_process_msg(pa_msgobject *o, int code, void *data, int64_t offset, pa_memchunk *chunk) { +static int sink_process_msg_cb(pa_msgobject *o, int code, void *data, int64_t offset, pa_memchunk *chunk) { struct userdata *u = PA_SINK(o)->userdata; switch (code) { @@ -130,7 +130,7 @@ static int sink_process_msg(pa_msgobject *o, int code, void *data, int64_t offse } /* Called from main context */ -static int sink_set_state(pa_sink *s, pa_sink_state_t state) { +static int sink_set_state_cb(pa_sink *s, pa_sink_state_t state) { struct userdata *u; pa_sink_assert_ref(s); @@ -145,7 +145,7 @@ static int sink_set_state(pa_sink *s, pa_sink_state_t state) { } /* Called from I/O thread context */ -static void sink_request_rewind(pa_sink *s) { +static void sink_request_rewind_cb(pa_sink *s) { struct userdata *u; pa_sink_assert_ref(s); @@ -160,7 +160,7 @@ static void sink_request_rewind(pa_sink *s) { } /* Called from I/O thread context */ -static void sink_update_requested_latency(pa_sink *s) { +static void sink_update_requested_latency_cb(pa_sink *s) { struct userdata *u; pa_sink_assert_ref(s); @@ -176,6 +176,34 @@ static void sink_update_requested_latency(pa_sink *s) { pa_sink_get_requested_latency_within_thread(s)); } +/* Called from main context */ +static void sink_set_volume_cb(pa_sink *s) { + struct userdata *u; + + pa_sink_assert_ref(s); + pa_assert_se(u = s->userdata); + + if (!PA_SINK_IS_LINKED(pa_sink_get_state(s)) || + !PA_SINK_INPUT_IS_LINKED(pa_sink_input_get_state(u->sink_input))) + return; + + pa_sink_input_set_volume(u->sink_input, &s->real_volume, s->save_volume, TRUE); +} + +/* Called from main context */ +static void sink_set_mute_cb(pa_sink *s) { + struct userdata *u; + + pa_sink_assert_ref(s); + pa_assert_se(u = s->userdata); + + if 
(!PA_SINK_IS_LINKED(pa_sink_get_state(s)) || + !PA_SINK_INPUT_IS_LINKED(pa_sink_input_get_state(u->sink_input))) + return; + + pa_sink_input_set_mute(u->sink_input, s->muted, s->save_muted); +} + /* Called from I/O thread context */ static int sink_input_pop_cb(pa_sink_input *i, size_t nbytes, pa_memchunk *chunk) { struct userdata *u; @@ -394,6 +422,26 @@ static void sink_input_moving_cb(pa_sink_input *i, pa_sink *dest) { pa_sink_update_flags(u->sink, PA_SINK_LATENCY|PA_SINK_DYNAMIC_LATENCY, dest->flags); } +/* Called from main context */ +static void sink_input_volume_changed_cb(pa_sink_input *i) { + struct userdata *u; + + pa_sink_input_assert_ref(i); + pa_assert_se(u = i->userdata); + + pa_sink_volume_changed(u->sink, &i->volume); +} + +/* Called from main context */ +static void sink_input_mute_changed_cb(pa_sink_input *i) { + struct userdata *u; + + pa_sink_input_assert_ref(i); + pa_assert_se(u = i->userdata); + + pa_sink_mute_changed(u->sink, i->muted); +} + int pa__init(pa_module*m) { struct userdata *u; pa_sample_spec ss; @@ -731,7 +779,9 @@ int pa__init(pa_module*m) { goto fail; } - u->sink = pa_sink_new(m->core, &sink_data, master->flags & (PA_SINK_LATENCY|PA_SINK_DYNAMIC_LATENCY)); + u->sink = pa_sink_new(m->core, &sink_data, + PA_SINK_HW_MUTE_CTRL|PA_SINK_HW_VOLUME_CTRL|PA_SINK_DECIBEL_VOLUME| + (master->flags & (PA_SINK_LATENCY|PA_SINK_DYNAMIC_LATENCY))); pa_sink_new_data_done(&sink_data); if (!u->sink) { @@ -739,10 +789,12 @@ int pa__init(pa_module*m) { goto fail; } - u->sink->parent.process_msg = sink_process_msg; - u->sink->set_state = sink_set_state; - u->sink->update_requested_latency = sink_update_requested_latency; - u->sink->request_rewind = sink_request_rewind; + u->sink->parent.process_msg = sink_process_msg_cb; + u->sink->set_state = sink_set_state_cb; + u->sink->update_requested_latency = sink_update_requested_latency_cb; + u->sink->request_rewind = sink_request_rewind_cb; + u->sink->set_volume = sink_set_volume_cb; + u->sink->set_mute = 
sink_set_mute_cb; u->sink->userdata = u; pa_sink_set_asyncmsgq(u->sink, master->asyncmsgq); @@ -775,6 +827,8 @@ int pa__init(pa_module*m) { u->sink_input->state_change = sink_input_state_change_cb; u->sink_input->may_move_to = sink_input_may_move_to_cb; u->sink_input->moving = sink_input_moving_cb; + u->sink_input->volume_changed = sink_input_volume_changed_cb; + u->sink_input->mute_changed = sink_input_mute_changed_cb; u->sink_input->userdata = u; pa_sink_put(u->sink); -- cgit From 8a2a6b2004cd299467de1955f7f99e25033faa63 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Aug 2009 03:43:53 +0200 Subject: adjust various data/library paths automatically if we are run from a build tree --- src/Makefile.am | 5 +++-- src/daemon/daemon-conf.c | 20 ++++++++++++++++++-- src/daemon/main.c | 2 ++ src/modules/alsa/alsa-mixer.c | 16 +++++++++++++--- src/pulsecore/core-util.c | 19 +++++++++++++++++++ src/pulsecore/core-util.h | 4 ++++ 6 files changed, 59 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index 17011cd3..fd440991 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -74,6 +74,7 @@ AM_CFLAGS = \ $(LIBSAMPLERATE_CFLAGS) \ $(LIBSNDFILE_CFLAGS) \ $(LIBSPEEX_CFLAGS) \ + -DPA_BUILDDIR=\"$(abs_builddir)\" \ -DPA_DLSEARCHPATH=\"$(modlibexecdir)\" \ -DPA_DEFAULT_CONFIG_DIR=\"$(PA_DEFAULT_CONFIG_DIR)\" \ -DPA_BINARY=\"$(PA_BINARY)\" \ @@ -83,8 +84,8 @@ AM_CFLAGS = \ -DAO_REQUIRE_CAS \ -DPULSE_LOCALEDIR=\"$(pulselocaledir)\" \ -DPA_MACHINE_ID=\"$(localstatedir)/lib/dbus/machine-id\" \ - -DPA_ALSA_PATHS_DIR=\"$(alsapathsdir)\" \ - -DPA_ALSA_PROFILE_SETS_DIR=\"$(alsaprofilesetsdir)\" + -DPA_ALSA_PATHS_DIR=\"$(alsapathsdir)\" \ + -DPA_ALSA_PROFILE_SETS_DIR=\"$(alsaprofilesetsdir)\" AM_LIBADD = $(PTHREAD_LIBS) $(INTLLIBS) AM_LDADD = $(PTHREAD_LIBS) $(INTLLIBS) diff --git a/src/daemon/daemon-conf.c b/src/daemon/daemon-conf.c index 9a87b555..ec1ec5ce 100644 --- a/src/daemon/daemon-conf.c +++ b/src/daemon/daemon-conf.c @@ 
-133,9 +133,25 @@ static const pa_daemon_conf default_conf = { }; pa_daemon_conf* pa_daemon_conf_new(void) { - pa_daemon_conf *c = pa_xnewdup(pa_daemon_conf, &default_conf, 1); + pa_daemon_conf *c; + + c = pa_xnewdup(pa_daemon_conf, &default_conf, 1); + +#if defined(__linux__) && !defined(__OPTIMIZE__) + + /* We abuse __OPTIMIZE__ as a check whether we are a debug build + * or not. If we are and are run from the build tree then we + * override the search path to point to our build tree */ + + if (pa_run_from_build_tree()) { + pa_log_notice("Detected that we are run from the build tree, fixing search path."); + c->dl_search_path = pa_xstrdup(PA_BUILDDIR "/.libs/"); + + } else + +#endif + c->dl_search_path = pa_xstrdup(PA_DLSEARCHPATH); - c->dl_search_path = pa_xstrdup(PA_DLSEARCHPATH); return c; } diff --git a/src/daemon/main.c b/src/daemon/main.c index 8521e720..72984590 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -774,6 +774,8 @@ int main(int argc, char *argv[]) { pa_log_info(_("Using state directory %s."), s); pa_xfree(s); + pa_log_info(_("Using modules directory %s."), conf->dl_search_path); + pa_log_info(_("Running in system mode: %s"), pa_yes_no(pa_in_system_mode())); if (pa_in_system_mode()) diff --git a/src/modules/alsa/alsa-mixer.c b/src/modules/alsa/alsa-mixer.c index a4c2ee0f..61c92cd0 100644 --- a/src/modules/alsa/alsa-mixer.c +++ b/src/modules/alsa/alsa-mixer.c @@ -929,7 +929,7 @@ static int element_zero_volume(pa_alsa_element *e, snd_mixer_t *m) { int pa_alsa_path_select(pa_alsa_path *p, snd_mixer_t *m) { pa_alsa_element *e; - int r; + int r = 0; pa_assert(m); pa_assert(p); @@ -1849,7 +1849,12 @@ pa_alsa_path* pa_alsa_path_new(const char *fname, pa_alsa_direction_t direction) items[1].data = &p->description; items[2].data = &p->name; - fn = pa_maybe_prefix_path(fname, PA_ALSA_PATHS_DIR); + fn = pa_maybe_prefix_path(fname, +#if defined(__linux__) && !defined(__OPTIMIZE__) + pa_run_from_build_tree() ? 
PA_BUILDDIR "/modules/alsa/mixer/paths/" : +#endif + PA_ALSA_PATHS_DIR); + r = pa_config_parse(fn, NULL, items, p); pa_xfree(fn); @@ -3110,7 +3115,12 @@ pa_alsa_profile_set* pa_alsa_profile_set_new(const char *fname, const pa_channel if (!fname) fname = "default.conf"; - fn = pa_maybe_prefix_path(fname, PA_ALSA_PROFILE_SETS_DIR); + fn = pa_maybe_prefix_path(fname, +#if defined(__linux__) && !defined(__OPTIMIZE__) + pa_run_from_build_tree() ? PA_BUILDDIR "/modules/alsa/mixer/profile-sets/" : +#endif + PA_ALSA_PROFILE_SETS_DIR); + r = pa_config_parse(fn, NULL, items, ps); pa_xfree(fn); diff --git a/src/pulsecore/core-util.c b/src/pulsecore/core-util.c index ef8c8472..843c8377 100644 --- a/src/pulsecore/core-util.c +++ b/src/pulsecore/core-util.c @@ -2862,3 +2862,22 @@ void pa_reset_personality(void) { #endif } + +#if defined(__linux__) && !defined(__OPTIMIZE__) + +pa_bool_t pa_run_from_build_tree(void) { + char *rp; + pa_bool_t b = FALSE; + + /* We abuse __OPTIMIZE__ as a check whether we are a debug build + * or not. 
*/ + + if ((rp = pa_readlink("/proc/self/exe"))) { + b = pa_startswith(rp, PA_BUILDDIR); + pa_xfree(rp); + } + + return b; +} + +#endif diff --git a/src/pulsecore/core-util.h b/src/pulsecore/core-util.h index 3d3aec71..2551f794 100644 --- a/src/pulsecore/core-util.h +++ b/src/pulsecore/core-util.h @@ -243,4 +243,8 @@ size_t pa_pipe_buf(int fd); void pa_reset_personality(void); +#if defined(__linux__) && !defined(__OPTIMIZE__) +pa_bool_t pa_run_from_build_tree(void); +#endif + #endif -- cgit From ac056191410b67466b429720778fe87279d9912a Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Aug 2009 03:45:17 +0200 Subject: combine: quieten gcc a bit --- src/modules/module-combine.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src') diff --git a/src/modules/module-combine.c b/src/modules/module-combine.c index 582cbce1..e90ef11c 100644 --- a/src/modules/module-combine.c +++ b/src/modules/module-combine.c @@ -1161,6 +1161,8 @@ int pa__init(pa_module*m) { pa_channel_map slaves_map; pa_bool_t is_first_slave = TRUE; + pa_sample_spec_init(&slaves_spec); + while ((n = pa_split(slaves, ",", &split_state))) { pa_sink *slave_sink; -- cgit From fe9a577cf2799c0864a05a08c785161ba3738d88 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Aug 2009 03:45:58 +0200 Subject: alsa: leave headphone jack enabled in normal mixer paths --- src/modules/alsa/mixer/paths/analog-output-lfe-on-mono.conf | 7 +++++-- src/modules/alsa/mixer/paths/analog-output-mono.conf | 7 +++++-- src/modules/alsa/mixer/paths/analog-output.conf | 7 +++++-- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/modules/alsa/mixer/paths/analog-output-lfe-on-mono.conf b/src/modules/alsa/mixer/paths/analog-output-lfe-on-mono.conf index 2db976a5..3457d4a2 100644 --- a/src/modules/alsa/mixer/paths/analog-output-lfe-on-mono.conf +++ b/src/modules/alsa/mixer/paths/analog-output-lfe-on-mono.conf @@ -41,9 +41,12 @@ volume = merge override-map.1 = lfe 
override-map.2 = lfe,lfe +; This profile path is intended to control the speaker, not the +; headphones. But it should not hurt if we leave the headphone jack +; enabled nonetheless. [Element Headphone] -switch = off -volume = off +switch = mute +volume = zero [Element Speaker] switch = mute diff --git a/src/modules/alsa/mixer/paths/analog-output-mono.conf b/src/modules/alsa/mixer/paths/analog-output-mono.conf index a58cc970..dc270cfe 100644 --- a/src/modules/alsa/mixer/paths/analog-output-mono.conf +++ b/src/modules/alsa/mixer/paths/analog-output-mono.conf @@ -38,9 +38,12 @@ volume = merge override-map.1 = all override-map.2 = all-left,all-right +; This profile path is intended to control the speaker, not the +; headphones. But it should not hurt if we leave the headphone jack +; enabled nonetheless. [Element Headphone] -switch = off -volume = off +switch = mute +volume = zero [Element Speaker] switch = mute diff --git a/src/modules/alsa/mixer/paths/analog-output.conf b/src/modules/alsa/mixer/paths/analog-output.conf index b412a437..f71a05a1 100644 --- a/src/modules/alsa/mixer/paths/analog-output.conf +++ b/src/modules/alsa/mixer/paths/analog-output.conf @@ -37,9 +37,12 @@ override-map.2 = all-left,all-right switch = off volume = off +; This profile path is intended to control the speaker, not the +; headphones. But it should not hurt if we leave the headphone jack +; enabled nonetheless. 
[Element Headphone] -switch = off -volume = off +switch = mute +volume = zero [Element Speaker] switch = mute -- cgit From 5317e35543ab208a416cc662e2a6a88899a96704 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Aug 2009 14:55:05 +0200 Subject: udev: when a device appears that we cannot access right-away try again later on inotify --- src/modules/module-udev-detect.c | 91 +++++++++++++++++++++++++--------------- 1 file changed, 57 insertions(+), 34 deletions(-) (limited to 'src') diff --git a/src/modules/module-udev-detect.c b/src/modules/module-udev-detect.c index 0b30fd54..22ce8c3c 100644 --- a/src/modules/module-udev-detect.c +++ b/src/modules/module-udev-detect.c @@ -47,6 +47,7 @@ struct device { char *path; pa_bool_t accessible; char *card_name; + char *args; uint32_t module; }; @@ -78,6 +79,7 @@ static void device_free(struct device *d) { pa_xfree(d->path); pa_xfree(d->card_name); + pa_xfree(d->args); pa_xfree(d); } @@ -103,22 +105,43 @@ static void verify_access(struct userdata *u, struct device *d) { pa_assert(u); pa_assert(d); - if (!(card = pa_namereg_get(u->core, d->card_name, PA_NAMEREG_CARD))) - return; - cd = pa_sprintf_malloc("%s/snd/controlC%s", udev_get_dev_path(u->udev), path_get_card_id(d->path)); - d->accessible = access(cd, W_OK) >= 0; - pa_log_info("%s is accessible: %s", cd, pa_yes_no(d->accessible)); + d->accessible = access(cd, R_OK|W_OK) >= 0; pa_xfree(cd); - pa_card_suspend(card, !d->accessible, PA_SUSPEND_SESSION); + pa_log_info("%s is accessible: %s", cd, pa_yes_no(d->accessible)); + + if (d->module == PA_INVALID_INDEX) { + + /* If we not loaded, try to load */ + + if (d->accessible) { + pa_module *m; + + pa_log_debug("Loading module-alsa-card with arguments '%s'", d->args); + m = pa_module_load(u->core, "module-alsa-card", d->args); + + if (m) { + d->module = m->index; + pa_log_info("Card %s (%s) module loaded.", d->path, d->card_name); + } else + pa_log_info("Card %s (%s) failed to load module.", d->path, 
d->card_name); + } + + } else { + + /* If we are already loaded update suspend status with + * accessible boolean */ + + if ((card = pa_namereg_get(u->core, d->card_name, PA_NAMEREG_CARD))) + pa_card_suspend(card, !d->accessible, PA_SUSPEND_SESSION); + } } static void card_changed(struct userdata *u, struct udev_device *dev) { struct device *d; const char *path; const char *t; - char *card_name, *args; pa_module *m; char *n; @@ -135,44 +158,41 @@ static void card_changed(struct userdata *u, struct udev_device *dev) { return; } + d = pa_xnew0(struct device, 1); + d->path = pa_xstrdup(path); + d->accessible = TRUE; + d->module = PA_INVALID_INDEX; + if (!(t = udev_device_get_property_value(dev, "PULSE_NAME"))) if (!(t = udev_device_get_property_value(dev, "ID_ID"))) if (!(t = udev_device_get_property_value(dev, "ID_PATH"))) t = path_get_card_id(path); n = pa_namereg_make_valid_name(t); + d->card_name = pa_sprintf_malloc("alsa_card.%s", n); + d->args = pa_sprintf_malloc("device_id=\"%s\" " + "name=\"%s\" " + "card_name=\"%s\" " + "tsched=%s " + "ignore_dB=%s " + "card_properties=\"module-udev-detect.discovered=1\"", + path_get_card_id(path), + n, + d->card_name, + pa_yes_no(u->use_tsched), + pa_yes_no(u->ignore_dB)); + pa_xfree(n); - card_name = pa_sprintf_malloc("alsa_card.%s", n); - args = pa_sprintf_malloc("device_id=\"%s\" " - "name=\"%s\" " - "card_name=\"%s\" " - "tsched=%s " - "ignore_dB=%s " - "card_properties=\"module-udev-detect.discovered=1\"", - path_get_card_id(path), - n, - card_name, - pa_yes_no(u->use_tsched), - pa_yes_no(u->ignore_dB)); - - pa_log_debug("Loading module-alsa-card with arguments '%s'", args); - m = pa_module_load(u->core, "module-alsa-card", args); - pa_xfree(args); + pa_log_debug("Loading module-alsa-card with arguments '%s'", d->args); + m = pa_module_load(u->core, "module-alsa-card", d->args); if (m) { - pa_log_info("Card %s (%s) added.", path, n); - - d = pa_xnew(struct device, 1); - d->path = pa_xstrdup(path); - d->card_name = 
card_name; d->module = m->index; - d->accessible = TRUE; - - pa_hashmap_put(u->devices, d->path, d); + pa_log_info("Card %s (%s) added and module loaded.", path, d->card_name); } else - pa_xfree(card_name); + pa_log_info("Card %s (%s) added but failed to load module.", path, d->card_name); - pa_xfree(n); + pa_hashmap_put(u->devices, d->path, d); } static void remove_card(struct userdata *u, struct udev_device *dev) { @@ -185,7 +205,10 @@ static void remove_card(struct userdata *u, struct udev_device *dev) { return; pa_log_info("Card %s removed.", d->path); - pa_module_unload_request_by_index(u->core, d->module, TRUE); + + if (d->module != PA_INVALID_INDEX) + pa_module_unload_request_by_index(u->core, d->module, TRUE); + device_free(d); } -- cgit From 9abc010c930999eed67253f5b83f7c226b1a17f6 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Aug 2009 21:27:44 +0200 Subject: object: speed up type verification by not relying on strcmp() Instead of using string contents for type identification use the address of a constant string array. This should speed up type verifications a little since we only need to compare one machine word instead of a full string. Also, this saves a few strings. To make clear that types must be compared via address and not string contents 'type_name' is now called 'type_id'. This also simplifies the macros for declaring and defining public and private subclasses.
--- src/pulsecore/core.c | 2 +- src/pulsecore/core.h | 2 +- src/pulsecore/msgobject.c | 14 +++++------ src/pulsecore/msgobject.h | 8 +++--- src/pulsecore/object.c | 18 ++++++++------ src/pulsecore/object.h | 51 ++++++++++++++++++++++++++------------- src/pulsecore/play-memblockq.c | 3 +-- src/pulsecore/protocol-esound.c | 3 +-- src/pulsecore/protocol-native.c | 15 ++++-------- src/pulsecore/protocol-simple.c | 3 +-- src/pulsecore/sink-input.c | 2 +- src/pulsecore/sink-input.h | 2 +- src/pulsecore/sink.c | 2 +- src/pulsecore/sink.h | 2 +- src/pulsecore/sound-file-stream.c | 3 +-- src/pulsecore/source-output.c | 2 +- src/pulsecore/source-output.h | 2 +- src/pulsecore/source.c | 2 +- src/pulsecore/source.h | 2 +- 19 files changed, 73 insertions(+), 65 deletions(-) (limited to 'src') diff --git a/src/pulsecore/core.c b/src/pulsecore/core.c index f5eb8352..f0726453 100644 --- a/src/pulsecore/core.c +++ b/src/pulsecore/core.c @@ -47,7 +47,7 @@ #include "core.h" -static PA_DEFINE_CHECK_TYPE(pa_core, pa_msgobject); +PA_DEFINE_PUBLIC_CLASS(pa_core, pa_msgobject); static int core_process_msg(pa_msgobject *o, int code, void *userdata, int64_t offset, pa_memchunk *chunk) { pa_core *c = PA_CORE(o); diff --git a/src/pulsecore/core.h b/src/pulsecore/core.h index f6ec7122..c1002f93 100644 --- a/src/pulsecore/core.h +++ b/src/pulsecore/core.h @@ -165,7 +165,7 @@ struct pa_core { pa_hook hooks[PA_CORE_HOOK_MAX]; }; -PA_DECLARE_CLASS(pa_core); +PA_DECLARE_PUBLIC_CLASS(pa_core); #define PA_CORE(o) pa_core_cast(o) enum { diff --git a/src/pulsecore/msgobject.c b/src/pulsecore/msgobject.c index 6a2a612d..075a28c5 100644 --- a/src/pulsecore/msgobject.c +++ b/src/pulsecore/msgobject.c @@ -26,22 +26,22 @@ #include "msgobject.h" -PA_DEFINE_CHECK_TYPE(pa_msgobject, pa_object); +PA_DEFINE_PUBLIC_CLASS(pa_msgobject, pa_object); -pa_msgobject *pa_msgobject_new_internal(size_t size, const char *type_name, int (*check_type)(const char *type_name)) { +pa_msgobject *pa_msgobject_new_internal(size_t 
size, const char *type_id, pa_bool_t (*check_type)(const char *type_name)) { pa_msgobject *o; pa_assert(size > sizeof(pa_msgobject)); - pa_assert(type_name); + pa_assert(type_id); if (!check_type) check_type = pa_msgobject_check_type; - pa_assert(check_type(type_name)); - pa_assert(check_type("pa_object")); - pa_assert(check_type("pa_msgobject")); + pa_assert(check_type(type_id)); + pa_assert(check_type(pa_object_type_id)); + pa_assert(check_type(pa_msgobject_type_id)); - o = PA_MSGOBJECT(pa_object_new_internal(size, type_name, check_type)); + o = PA_MSGOBJECT(pa_object_new_internal(size, type_id, check_type)); o->process_msg = NULL; return o; } diff --git a/src/pulsecore/msgobject.h b/src/pulsecore/msgobject.h index a35a23b5..ee0ec1ed 100644 --- a/src/pulsecore/msgobject.h +++ b/src/pulsecore/msgobject.h @@ -38,15 +38,13 @@ struct pa_msgobject { int (*process_msg)(pa_msgobject *o, int code, void *userdata, int64_t offset, pa_memchunk *chunk); }; -pa_msgobject *pa_msgobject_new_internal(size_t size, const char *type_name, int (*check_type)(const char *type_name)); +pa_msgobject *pa_msgobject_new_internal(size_t size, const char *type_id, pa_bool_t (*check_type)(const char *type_name)); -int pa_msgobject_check_type(const char *type); - -#define pa_msgobject_new(type) ((type*) pa_msgobject_new_internal(sizeof(type), #type, type##_check_type)) +#define pa_msgobject_new(type) ((type*) pa_msgobject_new_internal(sizeof(type), type##_type_id, type##_check_type)) #define pa_msgobject_free ((void (*) (pa_msgobject* o)) pa_object_free) #define PA_MSGOBJECT(o) pa_msgobject_cast(o) -PA_DECLARE_CLASS(pa_msgobject); +PA_DECLARE_PUBLIC_CLASS(pa_msgobject); #endif diff --git a/src/pulsecore/object.c b/src/pulsecore/object.c index f3ead9c5..099d50d9 100644 --- a/src/pulsecore/object.c +++ b/src/pulsecore/object.c @@ -28,21 +28,23 @@ #include "object.h" -pa_object *pa_object_new_internal(size_t size, const char *type_name, int (*check_type)(const char *type_name)) { +const char 
pa_object_type_id[] = "pa_object"; + +pa_object *pa_object_new_internal(size_t size, const char *type_id, pa_bool_t (*check_type)(const char *type_id)) { pa_object *o; pa_assert(size > sizeof(pa_object)); - pa_assert(type_name); + pa_assert(type_id); if (!check_type) check_type = pa_object_check_type; - pa_assert(check_type(type_name)); - pa_assert(check_type("pa_object")); + pa_assert(check_type(type_id)); + pa_assert(check_type(pa_object_type_id)); o = pa_xmalloc(size); PA_REFCNT_INIT(o); - o->type_name = type_name; + o->type_id = type_id; o->free = pa_object_free; o->check_type = check_type; @@ -65,8 +67,8 @@ void pa_object_unref(pa_object *o) { } } -int pa_object_check_type(const char *type_name) { - pa_assert(type_name); +pa_bool_t pa_object_check_type(const char *type_id) { + pa_assert(type_id); - return pa_streq(type_name, "pa_object"); + return type_id == pa_object_type_id; } diff --git a/src/pulsecore/object.h b/src/pulsecore/object.h index 43e79327..4c120cd5 100644 --- a/src/pulsecore/object.h +++ b/src/pulsecore/object.h @@ -34,21 +34,23 @@ typedef struct pa_object pa_object; struct pa_object { PA_REFCNT_DECLARE; - const char *type_name; + const char *type_id; void (*free)(pa_object *o); - int (*check_type)(const char *type_name); + pa_bool_t (*check_type)(const char *type_name); }; -pa_object *pa_object_new_internal(size_t size, const char *type_name, int (*check_type)(const char *type_name)); -#define pa_object_new(type) ((type*) pa_object_new_internal(sizeof(type), #type, type##_check_type) +pa_object *pa_object_new_internal(size_t size, const char *type_id, pa_bool_t (*check_type)(const char *type_id)); +#define pa_object_new(type) ((type*) pa_object_new_internal(sizeof(type), type##_type_id, type##_check_type) #define pa_object_free ((void (*) (pa_object* _obj)) pa_xfree) -int pa_object_check_type(const char *type); +pa_bool_t pa_object_check_type(const char *type_id); -static inline int pa_object_isinstance(void *o) { +extern const char 
pa_object_type_id[]; + +static inline pa_bool_t pa_object_isinstance(void *o) { pa_object *obj = (pa_object*) o; - return obj ? obj->check_type("pa_object") : 0; + return obj ? obj->check_type(pa_object_type_id) : TRUE; } pa_object *pa_object_ref(pa_object *o); @@ -60,7 +62,7 @@ static inline int pa_object_refcnt(pa_object *o) { static inline pa_object* pa_object_cast(void *o) { pa_object *obj = (pa_object*) o; - pa_assert(!obj || obj->check_type("pa_object")); + pa_assert(!obj || obj->check_type(pa_object_type_id)); return obj; } @@ -68,10 +70,10 @@ static inline pa_object* pa_object_cast(void *o) { #define PA_OBJECT(o) pa_object_cast(o) -#define PA_DECLARE_CLASS(c) \ - static inline int c##_isinstance(void *o) { \ +#define PA_DECLARE_CLASS_COMMON(c) \ + static inline pa_bool_t c##_isinstance(void *o) { \ pa_object *obj = (pa_object*) o; \ - return obj ? obj->check_type(#c) : 1; \ + return obj ? obj->check_type(c##_type_id) : TRUE; \ } \ static inline c* c##_cast(void *o) { \ pa_assert(c##_isinstance(o)); \ @@ -91,12 +93,27 @@ static inline pa_object* pa_object_cast(void *o) { } \ struct __stupid_useless_struct_to_allow_trailing_semicolon -#define PA_DEFINE_CHECK_TYPE(c, parent) \ - int c##_check_type(const char *type) { \ - pa_assert(type); \ - if (strcmp(type, #c) == 0) \ - return 1; \ - return parent##_check_type(type); \ +#define PA_DECLARE_PUBLIC_CLASS(c) \ + extern const char c##_type_id[]; \ + PA_DECLARE_CLASS_COMMON(c); \ + pa_bool_t c##_check_type(const char *type_id) + +#define PA_DEFINE_PUBLIC_CLASS(c, parent) \ + const char c##_type_id[] = #c; \ + pa_bool_t c##_check_type(const char *type_id) { \ + if (type_id == c##_type_id) \ + return TRUE; \ + return parent##_check_type(type_id); \ + } \ + struct __stupid_useless_struct_to_allow_trailing_semicolon + +#define PA_DEFINE_PRIVATE_CLASS(c, parent) \ + static const char c##_type_id[] = #c; \ + PA_DECLARE_CLASS_COMMON(c); \ + static pa_bool_t c##_check_type(const char *type_id) { \ + if (type_id == 
c##_type_id) \ + return TRUE; \ + return parent##_check_type(type_id); \ } \ struct __stupid_useless_struct_to_allow_trailing_semicolon diff --git a/src/pulsecore/play-memblockq.c b/src/pulsecore/play-memblockq.c index fceb2ca1..b0d76993 100644 --- a/src/pulsecore/play-memblockq.c +++ b/src/pulsecore/play-memblockq.c @@ -47,9 +47,8 @@ enum { MEMBLOCKQ_STREAM_MESSAGE_UNLINK, }; -PA_DECLARE_CLASS(memblockq_stream); +PA_DEFINE_PRIVATE_CLASS(memblockq_stream, pa_msgobject); #define MEMBLOCKQ_STREAM(o) (memblockq_stream_cast(o)) -static PA_DEFINE_CHECK_TYPE(memblockq_stream, pa_msgobject); static void memblockq_stream_unlink(memblockq_stream *u) { pa_assert(u); diff --git a/src/pulsecore/protocol-esound.c b/src/pulsecore/protocol-esound.c index f64552aa..cfbaee6f 100644 --- a/src/pulsecore/protocol-esound.c +++ b/src/pulsecore/protocol-esound.c @@ -120,9 +120,8 @@ typedef struct connection { pa_time_event *auth_timeout_event; } connection; -PA_DECLARE_CLASS(connection); +PA_DEFINE_PRIVATE_CLASS(connection, pa_msgobject); #define CONNECTION(o) (connection_cast(o)) -static PA_DEFINE_CHECK_TYPE(connection, pa_msgobject); struct pa_esound_protocol { PA_REFCNT_DECLARE; diff --git a/src/pulsecore/protocol-native.c b/src/pulsecore/protocol-native.c index b1285e15..6678d847 100644 --- a/src/pulsecore/protocol-native.c +++ b/src/pulsecore/protocol-native.c @@ -98,17 +98,15 @@ typedef struct record_stream { pa_usec_t current_source_latency; } record_stream; -PA_DECLARE_CLASS(record_stream); #define RECORD_STREAM(o) (record_stream_cast(o)) -static PA_DEFINE_CHECK_TYPE(record_stream, pa_msgobject); +PA_DEFINE_PRIVATE_CLASS(record_stream, pa_msgobject); typedef struct output_stream { pa_msgobject parent; } output_stream; -PA_DECLARE_CLASS(output_stream); #define OUTPUT_STREAM(o) (output_stream_cast(o)) -static PA_DEFINE_CHECK_TYPE(output_stream, pa_msgobject); +PA_DEFINE_PRIVATE_CLASS(output_stream, pa_msgobject); typedef struct playback_stream { output_stream parent; @@ -138,9 
+136,8 @@ typedef struct playback_stream { uint64_t playing_for, underrun_for; } playback_stream; -PA_DECLARE_CLASS(playback_stream); #define PLAYBACK_STREAM(o) (playback_stream_cast(o)) -static PA_DEFINE_CHECK_TYPE(playback_stream, output_stream); +PA_DEFINE_PRIVATE_CLASS(playback_stream, output_stream); typedef struct upload_stream { output_stream parent; @@ -156,9 +153,8 @@ typedef struct upload_stream { pa_proplist *proplist; } upload_stream; -PA_DECLARE_CLASS(upload_stream); #define UPLOAD_STREAM(o) (upload_stream_cast(o)) -static PA_DEFINE_CHECK_TYPE(upload_stream, output_stream); +PA_DEFINE_PRIVATE_CLASS(upload_stream, output_stream); struct pa_native_connection { pa_msgobject parent; @@ -176,9 +172,8 @@ struct pa_native_connection { pa_time_event *auth_timeout_event; }; -PA_DECLARE_CLASS(pa_native_connection); #define PA_NATIVE_CONNECTION(o) (pa_native_connection_cast(o)) -static PA_DEFINE_CHECK_TYPE(pa_native_connection, pa_msgobject); +PA_DEFINE_PRIVATE_CLASS(pa_native_connection, pa_msgobject); struct pa_native_protocol { PA_REFCNT_DECLARE; diff --git a/src/pulsecore/protocol-simple.c b/src/pulsecore/protocol-simple.c index 776d74b6..95ec6ac8 100644 --- a/src/pulsecore/protocol-simple.c +++ b/src/pulsecore/protocol-simple.c @@ -69,9 +69,8 @@ typedef struct connection { } playback; } connection; -PA_DECLARE_CLASS(connection); +PA_DEFINE_PRIVATE_CLASS(connection, pa_msgobject); #define CONNECTION(o) (connection_cast(o)) -static PA_DEFINE_CHECK_TYPE(connection, pa_msgobject); struct pa_simple_protocol { PA_REFCNT_DECLARE; diff --git a/src/pulsecore/sink-input.c b/src/pulsecore/sink-input.c index 975fda01..4137a425 100644 --- a/src/pulsecore/sink-input.c +++ b/src/pulsecore/sink-input.c @@ -44,7 +44,7 @@ #define MEMBLOCKQ_MAXLENGTH (32*1024*1024) #define CONVERT_BUFFER_LENGTH (PA_PAGE_SIZE) -static PA_DEFINE_CHECK_TYPE(pa_sink_input, pa_msgobject); +PA_DEFINE_PUBLIC_CLASS(pa_sink_input, pa_msgobject); static void sink_input_free(pa_object *o); static void 
set_real_ratio(pa_sink_input *i, const pa_cvolume *v); diff --git a/src/pulsecore/sink-input.h b/src/pulsecore/sink-input.h index 5285e618..fe6cf75c 100644 --- a/src/pulsecore/sink-input.h +++ b/src/pulsecore/sink-input.h @@ -235,7 +235,7 @@ struct pa_sink_input { void *userdata; }; -PA_DECLARE_CLASS(pa_sink_input); +PA_DECLARE_PUBLIC_CLASS(pa_sink_input); #define PA_SINK_INPUT(o) pa_sink_input_cast(o) enum { diff --git a/src/pulsecore/sink.c b/src/pulsecore/sink.c index fab88755..5cec7747 100644 --- a/src/pulsecore/sink.c +++ b/src/pulsecore/sink.c @@ -52,7 +52,7 @@ #define ABSOLUTE_MAX_LATENCY (10*PA_USEC_PER_SEC) #define DEFAULT_FIXED_LATENCY (250*PA_USEC_PER_MSEC) -static PA_DEFINE_CHECK_TYPE(pa_sink, pa_msgobject); +PA_DEFINE_PUBLIC_CLASS(pa_sink, pa_msgobject); static void sink_free(pa_object *s); diff --git a/src/pulsecore/sink.h b/src/pulsecore/sink.h index 936d1c2a..b5284b71 100644 --- a/src/pulsecore/sink.h +++ b/src/pulsecore/sink.h @@ -191,7 +191,7 @@ struct pa_sink { void *userdata; }; -PA_DECLARE_CLASS(pa_sink); +PA_DECLARE_PUBLIC_CLASS(pa_sink); #define PA_SINK(s) (pa_sink_cast(s)) typedef enum pa_sink_message { diff --git a/src/pulsecore/sound-file-stream.c b/src/pulsecore/sound-file-stream.c index 502e5c69..f41c53f3 100644 --- a/src/pulsecore/sound-file-stream.c +++ b/src/pulsecore/sound-file-stream.c @@ -64,9 +64,8 @@ enum { FILE_STREAM_MESSAGE_UNLINK }; -PA_DECLARE_CLASS(file_stream); +PA_DEFINE_PRIVATE_CLASS(file_stream, pa_msgobject); #define FILE_STREAM(o) (file_stream_cast(o)) -static PA_DEFINE_CHECK_TYPE(file_stream, pa_msgobject); /* Called from main context */ static void file_stream_unlink(file_stream *u) { diff --git a/src/pulsecore/source-output.c b/src/pulsecore/source-output.c index 3803a6cc..b0298616 100644 --- a/src/pulsecore/source-output.c +++ b/src/pulsecore/source-output.c @@ -41,7 +41,7 @@ #define MEMBLOCKQ_MAXLENGTH (32*1024*1024) -static PA_DEFINE_CHECK_TYPE(pa_source_output, pa_msgobject); 
+PA_DEFINE_PUBLIC_CLASS(pa_source_output, pa_msgobject); static void source_output_free(pa_object* mo); diff --git a/src/pulsecore/source-output.h b/src/pulsecore/source-output.h index a70a3fdb..aca9ddf2 100644 --- a/src/pulsecore/source-output.h +++ b/src/pulsecore/source-output.h @@ -182,7 +182,7 @@ struct pa_source_output { void *userdata; }; -PA_DECLARE_CLASS(pa_source_output); +PA_DECLARE_PUBLIC_CLASS(pa_source_output); #define PA_SOURCE_OUTPUT(o) pa_source_output_cast(o) enum { diff --git a/src/pulsecore/source.c b/src/pulsecore/source.c index 8aa07f5e..3026654e 100644 --- a/src/pulsecore/source.c +++ b/src/pulsecore/source.c @@ -46,7 +46,7 @@ #define ABSOLUTE_MAX_LATENCY (10*PA_USEC_PER_SEC) #define DEFAULT_FIXED_LATENCY (250*PA_USEC_PER_MSEC) -static PA_DEFINE_CHECK_TYPE(pa_source, pa_msgobject); +PA_DEFINE_PUBLIC_CLASS(pa_source, pa_msgobject); static void source_free(pa_object *o); diff --git a/src/pulsecore/source.h b/src/pulsecore/source.h index 7b3e4953..df3f99df 100644 --- a/src/pulsecore/source.h +++ b/src/pulsecore/source.h @@ -158,7 +158,7 @@ struct pa_source { void *userdata; }; -PA_DECLARE_CLASS(pa_source); +PA_DECLARE_PUBLIC_CLASS(pa_source); #define PA_SOURCE(s) pa_source_cast(s) typedef enum pa_source_message { -- cgit From 14c27c7ade403683e06705e45b9a3df28102a909 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Aug 2009 22:05:27 +0200 Subject: gconf: use correct path for gconf-helper tool when running from build tree --- src/modules/gconf/module-gconf.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/modules/gconf/module-gconf.c b/src/modules/gconf/module-gconf.c index c01ebbf6..85523b39 100644 --- a/src/modules/gconf/module-gconf.c +++ b/src/modules/gconf/module-gconf.c @@ -52,9 +52,6 @@ PA_MODULE_LOAD_ONCE(TRUE); #define MAX_MODULES 10 #define BUF_MAX 2048 -/* #undef PA_GCONF_HELPER */ -/* #define PA_GCONF_HELPER "/home/lennart/projects/pulseaudio/src/gconf-helper" */ - struct 
module_item { char *name; char *args; @@ -343,7 +340,11 @@ int pa__init(pa_module*m) { u->io_event = NULL; u->buf_fill = 0; - if ((u->fd = pa_start_child_for_read(PA_GCONF_HELPER, NULL, &u->pid)) < 0) + if ((u->fd = pa_start_child_for_read( +#if defined(__linux__) && !defined(__OPTIMIZE__) + pa_run_from_build_tree() ? PA_BUILDDIR "/.libs/gconf-helper" : +#endif + PA_GCONF_HELPER, NULL, &u->pid)) < 0) goto fail; u->io_event = m->core->mainloop->io_new( -- cgit From d06680afe88d14a46ce8a4541d43d514a225732f Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Aug 2009 22:06:23 +0200 Subject: udev: always verify access before loading modules --- src/modules/module-udev-detect.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'src') diff --git a/src/modules/module-udev-detect.c b/src/modules/module-udev-detect.c index 22ce8c3c..2c7e7dc3 100644 --- a/src/modules/module-udev-detect.c +++ b/src/modules/module-udev-detect.c @@ -142,7 +142,6 @@ static void card_changed(struct userdata *u, struct udev_device *dev) { struct device *d; const char *path; const char *t; - pa_module *m; char *n; pa_assert(u); @@ -183,16 +182,9 @@ static void card_changed(struct userdata *u, struct udev_device *dev) { pa_yes_no(u->ignore_dB)); pa_xfree(n); - pa_log_debug("Loading module-alsa-card with arguments '%s'", d->args); - m = pa_module_load(u->core, "module-alsa-card", d->args); - - if (m) { - d->module = m->index; - pa_log_info("Card %s (%s) added and module loaded.", path, d->card_name); - } else - pa_log_info("Card %s (%s) added but failed to load module.", path, d->card_name); - pa_hashmap_put(u->devices, d->path, d); + + verify_access(u, d); } static void remove_card(struct userdata *u, struct udev_device *dev) { @@ -472,7 +464,7 @@ int pa__init(pa_module *m) { udev_enumerate_unref(enumerate); - pa_log_info("Loaded %u modules.", pa_hashmap_size(u->devices)); + pa_log_info("Found %u cards.", pa_hashmap_size(u->devices)); pa_modargs_free(ma); 
-- cgit From 4ec701aa21c51a0a0c1dd60bd94ee4af1c1d1343 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Aug 2009 22:11:38 +0200 Subject: udev: don't access string after free() --- src/modules/module-udev-detect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/modules/module-udev-detect.c b/src/modules/module-udev-detect.c index 2c7e7dc3..f2e7b0df 100644 --- a/src/modules/module-udev-detect.c +++ b/src/modules/module-udev-detect.c @@ -107,9 +107,9 @@ static void verify_access(struct userdata *u, struct device *d) { cd = pa_sprintf_malloc("%s/snd/controlC%s", udev_get_dev_path(u->udev), path_get_card_id(d->path)); d->accessible = access(cd, R_OK|W_OK) >= 0; - pa_xfree(cd); pa_log_info("%s is accessible: %s", cd, pa_yes_no(d->accessible)); + pa_xfree(cd); if (d->module == PA_INVALID_INDEX) { -- cgit From 2d0120485c60e0eacc81d1e28e2993559350211b Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Aug 2009 22:49:23 +0200 Subject: udev: watch for both ACL changes and processes closing devices --- src/modules/module-udev-detect.c | 63 ++++++++++++++++++++++++++++++++-------- 1 file changed, 51 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/modules/module-udev-detect.c b/src/modules/module-udev-detect.c index f2e7b0df..2fcbe216 100644 --- a/src/modules/module-udev-detect.c +++ b/src/modules/module-udev-detect.c @@ -45,7 +45,8 @@ PA_MODULE_USAGE( struct device { char *path; - pa_bool_t accessible; + pa_bool_t accessible:1; + pa_bool_t need_verify:1; char *card_name; char *args; uint32_t module; @@ -277,6 +278,34 @@ fail: u->udev_io = NULL; } +static pa_bool_t pcm_node_belongs_to_device( + struct device *d, + const char *node) { + + char *cd; + pa_bool_t b; + + cd = pa_sprintf_malloc("pcmC%sD", path_get_card_id(d->path)); + b = pa_startswith(node, cd); + pa_xfree(cd); + + return b; +} + +static pa_bool_t control_node_belongs_to_device( + struct device *d, + const char *node) { + + char 
*cd; + pa_bool_t b; + + cd = pa_sprintf_malloc("controlC%s", path_get_card_id(d->path)); + b = pa_streq(node, cd); + pa_xfree(cd); + + return b; +} + static void inotify_cb( pa_mainloop_api*a, pa_io_event* e, @@ -290,7 +319,9 @@ static void inotify_cb( } buf; struct userdata *u = userdata; static int type = 0; - pa_bool_t verify = FALSE, deleted = FALSE; + pa_bool_t deleted = FALSE; + struct device *d; + void *state; for (;;) { ssize_t r; @@ -305,22 +336,30 @@ static void inotify_cb( goto fail; } - if ((buf.e.mask & IN_CLOSE_WRITE) && pa_startswith(buf.e.name, "pcmC")) - verify = TRUE; + /* From udev we get the guarantee that the control + * device's ACL is changes last. To avoid races when ACLs + * are changed we hence watch only the control device */ + if (((buf.e.mask & IN_ATTRIB) && pa_startswith(buf.e.name, "controlC"))) + PA_HASHMAP_FOREACH(d, u->devices, state) + if (control_node_belongs_to_device(d, buf.e.name)) + d->need_verify = TRUE; + + /* ALSA doesn't really give us any guarantee on the closing + * order, so let's simply hope */ + if (((buf.e.mask & IN_CLOSE_WRITE) && pa_startswith(buf.e.name, "pcmC"))) + PA_HASHMAP_FOREACH(d, u->devices, state) + if (pcm_node_belongs_to_device(d, buf.e.name)) + d->need_verify = TRUE; if ((buf.e.mask & (IN_DELETE_SELF|IN_MOVE_SELF))) deleted = TRUE; } - if (verify) { - struct device *d; - void *state; - - pa_log_debug("Verifying access."); - - PA_HASHMAP_FOREACH(d, u->devices, state) + PA_HASHMAP_FOREACH(d, u->devices, state) + if (d->need_verify) { + d->need_verify = FALSE; verify_access(u, d); - } + } if (!deleted) return; -- cgit From 066e160bbd095afe79fc7ea79fbc88b8746960cc Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Fri, 21 Aug 2009 23:24:40 +0200 Subject: udev: tell inotify to actually inform us about ACL changes --- src/modules/module-udev-detect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/modules/module-udev-detect.c b/src/modules/module-udev-detect.c 
index 2fcbe216..1d67c0cc 100644 --- a/src/modules/module-udev-detect.c +++ b/src/modules/module-udev-detect.c @@ -389,7 +389,7 @@ static int setup_inotify(struct userdata *u) { } dev_snd = pa_sprintf_malloc("%s/snd", udev_get_dev_path(u->udev)); - r = inotify_add_watch(u->inotify_fd, dev_snd, IN_CLOSE_WRITE|IN_DELETE_SELF|IN_MOVE_SELF); + r = inotify_add_watch(u->inotify_fd, dev_snd, IN_ATTRIB|IN_CLOSE_WRITE|IN_DELETE_SELF|IN_MOVE_SELF); pa_xfree(dev_snd); if (r < 0) { -- cgit From de19bdd34e54c6a4fe8791d28ce0733343381488 Mon Sep 17 00:00:00 2001 From: Scott Reeves Date: Fri, 21 Aug 2009 23:59:39 +0200 Subject: daemon: fix leak of script_commands --- src/daemon/cmdline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/daemon/cmdline.c b/src/daemon/cmdline.c index ecb38486..3ebc9270 100644 --- a/src/daemon/cmdline.c +++ b/src/daemon/cmdline.c @@ -385,7 +385,7 @@ int pa_cmdline_parse(pa_daemon_conf *conf, int argc, char *const argv [], int *d pa_xfree(conf->script_commands); conf->script_commands = pa_strbuf_tostring_free(buf); - if (!conf->script_commands) { + if (conf->script_commands) { pa_xfree(conf->script_commands); conf->script_commands = NULL; } -- cgit From 15eb03a5b39f8c54328caa7516a7870bf977db40 Mon Sep 17 00:00:00 2001 From: Ted Percival Date: Fri, 21 Aug 2009 16:02:57 -0600 Subject: core: Add thread-safe group info functions with dynamic buffers Provides getgrgid, getgrnam, getpwuid & getpwnam replacements that are thread safe (a la getgrgid_r() and friends) that internally handle allocating big-enough buffers to avoid ERANGE errors on large users or groups. 
--- src/Makefile.am | 12 +- src/pulse/util.c | 52 +++---- src/pulsecore/core-util.c | 106 +++---------- src/pulsecore/usergroup.c | 376 +++++++++++++++++++++++++++++++++++++++++++++ src/pulsecore/usergroup.h | 51 ++++++ src/tests/usergroup-test.c | 161 +++++++++++++++++++ 6 files changed, 643 insertions(+), 115 deletions(-) create mode 100644 src/pulsecore/usergroup.c create mode 100644 src/pulsecore/usergroup.h create mode 100644 src/tests/usergroup-test.c (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index fd440991..73c0db5b 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -280,7 +280,8 @@ TESTS = \ proplist-test \ lock-autospawn-test \ prioq-test \ - sigbus-test + sigbus-test \ + usergroup-test TESTS_BINARIES = \ mainloop-test \ @@ -318,7 +319,8 @@ TESTS_BINARIES = \ stripnul \ lock-autospawn-test \ prioq-test \ - sigbus-test + sigbus-test \ + usergroup-test if HAVE_SIGXCPU #TESTS += \ @@ -557,6 +559,11 @@ alsa_time_test_LDADD = $(AM_LDADD) alsa_time_test_CFLAGS = $(AM_CFLAGS) $(ASOUNDLIB_CFLAGS) alsa_time_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) $(ASOUNDLIB_LIBS) +usergroup_test_SOURCES = tests/usergroup-test.c +usergroup_test_LDADD = $(AM_LDADD) libpulsecore-@PA_MAJORMINORMICRO@.la +usergroup_test_CFLAGS = $(AM_CFLAGS) +usergroup_test_LDFLAGS = $(AM_LDFLAGS) $(BINLDFLAGS) + ################################### # Common library # ################################### @@ -621,6 +628,7 @@ libpulsecommon_@PA_MAJORMINORMICRO@_la_SOURCES = \ pulsecore/tagstruct.c pulsecore/tagstruct.h \ pulsecore/time-smoother.c pulsecore/time-smoother.h \ pulsecore/tokenizer.c pulsecore/tokenizer.h \ + pulsecore/usergroup.c pulsecore/usergroup.h \ pulsecore/sndfile-util.c pulsecore/sndfile-util.h \ pulsecore/winsock.h diff --git a/src/pulse/util.c b/src/pulse/util.c index 6f1e40a9..9440f5de 100644 --- a/src/pulse/util.c +++ b/src/pulse/util.c @@ -61,38 +61,40 @@ #include #include #include +#include #include "util.h" char *pa_get_user_name(char *s, size_t l) 
{ const char *p; + char *name = NULL; +#ifdef OS_IS_WIN32 char buf[1024]; +#endif #ifdef HAVE_PWD_H - struct passwd pw, *r; + struct passwd *r; #endif pa_assert(s); pa_assert(l > 0); - if (!(p = (getuid() == 0 ? "root" : NULL)) && - !(p = getenv("USER")) && - !(p = getenv("LOGNAME")) && - !(p = getenv("USERNAME"))) { + if ((p = (getuid() == 0 ? "root" : NULL)) || + (p = getenv("USER")) || + (p = getenv("LOGNAME")) || + (p = getenv("USERNAME"))) + { + name = pa_strlcpy(s, p, l); + } else { #ifdef HAVE_PWD_H -#ifdef HAVE_GETPWUID_R - if (getpwuid_r(getuid(), &pw, buf, sizeof(buf), &r) != 0 || !r) { -#else - /* XXX Not thread-safe, but needed on OSes (e.g. FreeBSD 4.X) - * that do not support getpwuid_r. */ - if ((r = getpwuid(getuid())) == NULL) { -#endif + if ((r = pa_getpwuid_malloc(getuid())) == NULL) { pa_snprintf(s, l, "%lu", (unsigned long) getuid()); return s; } - p = r->pw_name; + name = pa_strlcpy(s, r->pw_name, l); + pa_getpwuid_free(r); #elif defined(OS_IS_WIN32) /* HAVE_PWD_H */ DWORD size = sizeof(buf); @@ -102,7 +104,7 @@ char *pa_get_user_name(char *s, size_t l) { return NULL; } - p = buf; + name = pa_strlcpy(s, buf, l); #else /* HAVE_PWD_H */ @@ -110,7 +112,7 @@ char *pa_get_user_name(char *s, size_t l) { #endif /* HAVE_PWD_H */ } - return pa_strlcpy(s, p, l); + return name; } char *pa_get_host_name(char *s, size_t l) { @@ -126,11 +128,10 @@ char *pa_get_host_name(char *s, size_t l) { } char *pa_get_home_dir(char *s, size_t l) { - char *e; + char *e, *dir; #ifdef HAVE_PWD_H - char buf[1024]; - struct passwd pw, *r; + struct passwd *r; #endif pa_assert(s); @@ -143,22 +144,19 @@ char *pa_get_home_dir(char *s, size_t l) { return pa_strlcpy(s, e, l); #ifdef HAVE_PWD_H - errno = 0; -#ifdef HAVE_GETPWUID_R - if (getpwuid_r(getuid(), &pw, buf, sizeof(buf), &r) != 0 || !r) { -#else - /* XXX Not thread-safe, but needed on OSes (e.g. FreeBSD 4.X) - * that do not support getpwuid_r. 
*/ - if ((r = getpwuid(getuid())) == NULL) { -#endif + if ((r = pa_getpwuid_malloc(getuid())) == NULL) { if (!errno) errno = ENOENT; return NULL; } - return pa_strlcpy(s, r->pw_dir, l); + dir = pa_strlcpy(s, r->pw_dir, l); + + pa_getpwuid_free(r); + + return dir; #else /* HAVE_PWD_H */ errno = ENOENT; diff --git a/src/pulsecore/core-util.c b/src/pulsecore/core-util.c index 843c8377..0eb32cc4 100644 --- a/src/pulsecore/core-util.c +++ b/src/pulsecore/core-util.c @@ -115,6 +115,7 @@ #include #include #include +#include #include "core-util.h" @@ -969,42 +970,24 @@ fail: /* Check whether the specified GID and the group name match */ static int is_group(gid_t gid, const char *name) { - struct group group, *result = NULL; - long n; - void *data; + struct group *group = NULL; int r = -1; -#ifdef HAVE_GETGRGID_R - -#ifdef _SC_GETGR_R_SIZE_MAX - n = sysconf(_SC_GETGR_R_SIZE_MAX); -#else - n = -1; -#endif - if (n <= 0) - n = 512; - - data = pa_xmalloc((size_t) n); - - if ((errno = getgrgid_r(gid, &group, data, (size_t) n, &result)) || !result) -#else errno = 0; - if (!(result = getgrgid(gid))) -#endif + if (!(group = pa_getgrgid_malloc(gid))) { if (!errno) errno = ENOENT; - pa_log("getgrgid(%u): %s", gid, pa_cstrerror(errno)); + pa_log("pa_getgrgid_malloc(%u): %s", gid, pa_cstrerror(errno)); goto finish; } - r = strcmp(name, result->gr_name) == 0; + r = strcmp(name, group->gr_name) == 0; finish: - - pa_xfree(data); + pa_getgrgid_free(group); return r; } @@ -1053,69 +1036,37 @@ finish: /* Check whether the specifc user id is a member of the specified group */ int pa_uid_in_group(uid_t uid, const char *name) { - char *g_buf = NULL, *p_buf = NULL; - long g_n, p_n; - struct group grbuf, *gr = NULL; + struct group *group = NULL; char **i; int r = -1; -#ifdef HAVE_GETGRNAM_R - -#ifdef _SC_GETGR_R_SIZE_MAX - g_n = sysconf(_SC_GETGR_R_SIZE_MAX); -#else - g_n = -1; -#endif - if (g_n <= 0) - g_n = 512; - - g_buf = pa_xmalloc((size_t) g_n); - - if ((errno = getgrnam_r(name, &grbuf, 
g_buf, (size_t) g_n, &gr)) != 0 || !gr) -#else errno = 0; - if (!(gr = getgrnam(name))) -#endif + if (!(group = pa_getgrnam_malloc(name))) { if (!errno) errno = ENOENT; goto finish; } -#ifdef HAVE_GETPWNAM_R - -#ifdef _SC_GETPW_R_SIZE_MAX - p_n = sysconf(_SC_GETPW_R_SIZE_MAX); -#else - p_n = -1; -#endif - if (p_n <= 0) - p_n = 512; - - p_buf = pa_xmalloc((size_t) p_n); -#endif - r = 0; - for (i = gr->gr_mem; *i; i++) { - struct passwd pwbuf, *pw = NULL; + for (i = group->gr_mem; *i; i++) { + struct passwd *pw = NULL; -#ifdef HAVE_GETPWNAM_R - if ((errno = getpwnam_r(*i, &pwbuf, p_buf, (size_t) p_n, &pw)) != 0 || !pw) -#else errno = 0; - if (!(pw = getpwnam(*i))) -#endif + if (!(pw = pa_getpwnam_malloc(*i))) continue; - if (pw->pw_uid == uid) { + if (pw->pw_uid == uid) r = 1; + + pa_getpwnam_free(pw); + + if (r == 1) break; - } } finish: - pa_xfree(g_buf); - pa_xfree(p_buf); + pa_getgrnam_free(group); return r; } @@ -1123,27 +1074,10 @@ finish: /* Get the GID of a gfiven group, return (gid_t) -1 on failure. */ gid_t pa_get_gid_of_group(const char *name) { gid_t ret = (gid_t) -1; - char *g_buf = NULL; - long g_n; - struct group grbuf, *gr = NULL; - -#ifdef HAVE_GETGRNAM_R - -#ifdef _SC_GETGR_R_SIZE_MAX - g_n = sysconf(_SC_GETGR_R_SIZE_MAX); -#else - g_n = -1; -#endif - if (g_n <= 0) - g_n = 512; - - g_buf = pa_xmalloc((size_t) g_n); + struct group *gr = NULL; - if ((errno = getgrnam_r(name, &grbuf, g_buf, (size_t) g_n, &gr)) != 0 || !gr) -#else errno = 0; - if (!(gr = getgrnam(name))) -#endif + if (!(gr = pa_getgrnam_malloc(name))) { if (!errno) errno = ENOENT; @@ -1153,7 +1087,7 @@ gid_t pa_get_gid_of_group(const char *name) { ret = gr->gr_gid; finish: - pa_xfree(g_buf); + pa_getgrnam_free(gr); return ret; } diff --git a/src/pulsecore/usergroup.c b/src/pulsecore/usergroup.c new file mode 100644 index 00000000..bf686b77 --- /dev/null +++ b/src/pulsecore/usergroup.c @@ -0,0 +1,376 @@ +/*** + This file is part of PulseAudio. 
+ + Copyright 2009 Ted Percival + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#ifdef HAVE_PWD_H +#include +#endif + +#ifdef HAVE_GRP_H +#include +#endif + +#include +#include + +#include "usergroup.h" + +#ifdef HAVE_GRP_H + +/* Returns a suitable starting size for a getgrnam_r() or getgrgid_r() buffer, + plus the size of a struct group. + */ +static size_t starting_getgr_buflen(void) { + size_t full_size; + long n; +#ifdef _SC_GETGR_R_SIZE_MAX + n = sysconf(_SC_GETGR_R_SIZE_MAX); +#else + n = -1; +#endif + if (n <= 0) + n = 512; + + full_size = (size_t) n + sizeof(struct group); + + if (full_size < (size_t) n) /* check for integer overflow */ + return (size_t) n; + + return full_size; +} + +/* Returns a suitable starting size for a getpwnam_r() or getpwuid_r() buffer, + plus the size of a struct passwd. 
+ */ +static size_t starting_getpw_buflen(void) { + long n; + size_t full_size; + +#ifdef _SC_GETPW_R_SIZE_MAX + n = sysconf(_SC_GETPW_R_SIZE_MAX); +#else + n = -1; +#endif + if (n <= 0) + n = 512; + + full_size = (size_t) n + sizeof(struct passwd); + + if (full_size < (size_t) n) /* check for integer overflow */ + return (size_t) n; + + return full_size; +} + +/* Given a memory allocation (*bufptr) and its length (*buflenptr), + double the size of the allocation, updating the given buffer and length + arguments. This function should be used in conjunction with the pa_*alloc + and pa_xfree functions. + + Unlike realloc(), this function does *not* retain the original buffer's + contents. + + Returns 0 on success, nonzero on error. The error cause is indicated by + errno. + */ +static int expand_buffer_trashcontents(void **bufptr, size_t *buflenptr) { + size_t newlen; + + if (!bufptr || !*bufptr || !buflenptr) { + errno = EINVAL; + return -1; + } + + newlen = *buflenptr * 2; + + if (newlen < *buflenptr) { + errno = EOVERFLOW; + return -1; + } + + /* Don't bother retaining memory contents; free & alloc anew */ + pa_xfree(*bufptr); + + *bufptr = pa_xmalloc(newlen); + *buflenptr = newlen; + + return 0; +} + +#ifdef HAVE_GETGRGID_R +/* Thread-safe getgrgid() replacement. + Returned value should be freed using pa_getgrgid_free() when the caller is + finished with the returned group data. + + API is the same as getgrgid(), errors are indicated by a NULL return; + consult errno for the error cause (zero it before calling). + The returned value must be freed using pa_xfree(). 
+ */ +struct group *pa_getgrgid_malloc(gid_t gid) { + size_t buflen, getgr_buflen; + int err; + void *buf; + void *getgr_buf; + struct group *result = NULL; + + buflen = starting_getgr_buflen(); + buf = pa_xmalloc(buflen); + + getgr_buflen = buflen - sizeof(struct group); + getgr_buf = (char *)buf + sizeof(struct group); + + while ((err = getgrgid_r(gid, (struct group *)buf, getgr_buf, + getgr_buflen, &result)) == ERANGE) + { + if (expand_buffer_trashcontents(&buf, &buflen)) + break; + + getgr_buflen = buflen - sizeof(struct group); + getgr_buf = (char *)buf + sizeof(struct group); + } + + if (err || !result) { + result = NULL; + if (buf) { + pa_xfree(buf); + buf = NULL; + } + } + + pa_assert(result == buf || result == NULL); + + return result; +} + +void pa_getgrgid_free(struct group *grp) { + pa_xfree(grp); +} + +#else /* !HAVE_GETGRGID_R */ + +struct group *pa_getgrgid_malloc(gid_t gid) { + return getgrgid(gid); +} + +void pa_getgrgid_free(struct group *grp) { + /* nothing */ + return; +} + +#endif /* !HAVE_GETGRGID_R */ + +#ifdef HAVE_GETGRNAM_R +/* Thread-safe getgrnam() function. + Returned value should be freed using pa_getgrnam_free() when the caller is + finished with the returned group data. + + API is the same as getgrnam(), errors are indicated by a NULL return; + consult errno for the error cause (zero it before calling). + The returned value must be freed using pa_xfree(). 
+ */ +struct group *pa_getgrnam_malloc(const char *name) { + size_t buflen, getgr_buflen; + int err; + void *buf; + void *getgr_buf; + struct group *result = NULL; + + buflen = starting_getgr_buflen(); + buf = pa_xmalloc(buflen); + + getgr_buflen = buflen - sizeof(struct group); + getgr_buf = (char *)buf + sizeof(struct group); + + while ((err = getgrnam_r(name, (struct group *)buf, getgr_buf, + getgr_buflen, &result)) == ERANGE) + { + if (expand_buffer_trashcontents(&buf, &buflen)) + break; + + getgr_buflen = buflen - sizeof(struct group); + getgr_buf = (char *)buf + sizeof(struct group); + } + + if (err || !result) { + result = NULL; + if (buf) { + pa_xfree(buf); + buf = NULL; + } + } + + pa_assert(result == buf || result == NULL); + + return result; +} + +void pa_getgrnam_free(struct group *group) { + pa_xfree(group); +} + +#else /* !HAVE_GETGRNAM_R */ + +struct group *pa_getgrnam_malloc(const char *name) { + return getgrnam(name); +} + +void pa_getgrnam_free(struct group *group) { + /* nothing */ + return; +} + +#endif /* HAVE_GETGRNAM_R */ + +#endif /* HAVE_GRP_H */ + +#ifdef HAVE_PWD_H + +#ifdef HAVE_GETPWNAM_R +/* Thread-safe getpwnam() function. + Returned value should be freed using pa_getpwnam_free() when the caller is + finished with the returned passwd data. + + API is the same as getpwnam(), errors are indicated by a NULL return; + consult errno for the error cause (zero it before calling). + The returned value must be freed using pa_xfree(). 
+ */ +struct passwd *pa_getpwnam_malloc(const char *name) { + size_t buflen, getpw_buflen; + int err; + void *buf; + void *getpw_buf; + struct passwd *result = NULL; + + buflen = starting_getpw_buflen(); + buf = pa_xmalloc(buflen); + + getpw_buflen = buflen - sizeof(struct passwd); + getpw_buf = (char *)buf + sizeof(struct passwd); + + while ((err = getpwnam_r(name, (struct passwd *)buf, getpw_buf, + getpw_buflen, &result)) == ERANGE) + { + if (expand_buffer_trashcontents(&buf, &buflen)) + break; + + getpw_buflen = buflen - sizeof(struct passwd); + getpw_buf = (char *)buf + sizeof(struct passwd); + } + + if (err || !result) { + result = NULL; + if (buf) { + pa_xfree(buf); + buf = NULL; + } + } + + pa_assert(result == buf || result == NULL); + + return result; +} + +void pa_getpwnam_free(struct passwd *passwd) { + pa_xfree(passwd); +} + +#else /* !HAVE_GETPWNAM_R */ + +struct passwd *pa_getpwnam_malloc(const char *name) { + return getpwnam(name); +} + +void pa_getpwnam_free(struct passwd *passwd) { + /* nothing */ + return; +} + +#endif /* !HAVE_GETPWNAM_R */ + +#ifdef HAVE_GETPWUID_R +/* Thread-safe getpwuid() function. + Returned value should be freed using pa_getpwuid_free() when the caller is + finished with the returned group data. + + API is the same as getpwuid(), errors are indicated by a NULL return; + consult errno for the error cause (zero it before calling). + The returned value must be freed using pa_xfree(). 
+ */ +struct passwd *pa_getpwuid_malloc(uid_t uid) { + size_t buflen, getpw_buflen; + int err; + void *buf; + void *getpw_buf; + struct passwd *result = NULL; + + buflen = starting_getpw_buflen(); + buf = pa_xmalloc(buflen); + + getpw_buflen = buflen - sizeof(struct passwd); + getpw_buf = (char *)buf + sizeof(struct passwd); + + while ((err = getpwuid_r(uid, (struct passwd *)buf, getpw_buf, + getpw_buflen, &result)) == ERANGE) + { + if (expand_buffer_trashcontents(&buf, &buflen)) + break; + + getpw_buflen = buflen - sizeof(struct passwd); + getpw_buf = (char *)buf + sizeof(struct passwd); + } + + if (err || !result) { + result = NULL; + if (buf) { + pa_xfree(buf); + buf = NULL; + } + } + + pa_assert(result == buf || result == NULL); + + return result; +} + +void pa_getpwuid_free(struct passwd *passwd) { + pa_xfree(passwd); +} + +#else /* !HAVE_GETPWUID_R */ + +struct passwd *pa_getpwuid_malloc(uid_t uid) { + return getpwuid(uid); +} + +void pa_getpwuid_free(struct passwd *passwd) { + /* nothing */ + return; +} + +#endif /* !HAVE_GETPWUID_R */ + +#endif /* HAVE_PWD_H */ diff --git a/src/pulsecore/usergroup.h b/src/pulsecore/usergroup.h new file mode 100644 index 00000000..1c091638 --- /dev/null +++ b/src/pulsecore/usergroup.h @@ -0,0 +1,51 @@ +#ifndef foousergrouphfoo +#define foousergrouphfoo + +/*** + This file is part of PulseAudio. + + Copyright 2009 Ted Percival + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. +***/ + +#include + +#ifndef PACKAGE +#error "Please include config.h before including this file!" +#endif + +#ifdef HAVE_GRP_H + +struct group *pa_getgrgid_malloc(gid_t gid); +void pa_getgrgid_free(struct group *grp); + +struct group *pa_getgrnam_malloc(const char *name); +void pa_getgrnam_free(struct group *group); + +#endif /* HAVE_GRP_H */ + +#ifdef HAVE_PWD_H + +struct passwd *pa_getpwuid_malloc(uid_t uid); +void pa_getpwuid_free(struct passwd *passwd); + +struct passwd *pa_getpwnam_malloc(const char *name); +void pa_getpwnam_free(struct passwd *passwd); + +#endif /* HAVE_PWD_H */ + +#endif /* foousergrouphfoo */ diff --git a/src/tests/usergroup-test.c b/src/tests/usergroup-test.c new file mode 100644 index 00000000..a48b016d --- /dev/null +++ b/src/tests/usergroup-test.c @@ -0,0 +1,161 @@ +/*** + This file is part of PulseAudio. + + Copyright 2009 Ted Percival + + PulseAudio is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2.1 of the + License, or (at your option) any later version. + + PulseAudio is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with PulseAudio; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA. 
+***/ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +static int load_reference_structs(struct group **gr, struct passwd **pw) { + setpwent(); + *pw = getpwent(); + endpwent(); + + setgrent(); + *gr = getgrent(); + endgrent(); + + return (*gr && *pw) ? 0 : 1; +} + +static int compare_group(const struct group *a, const struct group *b) { + char **amem, **bmem; + + if (strcmp(a->gr_name, b->gr_name)) { + fprintf(stderr, "Group name mismatch: [%s] [%s]\n", + a->gr_name, b->gr_name); + return 1; + } + + if (strcmp(a->gr_passwd, b->gr_passwd)) { + fprintf(stderr, "Group password mismatch: [%s] [%s]\n", + a->gr_passwd, b->gr_passwd); + return 1; + } + + if (a->gr_gid != b->gr_gid) { + fprintf(stderr, "Gid mismatch: [%lu] [%lu]\n", + (unsigned long) a->gr_gid, (unsigned long) b->gr_gid); + return 1; + } + + /* XXX: Assuming the group ordering is identical. */ + for (amem = a->gr_mem, bmem = b->gr_mem; *amem && *bmem; ++amem, ++bmem) { + if (strcmp(*amem, *bmem)) { + fprintf(stderr, "Group member mismatch: [%s] [%s]\n", + *amem, *bmem); + return 1; + } + } + + if (*amem || *bmem) { + fprintf(stderr, "Mismatched group count\n"); + return 1; + } + + return 0; +} + +static int compare_passwd(const struct passwd *a, const struct passwd *b) { + if (strcmp(a->pw_name, b->pw_name)) { + fprintf(stderr, "pw_name mismatch: [%s] [%s]\n", a->pw_name, b->pw_name); + return 1; + } + + if (strcmp(a->pw_passwd, b->pw_passwd)) { + fprintf(stderr, "pw_passwd mismatch: [%s] [%s]\n", a->pw_passwd, b->pw_passwd); + return 1; + } + + if (a->pw_uid != b->pw_uid) { + fprintf(stderr, "pw_uid mismatch: [%lu] [%lu]\n", + (unsigned long) a->pw_uid, (unsigned long) b->pw_uid); + return 1; + } + + if (a->pw_gid != b->pw_gid) { + fprintf(stderr, "pw_gid mismatch: [%lu] [%lu]\n", + (unsigned long) a->pw_gid, (unsigned long) b->pw_gid); + return 1; + } + + if (strcmp(a->pw_gecos, b->pw_gecos)) { + fprintf(stderr, "pw_gecos mismatch: [%s] [%s]\n", 
a->pw_gecos, b->pw_gecos); + return 1; + } + + if (strcmp(a->pw_dir, b->pw_dir)) { + fprintf(stderr, "pw_dir mismatch: [%s] [%s]\n", a->pw_dir, b->pw_dir); + return 1; + } + + if (strcmp(a->pw_shell, b->pw_shell)) { + fprintf(stderr, "pw_shell mismatch: [%s] [%s]\n", a->pw_shell, b->pw_shell); + return 1; + } + + return 0; +} + +int main(int argc, char *argv[]) { + struct group *gr; + struct passwd *pw; + int err; + struct group *reference_group = NULL; + struct passwd *reference_passwd = NULL; + + err = load_reference_structs(&reference_group, &reference_passwd); + if (err) + return 77; + + errno = 0; + gr = pa_getgrgid_malloc(reference_group->gr_gid); + if (compare_group(reference_group, gr)) + return 1; + pa_getgrgid_free(gr); + + errno = 0; + gr = pa_getgrnam_malloc(reference_group->gr_name); + if (compare_group(reference_group, gr)) + return 1; + pa_getgrnam_free(gr); + + errno = 0; + pw = pa_getpwuid_malloc(reference_passwd->pw_uid); + if (compare_passwd(reference_passwd, pw)) + return 1; + pa_getpwuid_free(pw); + + errno = 0; + pw = pa_getpwnam_malloc(reference_passwd->pw_name); + if (compare_passwd(reference_passwd, pw)) + return 1; + pa_getpwnam_free(pw); + + return 0; +} -- cgit From 44c7aa55e25334901769b82355c12dee91cb3629 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Fri, 21 Aug 2009 13:15:38 +1000 Subject: Solaris: build fixes (resent) Fix bit rot due to recent flat volume changes. 
--- src/modules/module-solaris.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/modules/module-solaris.c b/src/modules/module-solaris.c index 0920d25e..2c878c20 100644 --- a/src/modules/module-solaris.c +++ b/src/modules/module-solaris.c @@ -479,7 +479,7 @@ static void sink_set_volume(pa_sink *s) { if (u->fd >= 0) { AUDIO_INITINFO(&info); - info.play.gain = pa_cvolume_max(&s->virtual_volume) * AUDIO_MAX_GAIN / PA_VOLUME_NORM; + info.play.gain = pa_cvolume_max(&s->real_volume) * AUDIO_MAX_GAIN / PA_VOLUME_NORM; assert(info.play.gain <= AUDIO_MAX_GAIN); if (ioctl(u->fd, AUDIO_SETINFO, &info) < 0) { @@ -501,8 +501,7 @@ static void sink_get_volume(pa_sink *s) { if (ioctl(u->fd, AUDIO_GETINFO, &info) < 0) pa_log("AUDIO_SETINFO: %s", pa_cstrerror(errno)); else - pa_cvolume_set(&s->virtual_volume, s->sample_spec.channels, - info.play.gain * PA_VOLUME_NORM / AUDIO_MAX_GAIN); + pa_cvolume_set(&s->real_volume, s->sample_spec.channels, info.play.gain * PA_VOLUME_NORM / AUDIO_MAX_GAIN); } } @@ -515,7 +514,7 @@ static void source_set_volume(pa_source *s) { if (u->fd >= 0) { AUDIO_INITINFO(&info); - info.play.gain = pa_cvolume_max(&s->virtual_volume) * AUDIO_MAX_GAIN / PA_VOLUME_NORM; + info.play.gain = pa_cvolume_max(&s->volume) * AUDIO_MAX_GAIN / PA_VOLUME_NORM; assert(info.play.gain <= AUDIO_MAX_GAIN); if (ioctl(u->fd, AUDIO_SETINFO, &info) < 0) { @@ -537,8 +536,7 @@ static void source_get_volume(pa_source *s) { if (ioctl(u->fd, AUDIO_GETINFO, &info) < 0) pa_log("AUDIO_SETINFO: %s", pa_cstrerror(errno)); else - pa_cvolume_set(&s->virtual_volume, s->sample_spec.channels, - info.play.gain * PA_VOLUME_NORM / AUDIO_MAX_GAIN); + pa_cvolume_set(&s->volume, s->sample_spec.channels, info.play.gain * PA_VOLUME_NORM / AUDIO_MAX_GAIN); } } @@ -797,7 +795,7 @@ static void sig_callback(pa_mainloop_api *api, pa_signal_event*e, int sig, void pa_log_debug("caught signal"); if (u->sink) { - pa_sink_get_volume(u->sink, TRUE, FALSE); + 
pa_sink_get_volume(u->sink, TRUE); pa_sink_get_mute(u->sink, TRUE); } -- cgit From 87d2dded9b90331943a6c7b9d8d9b1ac100b6689 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Fri, 21 Aug 2009 13:17:03 +1000 Subject: Solaris: use smoother (resent) Make use of the smoother, just in case. --- src/modules/module-solaris.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/modules/module-solaris.c b/src/modules/module-solaris.c index 2c878c20..3bf7c4b0 100644 --- a/src/modules/module-solaris.c +++ b/src/modules/module-solaris.c @@ -60,6 +60,7 @@ #include #include #include +#include #include "module-solaris-symdef.h" @@ -110,6 +111,8 @@ struct userdata { uint32_t prev_playback_samples, prev_record_samples; int32_t minimum_request; + + pa_smoother *smoother; }; static const char* const valid_modargs[] = { @@ -145,7 +148,12 @@ static uint64_t get_playback_buffered_bytes(struct userdata *u) { /* Handle wrap-around of the device's sample counter, which is a uint_32. */ if (u->prev_playback_samples > info.play.samples) { - /* Unfortunately info.play.samples can sometimes go backwards, even before it wraps! */ + /* + * Unfortunately info.play.samples can sometimes go backwards, even before it wraps! + * The bug seems to be absent on Solaris x86 nv117 with audio810 driver, at least on this (UP) machine. + * The bug is present on a different (SMP) machine running Solaris x86 nv103 with audioens driver. + * An earlier revision of this file mentions the same bug independently (unknown configuration). 
+ */ if (u->prev_playback_samples + info.play.samples < 240000) { ++u->play_samples_msw; } else { @@ -155,6 +163,8 @@ static uint64_t get_playback_buffered_bytes(struct userdata *u) { u->prev_playback_samples = info.play.samples; played_bytes = (((uint64_t)u->play_samples_msw << 32) + info.play.samples) * u->frame_size; + pa_smoother_put(u->smoother, pa_rtclock_now(), pa_bytes_to_usec(played_bytes, &u->sink->sample_spec)); + return u->written_bytes - played_bytes; } @@ -387,6 +397,8 @@ static int sink_process_msg(pa_msgobject *o, int code, void *data, int64_t offse pa_assert(PA_SINK_IS_OPENED(u->sink->thread_info.state)); + pa_smoother_pause(u->smoother, pa_rtclock_now()); + if (!u->source || u->source_suspended) { if (suspend(u) < 0) return -1; @@ -398,6 +410,8 @@ static int sink_process_msg(pa_msgobject *o, int code, void *data, int64_t offse case PA_SINK_RUNNING: if (u->sink->thread_info.state == PA_SINK_SUSPENDED) { + pa_smoother_resume(u->smoother, pa_rtclock_now(), TRUE); + if (!u->source || u->source_suspended) { if (unsuspend(u) < 0) return -1; @@ -604,11 +618,13 @@ static void thread_func(void *userdata) { pa_thread_mq_install(&u->thread_mq); + pa_smoother_set_time_offset(u->smoother, pa_rtclock_now()); + for (;;) { /* Render some data and write it to the dsp */ if (u->sink && PA_SINK_IS_OPENED(u->sink->thread_info.state)) { - pa_usec_t xtime0; + pa_usec_t xtime0, ysleep_interval, xsleep_interval; uint64_t buffered_bytes; if (u->sink->thread_info.rewind_requested) @@ -627,6 +643,8 @@ static void thread_func(void *userdata) { info.play.error = 0; if (ioctl(u->fd, AUDIO_SETINFO, &info) < 0) pa_log("AUDIO_SETINFO: %s", pa_cstrerror(errno)); + + pa_smoother_reset(u->smoother, pa_rtclock_now(), TRUE); } for (;;) { @@ -689,7 +707,9 @@ static void thread_func(void *userdata) { } } - pa_rtpoll_set_timer_absolute(u->rtpoll, xtime0 + pa_bytes_to_usec(buffered_bytes / 2, &u->sink->sample_spec)); + ysleep_interval = pa_bytes_to_usec(buffered_bytes / 2, 
&u->sink->sample_spec); + xsleep_interval = pa_smoother_translate(u->smoother, xtime0, ysleep_interval); + pa_rtpoll_set_timer_absolute(u->rtpoll, xtime0 + PA_MIN(xsleep_interval, ysleep_interval)); } else pa_rtpoll_set_timer_disabled(u->rtpoll); @@ -836,6 +856,9 @@ int pa__init(pa_module *m) { u = pa_xnew0(struct userdata, 1); + if (!(u->smoother = pa_smoother_new(PA_USEC_PER_SEC, PA_USEC_PER_SEC * 2, TRUE, TRUE, 10, pa_rtclock_now(), TRUE))) + goto fail; + /* * For a process (or several processes) to use the same audio device for both * record and playback at the same time, the device's mixer must be enabled. @@ -1073,6 +1096,9 @@ void pa__done(pa_module *m) { if (u->fd >= 0) close(u->fd); + if (u->smoother) + pa_smoother_free(u->smoother); + pa_xfree(u->device_name); pa_xfree(u); -- cgit From 601fb63b0160d3d76083d07dcc1201a123031915 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Fri, 21 Aug 2009 13:18:40 +1000 Subject: Solaris: fixed latency (resent) Set a fixed latency based on the given buffer size, which is constrained to the 128 KB limit on buffered writes. Also fix an error path. --- src/modules/module-solaris.c | 56 ++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 26 deletions(-) (limited to 'src') diff --git a/src/modules/module-solaris.c b/src/modules/module-solaris.c index 3bf7c4b0..71f14071 100644 --- a/src/modules/module-solaris.c +++ b/src/modules/module-solaris.c @@ -136,6 +136,9 @@ static const char* const valid_modargs[] = { #define MAX_RENDER_HZ (300) /* This render rate limit imposes a minimum latency, but without it we waste too much CPU time. */ +#define MAX_BUFFER_SIZE (128 * 1024) +/* An attempt to buffer more than 128 KB causes write() to fail with errno == EAGAIN. 
*/ + static uint64_t get_playback_buffered_bytes(struct userdata *u) { audio_info_t info; uint64_t played_bytes; @@ -651,6 +654,7 @@ static void thread_func(void *userdata) { void *p; ssize_t w; size_t len; + int write_type = 1; /* * Since we cannot modify the size of the output buffer we fake it @@ -668,38 +672,31 @@ static void thread_func(void *userdata) { break; if (u->memchunk.length < len) - pa_sink_render(u->sink, u->sink->thread_info.max_request, &u->memchunk); + pa_sink_render(u->sink, len - u->memchunk.length, &u->memchunk); + + len = PA_MIN(u->memchunk.length, len); p = pa_memblock_acquire(u->memchunk.memblock); - w = pa_write(u->fd, (uint8_t*) p + u->memchunk.index, u->memchunk.length, NULL); + w = pa_write(u->fd, (uint8_t*) p + u->memchunk.index, len, &write_type); pa_memblock_release(u->memchunk.memblock); if (w <= 0) { - switch (errno) { - case EINTR: - continue; - case EAGAIN: - /* If the buffer_size is too big, we get EAGAIN. Avoiding that limit by trial and error - * is not ideal, but I don't know how to get the system to tell me what the limit is. - */ - u->buffer_size = u->buffer_size * 18 / 25; - u->buffer_size -= u->buffer_size % u->frame_size; - u->buffer_size = PA_MAX(u->buffer_size, 2 * u->minimum_request); - pa_sink_set_max_request_within_thread(u->sink, u->buffer_size); - pa_sink_set_max_rewind_within_thread(u->sink, u->buffer_size); - pa_log("EAGAIN. Buffer size is now %u bytes (%llu buffered)", u->buffer_size, buffered_bytes); - break; - default: - pa_log("Failed to write data to DSP: %s", pa_cstrerror(errno)); - goto fail; + if (errno == EINTR) { + continue; + } else if (errno == EAGAIN) { + /* We may have realtime priority so yield the CPU to ensure that fd can become writable again. 
*/ + pa_log_debug("EAGAIN with %llu bytes buffered.", buffered_bytes); + break; + } else { + pa_log("Failed to write data to DSP: %s", pa_cstrerror(errno)); + goto fail; } } else { pa_assert(w % u->frame_size == 0); u->written_bytes += w; - u->memchunk.length -= w; - u->memchunk.index += w; + u->memchunk.length -= w; if (u->memchunk.length <= 0) { pa_memblock_unref(u->memchunk.memblock); pa_memchunk_reset(&u->memchunk); @@ -830,7 +827,7 @@ int pa__init(pa_module *m) { pa_channel_map map; pa_modargs *ma = NULL; uint32_t buffer_length_msec; - int fd; + int fd = -1; pa_sink_new_data sink_new_data; pa_source_new_data source_new_data; char const *name; @@ -882,7 +879,13 @@ int pa__init(pa_module *m) { } u->buffer_size = pa_usec_to_bytes(1000 * buffer_length_msec, &ss); if (u->buffer_size < 2 * u->minimum_request) { - pa_log("supplied buffer size argument is too small"); + pa_log("buffer_length argument cannot be smaller than %u", + (unsigned)(pa_bytes_to_usec(2 * u->minimum_request, &ss) / 1000)); + goto fail; + } + if (u->buffer_size > MAX_BUFFER_SIZE) { + pa_log("buffer_length argument cannot be greater than %u", + (unsigned)(pa_bytes_to_usec(MAX_BUFFER_SIZE, &ss) / 1000)); goto fail; } @@ -945,6 +948,7 @@ int pa__init(pa_module *m) { pa_source_set_asyncmsgq(u->source, u->thread_mq.inq); pa_source_set_rtpoll(u->source, u->rtpoll); + pa_source_set_fixed_latency(u->source, pa_bytes_to_usec(u->buffer_size, &u->source->sample_spec)); u->source->get_volume = source_get_volume; u->source->set_volume = source_set_volume; @@ -987,15 +991,15 @@ int pa__init(pa_module *m) { pa_sink_set_asyncmsgq(u->sink, u->thread_mq.inq); pa_sink_set_rtpoll(u->sink, u->rtpoll); + pa_sink_set_fixed_latency(u->sink, pa_bytes_to_usec(u->buffer_size, &u->sink->sample_spec)); + pa_sink_set_max_request(u->sink, u->buffer_size); + pa_sink_set_max_rewind(u->sink, u->buffer_size); u->sink->get_volume = sink_get_volume; u->sink->set_volume = sink_set_volume; u->sink->get_mute = sink_get_mute; 
u->sink->set_mute = sink_set_mute; u->sink->refresh_volume = u->sink->refresh_muted = TRUE; - - pa_sink_set_max_request(u->sink, u->buffer_size); - pa_sink_set_max_rewind(u->sink, u->buffer_size); } else u->sink = NULL; -- cgit From 17dc410e8874d38ce7a9882245360314a8251e06 Mon Sep 17 00:00:00 2001 From: Ted Percival Date: Fri, 21 Aug 2009 17:05:41 -0600 Subject: core: Remove wrong doc on how to free returned data --- src/pulsecore/usergroup.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src') diff --git a/src/pulsecore/usergroup.c b/src/pulsecore/usergroup.c index bf686b77..71b13bca 100644 --- a/src/pulsecore/usergroup.c +++ b/src/pulsecore/usergroup.c @@ -128,7 +128,6 @@ static int expand_buffer_trashcontents(void **bufptr, size_t *buflenptr) { API is the same as getgrgid(), errors are indicated by a NULL return; consult errno for the error cause (zero it before calling). - The returned value must be freed using pa_xfree(). */ struct group *pa_getgrgid_malloc(gid_t gid) { size_t buflen, getgr_buflen; @@ -190,7 +189,6 @@ void pa_getgrgid_free(struct group *grp) { API is the same as getgrnam(), errors are indicated by a NULL return; consult errno for the error cause (zero it before calling). - The returned value must be freed using pa_xfree(). */ struct group *pa_getgrnam_malloc(const char *name) { size_t buflen, getgr_buflen; @@ -256,7 +254,6 @@ void pa_getgrnam_free(struct group *group) { API is the same as getpwnam(), errors are indicated by a NULL return; consult errno for the error cause (zero it before calling). - The returned value must be freed using pa_xfree(). */ struct passwd *pa_getpwnam_malloc(const char *name) { size_t buflen, getpw_buflen; @@ -318,7 +315,6 @@ void pa_getpwnam_free(struct passwd *passwd) { API is the same as getpwuid(), errors are indicated by a NULL return; consult errno for the error cause (zero it before calling). - The returned value must be freed using pa_xfree(). 
*/ struct passwd *pa_getpwuid_malloc(uid_t uid) { size_t buflen, getpw_buflen; -- cgit From aa5429805f88bcade639e5e8667b79a07ec104b6 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 22 Aug 2009 03:16:47 +0200 Subject: daemon: don't free script_commands twice --- src/daemon/cmdline.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'src') diff --git a/src/daemon/cmdline.c b/src/daemon/cmdline.c index 3ebc9270..f6cdcdc8 100644 --- a/src/daemon/cmdline.c +++ b/src/daemon/cmdline.c @@ -385,11 +385,6 @@ int pa_cmdline_parse(pa_daemon_conf *conf, int argc, char *const argv [], int *d pa_xfree(conf->script_commands); conf->script_commands = pa_strbuf_tostring_free(buf); - if (conf->script_commands) { - pa_xfree(conf->script_commands); - conf->script_commands = NULL; - } - *d = optind; return 0; -- cgit From 2595b9d98569e62f4d375ced1d3f7c7af34efa74 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 22 Aug 2009 03:17:24 +0200 Subject: add usergroup-test to .gitignore --- src/.gitignore | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/.gitignore b/src/.gitignore index 82331524..6cd173c0 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -1,3 +1,4 @@ +usergroup-test sigbus-test TAGS alsa-time-test -- cgit From 5b0683d6cd103a7a91bc2e88bcc9f77750d10c25 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 22 Aug 2009 04:03:31 +0200 Subject: ladspa/remap: handle failing stream moves properly --- src/modules/module-ladspa-sink.c | 7 +++++-- src/modules/module-remap-sink.c | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/modules/module-ladspa-sink.c b/src/modules/module-ladspa-sink.c index 233f90c4..aa28f7fd 100644 --- a/src/modules/module-ladspa-sink.c +++ b/src/modules/module-ladspa-sink.c @@ -418,8 +418,11 @@ static void sink_input_moving_cb(pa_sink_input *i, pa_sink *dest) { pa_sink_input_assert_ref(i); pa_assert_se(u = i->userdata); - pa_sink_set_asyncmsgq(u->sink, 
dest->asyncmsgq); - pa_sink_update_flags(u->sink, PA_SINK_LATENCY|PA_SINK_DYNAMIC_LATENCY, dest->flags); + if (dest) { + pa_sink_set_asyncmsgq(u->sink, dest->asyncmsgq); + pa_sink_update_flags(u->sink, PA_SINK_LATENCY|PA_SINK_DYNAMIC_LATENCY, dest->flags); + } else + pa_sink_set_asyncmsgq(u->sink, NULL); } /* Called from main context */ diff --git a/src/modules/module-remap-sink.c b/src/modules/module-remap-sink.c index 0b4fdc9b..becff55c 100644 --- a/src/modules/module-remap-sink.c +++ b/src/modules/module-remap-sink.c @@ -302,8 +302,11 @@ static void sink_input_moving_cb(pa_sink_input *i, pa_sink *dest) { pa_sink_input_assert_ref(i); pa_assert_se(u = i->userdata); - pa_sink_set_asyncmsgq(u->sink, dest->asyncmsgq); - pa_sink_update_flags(u->sink, PA_SINK_LATENCY|PA_SINK_DYNAMIC_LATENCY, dest->flags); + if (dest) { + pa_sink_set_asyncmsgq(u->sink, dest->asyncmsgq); + pa_sink_update_flags(u->sink, PA_SINK_LATENCY|PA_SINK_DYNAMIC_LATENCY, dest->flags); + } else + pa_sink_set_asyncmsgq(u->sink, NULL); } int pa__init(pa_module*m) { -- cgit From 1a05d67f07fb4bfa6e419791cf5609d608f536cd Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 22 Aug 2009 04:04:23 +0200 Subject: core: relex validity checks when destructing half-set up source outputs/sink inputs --- src/pulsecore/sink-input.c | 5 ++++- src/pulsecore/source-output.c | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/pulsecore/sink-input.c b/src/pulsecore/sink-input.c index 4137a425..0ad95e6f 100644 --- a/src/pulsecore/sink-input.c +++ b/src/pulsecore/sink-input.c @@ -487,7 +487,10 @@ static void sink_input_free(pa_object *o) { pa_log_info("Freeing input %u \"%s\"", i->index, pa_strnull(pa_proplist_gets(i->proplist, PA_PROP_MEDIA_NAME))); - pa_assert(!i->thread_info.attached); + /* Side note: this function must be able to destruct properly any + * kind of sink input in any state, even those which are + * "half-moved" or are connected to sinks that have no 
asyncmsgq + * and are hence half-destructed themselves! */ if (i->thread_info.render_memblockq) pa_memblockq_free(i->thread_info.render_memblockq); diff --git a/src/pulsecore/source-output.c b/src/pulsecore/source-output.c index b0298616..43733400 100644 --- a/src/pulsecore/source-output.c +++ b/src/pulsecore/source-output.c @@ -359,8 +359,6 @@ static void source_output_free(pa_object* mo) { pa_log_info("Freeing output %u \"%s\"", o->index, pa_strnull(pa_proplist_gets(o->proplist, PA_PROP_MEDIA_NAME))); - pa_assert(!o->thread_info.attached); - if (o->thread_info.delay_memblockq) pa_memblockq_free(o->thread_info.delay_memblockq); -- cgit From 560da5b0a1386c2a158d69b6ba0ef99c0f03bbf4 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 22 Aug 2009 21:59:17 +0200 Subject: udev: process all inotify events queued up, not just the first one in the queue --- src/modules/module-udev-detect.c | 56 ++++++++++++++++++++++++++++------------ 1 file changed, 39 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/modules/module-udev-detect.c b/src/modules/module-udev-detect.c index 1d67c0cc..55136348 100644 --- a/src/modules/module-udev-detect.c +++ b/src/modules/module-udev-detect.c @@ -325,6 +325,7 @@ static void inotify_cb( for (;;) { ssize_t r; + struct inotify_event *event; pa_zero(buf); if ((r = pa_read(fd, &buf, sizeof(buf), &type)) <= 0) { @@ -336,23 +337,44 @@ static void inotify_cb( goto fail; } - /* From udev we get the guarantee that the control - * device's ACL is changes last. 
To avoid races when ACLs - * are changed we hence watch only the control device */ - if (((buf.e.mask & IN_ATTRIB) && pa_startswith(buf.e.name, "controlC"))) - PA_HASHMAP_FOREACH(d, u->devices, state) - if (control_node_belongs_to_device(d, buf.e.name)) - d->need_verify = TRUE; - - /* ALSA doesn't really give us any guarantee on the closing - * order, so let's simply hope */ - if (((buf.e.mask & IN_CLOSE_WRITE) && pa_startswith(buf.e.name, "pcmC"))) - PA_HASHMAP_FOREACH(d, u->devices, state) - if (pcm_node_belongs_to_device(d, buf.e.name)) - d->need_verify = TRUE; - - if ((buf.e.mask & (IN_DELETE_SELF|IN_MOVE_SELF))) - deleted = TRUE; + event = &buf.e; + while (r > 0) { + size_t len; + + if ((size_t) r < sizeof(struct inotify_event)) { + pa_log("read() too short."); + goto fail; + } + + len = sizeof(struct inotify_event) + event->len; + + if ((size_t) r < len) { + pa_log("Payload missing."); + goto fail; + } + + /* From udev we get the guarantee that the control + * device's ACL is changed last. 
To avoid races when ACLs + * are changed we hence watch only the control device */ + if (((event->mask & IN_ATTRIB) && pa_startswith(event->name, "controlC"))) + PA_HASHMAP_FOREACH(d, u->devices, state) + if (control_node_belongs_to_device(d, event->name)) + d->need_verify = TRUE; + + /* ALSA doesn't really give us any guarantee on the closing + * order, so let's simply hope */ + if (((event->mask & IN_CLOSE_WRITE) && pa_startswith(event->name, "pcmC"))) + PA_HASHMAP_FOREACH(d, u->devices, state) + if (pcm_node_belongs_to_device(d, event->name)) + d->need_verify = TRUE; + + /* /dev/snd/ might have been removed */ + if ((event->mask & (IN_DELETE_SELF|IN_MOVE_SELF))) + deleted = TRUE; + + event = (struct inotify_event*) ((uint8_t*) event + len); + r -= len; + } } PA_HASHMAP_FOREACH(d, u->devices, state) -- cgit From d6fb8d10819bebc1cee203de7330cceeafde9fed Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sat, 22 Aug 2009 23:10:45 +0200 Subject: udev: check busy status of alsa cards before loading alsa modules and hence initiating profile probing --- src/modules/module-udev-detect.c | 149 ++++++++++++++++++++++++++++++++++----- 1 file changed, 132 insertions(+), 17 deletions(-) (limited to 'src') diff --git a/src/modules/module-udev-detect.c b/src/modules/module-udev-detect.c index 55136348..b41b9c0f 100644 --- a/src/modules/module-udev-detect.c +++ b/src/modules/module-udev-detect.c @@ -25,6 +25,7 @@ #include #include +#include #include #include @@ -45,8 +46,7 @@ PA_MODULE_USAGE( struct device { char *path; - pa_bool_t accessible:1; - pa_bool_t need_verify:1; + pa_bool_t need_verify; char *card_name; char *args; uint32_t module; @@ -99,34 +99,150 @@ static const char *path_get_card_id(const char *path) { return e + 5; } +static pa_bool_t is_card_busy(const char *id) { + char *card_path = NULL, *pcm_path = NULL, *sub_status = NULL; + DIR *card_dir = NULL, *pcm_dir = NULL; + FILE *status_file = NULL; + size_t len; + struct dirent *space = NULL, *de; + pa_bool_t 
busy = FALSE; + int r; + + pa_assert(id); + + card_path = pa_sprintf_malloc("/proc/asound/card%s", id); + + if (!(card_dir = opendir(card_path))) { + pa_log_warn("Failed to open %s: %s", card_path, pa_cstrerror(errno)); + goto fail; + } + + len = offsetof(struct dirent, d_name) + fpathconf(dirfd(card_dir), _PC_NAME_MAX) + 1; + space = pa_xmalloc(len); + + for (;;) { + de = NULL; + + if ((r = readdir_r(card_dir, space, &de)) != 0) { + pa_log_warn("readdir_r() failed: %s", pa_cstrerror(r)); + goto fail; + } + + if (!de) + break; + + if (!pa_startswith(de->d_name, "pcm")) + continue; + + pa_xfree(pcm_path); + pcm_path = pa_sprintf_malloc("%s/%s", card_path, de->d_name); + + if (pcm_dir) + closedir(pcm_dir); + + if (!(pcm_dir = opendir(pcm_path))) { + pa_log_warn("Failed to open %s: %s", pcm_path, pa_cstrerror(errno)); + continue; + } + + for (;;) { + char line[32]; + + if ((r = readdir_r(pcm_dir, space, &de)) != 0) { + pa_log_warn("readdir_r() failed: %s", pa_cstrerror(r)); + goto fail; + } + + if (!de) + break; + + if (!pa_startswith(de->d_name, "sub")) + continue; + + pa_xfree(sub_status); + sub_status = pa_sprintf_malloc("%s/%s/status", pcm_path, de->d_name); + + if (status_file) + fclose(status_file); + + if (!(status_file = fopen(sub_status, "r"))) { + pa_log_warn("Failed to open %s: %s", sub_status, pa_cstrerror(errno)); + continue; + } + + if (!(fgets(line, sizeof(line)-1, status_file))) { + pa_log_warn("Failed to read from %s: %s", sub_status, pa_cstrerror(errno)); + continue; + } + + if (!pa_streq(line, "closed\n")) { + busy = TRUE; + break; + } + } + } + +fail: + + pa_xfree(card_path); + pa_xfree(pcm_path); + pa_xfree(sub_status); + pa_xfree(space); + + if (card_dir) + closedir(card_dir); + + if (pcm_dir) + closedir(pcm_dir); + + if (status_file) + fclose(status_file); + + return busy; +} + static void verify_access(struct userdata *u, struct device *d) { char *cd; pa_card *card; + pa_bool_t accessible; pa_assert(u); pa_assert(d); cd = 
pa_sprintf_malloc("%s/snd/controlC%s", udev_get_dev_path(u->udev), path_get_card_id(d->path)); - d->accessible = access(cd, R_OK|W_OK) >= 0; + accessible = access(cd, R_OK|W_OK) >= 0; + pa_log_debug("%s is accessible: %s", cd, pa_yes_no(accessible)); - pa_log_info("%s is accessible: %s", cd, pa_yes_no(d->accessible)); pa_xfree(cd); if (d->module == PA_INVALID_INDEX) { - /* If we not loaded, try to load */ + /* If we are not loaded, try to load */ - if (d->accessible) { + if (accessible) { pa_module *m; - - pa_log_debug("Loading module-alsa-card with arguments '%s'", d->args); - m = pa_module_load(u->core, "module-alsa-card", d->args); - - if (m) { - d->module = m->index; - pa_log_info("Card %s (%s) module loaded.", d->path, d->card_name); - } else - pa_log_info("Card %s (%s) failed to load module.", d->path, d->card_name); + pa_bool_t busy; + + /* Check if any of the PCM devices that belong to this + * card are currently busy. If they are, don't try to load + * right now, to make sure the probing phase can + * successfully complete. When the current user of the + * device closes it we will get another notification via + * inotify and can then recheck. 
*/ + + busy = is_card_busy(path_get_card_id(d->path)); + pa_log_debug("%s is busy: %s", d->path, pa_yes_no(busy)); + + if (!busy) { + pa_log_debug("Loading module-alsa-card with arguments '%s'", d->args); + m = pa_module_load(u->core, "module-alsa-card", d->args); + + if (m) { + d->module = m->index; + pa_log_info("Card %s (%s) module loaded.", d->path, d->card_name); + } else + pa_log_info("Card %s (%s) failed to load module.", d->path, d->card_name); + } } } else { @@ -135,7 +251,7 @@ static void verify_access(struct userdata *u, struct device *d) { * accessible boolean */ if ((card = pa_namereg_get(u->core, d->card_name, PA_NAMEREG_CARD))) - pa_card_suspend(card, !d->accessible, PA_SUSPEND_SESSION); + pa_card_suspend(card, !accessible, PA_SUSPEND_SESSION); } } @@ -160,7 +276,6 @@ static void card_changed(struct userdata *u, struct udev_device *dev) { d = pa_xnew0(struct device, 1); d->path = pa_xstrdup(path); - d->accessible = TRUE; d->module = PA_INVALID_INDEX; if (!(t = udev_device_get_property_value(dev, "PULSE_NAME"))) -- cgit From a0f01ddc951694e1d13f44dc3a5d0d3fb2daa142 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sun, 23 Aug 2009 21:49:37 +0200 Subject: port a few things over to use xmalloc and friends instead of low-level libc malloc/free directly --- src/pulsecore/core-util.c | 2 +- src/pulsecore/cpu-arm.c | 21 ++++++++++----------- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'src') diff --git a/src/pulsecore/core-util.c b/src/pulsecore/core-util.c index 0eb32cc4..1c8c6780 100644 --- a/src/pulsecore/core-util.c +++ b/src/pulsecore/core-util.c @@ -2223,7 +2223,7 @@ int pa_close_all(int except_fd, ...) { va_end(ap); r = pa_close_allv(p); - free(p); + pa_xfree(p); return r; } diff --git a/src/pulsecore/cpu-arm.c b/src/pulsecore/cpu-arm.c index 5a994b71..453b7848 100644 --- a/src/pulsecore/cpu-arm.c +++ b/src/pulsecore/cpu-arm.c @@ -2,7 +2,7 @@ This file is part of PulseAudio. 
Copyright 2004-2006 Lennart Poettering - Copyright 2009 Wim Taymans + Copyright 2009 Wim Taymans PulseAudio is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published @@ -60,21 +60,20 @@ static char *get_cpuinfo(void) { char *cpuinfo; int n, fd; - if (!(cpuinfo = malloc(MAX_BUFFER))) - return NULL; + cpuinfo = pa_xmalloc(MAX_BUFFER); if ((fd = open("/proc/cpuinfo", O_RDONLY)) < 0) { - free (cpuinfo); + pa_xfree(cpuinfo); return NULL; } - if ((n = read(fd, cpuinfo, MAX_BUFFER-1)) < 0) { - free (cpuinfo); - close (fd); + if ((n = pa_read(fd, cpuinfo, MAX_BUFFER-1)) < 0) { + pa_xfree(cpuinfo); + pa_close(fd); return NULL; } cpuinfo[n] = 0; - close (fd); + pa_close(fd); return cpuinfo; } @@ -102,7 +101,7 @@ void pa_cpu_init_arm (void) { if (arch >= 7) flags |= PA_CPU_ARM_V7; - free (line); + pa_xfree(line); } /* get the CPU features */ if ((line = get_cpuinfo_line (cpuinfo, "Features"))) { @@ -118,10 +117,10 @@ void pa_cpu_init_arm (void) { else if (!strcmp (current, "vfpv3")) flags |= PA_CPU_ARM_VFPV3; - free (current); + pa_xfree(current); } } - free (cpuinfo); + pa_xfree(cpuinfo); pa_log_info ("CPU flags: %s%s%s%s%s%s", (flags & PA_CPU_ARM_V6) ? 
"V6 " : "", -- cgit From 80c693730365c1a375a5c0e781f38e7f165b37bf Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Sun, 23 Aug 2009 22:34:42 +0200 Subject: alsa: increase interval between smoother updates exponentially for alsa sources, following the scheme for sinks --- src/modules/alsa/alsa-sink.c | 8 +++----- src/modules/alsa/alsa-source.c | 20 ++++++++++++++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/modules/alsa/alsa-sink.c b/src/modules/alsa/alsa-sink.c index c3694729..b99ed782 100644 --- a/src/modules/alsa/alsa-sink.c +++ b/src/modules/alsa/alsa-sink.c @@ -733,10 +733,9 @@ static void update_smoother(struct userdata *u) { now1 = pa_rtclock_now(); /* check if the time since the last update is bigger than the interval */ - if (u->last_smoother_update > 0) { + if (u->last_smoother_update > 0) if (u->last_smoother_update + u->smoother_interval > now1) return; - } position = (int64_t) u->write_count - ((int64_t) delay * (int64_t) u->frame_size); @@ -745,11 +744,11 @@ static void update_smoother(struct userdata *u) { now2 = pa_bytes_to_usec((uint64_t) position, &u->sink->sample_spec); + pa_smoother_put(u->smoother, now1, now2); + u->last_smoother_update = now1; /* exponentially increase the update interval up to the MAX limit */ u->smoother_interval = PA_MIN (u->smoother_interval * 2, SMOOTHER_MAX_INTERVAL); - - pa_smoother_put(u->smoother, now1, now2); } static pa_usec_t sink_get_latency(struct userdata *u) { @@ -927,7 +926,6 @@ static int unsuspend(struct userdata *u) { u->first = TRUE; u->since_start = 0; - pa_log_info("Resumed successfully..."); return 0; diff --git a/src/modules/alsa/alsa-source.c b/src/modules/alsa/alsa-source.c index 7da37553..336027a2 100644 --- a/src/modules/alsa/alsa-source.c +++ b/src/modules/alsa/alsa-source.c @@ -65,6 +65,9 @@ #define TSCHED_MIN_SLEEP_USEC (10*PA_USEC_PER_MSEC) /* 10ms */ #define TSCHED_MIN_WAKEUP_USEC (4*PA_USEC_PER_MSEC) /* 4ms */ +#define SMOOTHER_MIN_INTERVAL 
(2*PA_USEC_PER_MSEC) /* 2ms */ +#define SMOOTHER_MAX_INTERVAL (200*PA_USEC_PER_MSEC) /* 200ms */ + #define VOLUME_ACCURACY (PA_VOLUME_NORM/100) struct userdata { @@ -108,6 +111,8 @@ struct userdata { pa_smoother *smoother; uint64_t read_count; + pa_usec_t smoother_interval; + pa_usec_t last_smoother_update; pa_reserve_wrapper *reserve; pa_hook_slot *reserve_slot; @@ -691,15 +696,23 @@ static void update_smoother(struct userdata *u) { now1 = pa_timespec_load(&htstamp); } - position = u->read_count + ((uint64_t) delay * (uint64_t) u->frame_size); - /* Hmm, if the timestamp is 0, then it wasn't set and we take the current time */ if (now1 <= 0) now1 = pa_rtclock_now(); + /* check if the time since the last update is bigger than the interval */ + if (u->last_smoother_update > 0) + if (u->last_smoother_update + u->smoother_interval > now1) + return; + + position = u->read_count + ((uint64_t) delay * (uint64_t) u->frame_size); now2 = pa_bytes_to_usec(position, &u->source->sample_spec); pa_smoother_put(u->smoother, now1, now2); + + u->last_smoother_update = now1; + /* exponentially increase the update interval up to the MAX limit */ + u->smoother_interval = PA_MIN (u->smoother_interval * 2, SMOOTHER_MAX_INTERVAL); } static pa_usec_t source_get_latency(struct userdata *u) { @@ -862,6 +875,8 @@ static int unsuspend(struct userdata *u) { u->read_count = 0; pa_smoother_reset(u->smoother, pa_rtclock_now(), TRUE); + u->smoother_interval = SMOOTHER_MIN_INTERVAL; + u->last_smoother_update = 0; pa_log_info("Resumed successfully..."); @@ -1469,6 +1484,7 @@ pa_source *pa_alsa_source_new(pa_module *m, pa_modargs *ma, const char*driver, p 5, pa_rtclock_now(), FALSE); + u->smoother_interval = SMOOTHER_MIN_INTERVAL; dev_id = pa_modargs_get_value( ma, "device_id", -- cgit From 050a3a99e1d151b4f55c89f82073ef33f3399646 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Mon, 24 Aug 2009 03:26:56 +0200 Subject: alsa: automatically decrease watermark after a time of stability --- 
src/modules/alsa/alsa-sink.c | 146 +++++++++++++++++++++++++++++++---------- src/modules/alsa/alsa-source.c | 145 ++++++++++++++++++++++++++++++---------- src/pulsecore/rtpoll.c | 28 ++++---- src/pulsecore/rtpoll.h | 4 ++ 4 files changed, 240 insertions(+), 83 deletions(-) (limited to 'src') diff --git a/src/modules/alsa/alsa-sink.c b/src/modules/alsa/alsa-sink.c index b99ed782..07d53880 100644 --- a/src/modules/alsa/alsa-sink.c +++ b/src/modules/alsa/alsa-sink.c @@ -62,14 +62,21 @@ /* #define DEBUG_TIMING */ #define DEFAULT_DEVICE "default" -#define DEFAULT_TSCHED_BUFFER_USEC (2*PA_USEC_PER_SEC) /* 2s -- Overall buffer size */ -#define DEFAULT_TSCHED_WATERMARK_USEC (20*PA_USEC_PER_MSEC) /* 20ms -- Fill up when only this much is left in the buffer */ -#define TSCHED_WATERMARK_STEP_USEC (10*PA_USEC_PER_MSEC) /* 10ms -- On underrun, increase watermark by this */ -#define TSCHED_MIN_SLEEP_USEC (10*PA_USEC_PER_MSEC) /* 10ms -- Sleep at least 10ms on each iteration */ -#define TSCHED_MIN_WAKEUP_USEC (4*PA_USEC_PER_MSEC) /* 4ms -- Wakeup at least this long before the buffer runs empty*/ -#define SMOOTHER_MIN_INTERVAL (2*PA_USEC_PER_MSEC) /* 2ms -- min smoother update interval */ -#define SMOOTHER_MAX_INTERVAL (200*PA_USEC_PER_MSEC) /* 200ms -- max smoother update inteval */ +#define DEFAULT_TSCHED_BUFFER_USEC (2*PA_USEC_PER_SEC) /* 2s -- Overall buffer size */ +#define DEFAULT_TSCHED_WATERMARK_USEC (20*PA_USEC_PER_MSEC) /* 20ms -- Fill up when only this much is left in the buffer */ + +#define TSCHED_WATERMARK_INC_STEP_USEC (10*PA_USEC_PER_MSEC) /* 10ms -- On underrun, increase watermark by this */ +#define TSCHED_WATERMARK_DEC_STEP_USEC (5*PA_USEC_PER_MSEC) /* 5ms -- When everything's great, decrease watermark by this */ +#define TSCHED_WATERMARK_VERIFY_AFTER_USEC (20*PA_USEC_PER_SEC) /* 20s -- How long after a drop out recheck if things are good now */ +#define TSCHED_WATERMARK_INC_THRESHOLD_USEC (1*PA_USEC_PER_MSEC) /* 3ms -- If the buffer level ever below this 
theshold, increase the watermark */ +#define TSCHED_WATERMARK_DEC_THRESHOLD_USEC (100*PA_USEC_PER_MSEC) /* 100ms -- If the buffer level didn't drop below this theshold in the verification time, decrease the watermark */ + +#define TSCHED_MIN_SLEEP_USEC (10*PA_USEC_PER_MSEC) /* 10ms -- Sleep at least 10ms on each iteration */ +#define TSCHED_MIN_WAKEUP_USEC (4*PA_USEC_PER_MSEC) /* 4ms -- Wakeup at least this long before the buffer runs empty*/ + +#define SMOOTHER_MIN_INTERVAL (2*PA_USEC_PER_MSEC) /* 2ms -- min smoother update interval */ +#define SMOOTHER_MAX_INTERVAL (200*PA_USEC_PER_MSEC) /* 200ms -- max smoother update inteval */ #define VOLUME_ACCURACY (PA_VOLUME_NORM/100) /* don't require volume adjustments to be perfectly correct. don't necessarily extend granularity in software unless the differences get greater than this level */ @@ -99,7 +106,12 @@ struct userdata { hwbuf_unused, min_sleep, min_wakeup, - watermark_step; + watermark_inc_step, + watermark_dec_step, + watermark_inc_threshold, + watermark_dec_threshold; + + pa_usec_t watermark_dec_not_before; unsigned nfragments; pa_memchunk memchunk; @@ -248,6 +260,7 @@ static void fix_min_sleep_wakeup(struct userdata *u) { size_t max_use, max_use_2; pa_assert(u); + pa_assert(u->use_tsched); max_use = u->hwbuf_size - u->hwbuf_unused; max_use_2 = pa_frame_align(max_use/2, &u->sink->sample_spec); @@ -262,6 +275,7 @@ static void fix_min_sleep_wakeup(struct userdata *u) { static void fix_tsched_watermark(struct userdata *u) { size_t max_use; pa_assert(u); + pa_assert(u->use_tsched); max_use = u->hwbuf_size - u->hwbuf_unused; @@ -272,7 +286,7 @@ static void fix_tsched_watermark(struct userdata *u) { u->tsched_watermark = u->min_wakeup; } -static void adjust_after_underrun(struct userdata *u) { +static void increase_watermark(struct userdata *u) { size_t old_watermark; pa_usec_t old_min_latency, new_min_latency; @@ -281,31 +295,64 @@ static void adjust_after_underrun(struct userdata *u) { /* First, just try to 
increase the watermark */ old_watermark = u->tsched_watermark; - u->tsched_watermark = PA_MIN(u->tsched_watermark * 2, u->tsched_watermark + u->watermark_step); + u->tsched_watermark = PA_MIN(u->tsched_watermark * 2, u->tsched_watermark + u->watermark_inc_step); fix_tsched_watermark(u); if (old_watermark != u->tsched_watermark) { - pa_log_notice("Increasing wakeup watermark to %0.2f ms", - (double) pa_bytes_to_usec(u->tsched_watermark, &u->sink->sample_spec) / PA_USEC_PER_MSEC); + pa_log_info("Increasing wakeup watermark to %0.2f ms", + (double) pa_bytes_to_usec(u->tsched_watermark, &u->sink->sample_spec) / PA_USEC_PER_MSEC); return; } /* Hmm, we cannot increase the watermark any further, hence let's raise the latency */ old_min_latency = u->sink->thread_info.min_latency; - new_min_latency = PA_MIN(old_min_latency * 2, old_min_latency + TSCHED_WATERMARK_STEP_USEC); + new_min_latency = PA_MIN(old_min_latency * 2, old_min_latency + TSCHED_WATERMARK_INC_STEP_USEC); new_min_latency = PA_MIN(new_min_latency, u->sink->thread_info.max_latency); if (old_min_latency != new_min_latency) { - pa_log_notice("Increasing minimal latency to %0.2f ms", - (double) new_min_latency / PA_USEC_PER_MSEC); + pa_log_info("Increasing minimal latency to %0.2f ms", + (double) new_min_latency / PA_USEC_PER_MSEC); pa_sink_set_latency_range_within_thread(u->sink, new_min_latency, u->sink->thread_info.max_latency); - return; } /* When we reach this we're officialy fucked! 
*/ } +static void decrease_watermark(struct userdata *u) { + size_t old_watermark; + pa_usec_t now; + + pa_assert(u); + pa_assert(u->use_tsched); + + now = pa_rtclock_now(); + + if (u->watermark_dec_not_before <= 0) + goto restart; + + if (u->watermark_dec_not_before > now) + return; + + old_watermark = u->tsched_watermark; + + if (u->tsched_watermark < u->watermark_dec_step) + u->tsched_watermark = u->tsched_watermark / 2; + else + u->tsched_watermark = PA_MAX(u->tsched_watermark / 2, u->tsched_watermark - u->watermark_dec_step); + + fix_tsched_watermark(u); + + if (old_watermark != u->tsched_watermark) + pa_log_info("Decreasing wakeup watermark to %0.2f ms", + (double) pa_bytes_to_usec(u->tsched_watermark, &u->sink->sample_spec) / PA_USEC_PER_MSEC); + + /* We don't change the latency range*/ + +restart: + u->watermark_dec_not_before = now + TSCHED_WATERMARK_VERIFY_AFTER_USEC; +} + static void hw_sleep_time(struct userdata *u, pa_usec_t *sleep_usec, pa_usec_t*process_usec) { pa_usec_t usec, wm; @@ -313,6 +360,7 @@ static void hw_sleep_time(struct userdata *u, pa_usec_t *sleep_usec, pa_usec_t*p pa_assert(process_usec); pa_assert(u); + pa_assert(u->use_tsched); usec = pa_sink_get_requested_latency_within_thread(u->sink); @@ -360,7 +408,7 @@ static int try_recover(struct userdata *u, const char *call, int err) { return 0; } -static size_t check_left_to_play(struct userdata *u, size_t n_bytes) { +static size_t check_left_to_play(struct userdata *u, size_t n_bytes, pa_bool_t on_timeout) { size_t left_to_play; /* We use <= instead of < for this check here because an underrun @@ -368,34 +416,55 @@ static size_t check_left_to_play(struct userdata *u, size_t n_bytes) { * it is removed from the buffer. This is particularly important * when block transfer is used. */ - if (n_bytes <= u->hwbuf_size) { + if (n_bytes <= u->hwbuf_size) left_to_play = u->hwbuf_size - n_bytes; + else { + + /* We got a dropout. What a mess! 
*/ + left_to_play = 0; #ifdef DEBUG_TIMING - pa_log_debug("%0.2f ms left to play", (double) pa_bytes_to_usec(left_to_play, &u->sink->sample_spec) / PA_USEC_PER_MSEC); + PA_DEBUG_TRAP; #endif - } else { - left_to_play = 0; + if (!u->first && !u->after_rewind) + if (pa_log_ratelimit()) + pa_log_info("Underrun!"); + } #ifdef DEBUG_TIMING - PA_DEBUG_TRAP; + pa_log_debug("%0.2f ms left to play; inc threshold = %0.2f ms; dec threshold = %0.2f ms", + (double) pa_bytes_to_usec(left_to_play, &u->sink->sample_spec) / PA_USEC_PER_MSEC, + (double) pa_bytes_to_usec(u->watermark_inc_threshold, &u->sink->sample_spec) / PA_USEC_PER_MSEC, + (double) pa_bytes_to_usec(u->watermark_dec_threshold, &u->sink->sample_spec) / PA_USEC_PER_MSEC); #endif + if (u->use_tsched) { + pa_bool_t reset_not_before = TRUE; + if (!u->first && !u->after_rewind) { + if (left_to_play < u->watermark_inc_threshold) + increase_watermark(u); + else if (left_to_play > u->watermark_dec_threshold) { + reset_not_before = FALSE; - if (pa_log_ratelimit()) - pa_log_info("Underrun!"); + /* We decrease the watermark only if have actually + * been woken up by a timeout. If something else woke + * us up it's too easy to fulfill the deadlines... 
*/ - if (u->use_tsched) - adjust_after_underrun(u); + if (on_timeout) + decrease_watermark(u); + } } + + if (reset_not_before) + u->watermark_dec_not_before = 0; } return left_to_play; } -static int mmap_write(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polled) { +static int mmap_write(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polled, pa_bool_t on_timeout) { pa_bool_t work_done = TRUE; pa_usec_t max_sleep_usec = 0, process_usec = 0; size_t left_to_play; @@ -430,7 +499,8 @@ static int mmap_write(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polle pa_log_debug("avail: %lu", (unsigned long) n_bytes); #endif - left_to_play = check_left_to_play(u, n_bytes); + left_to_play = check_left_to_play(u, n_bytes, on_timeout); + on_timeout = FALSE; if (u->use_tsched) @@ -565,7 +635,7 @@ static int mmap_write(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polle return work_done ? 1 : 0; } -static int unix_write(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polled) { +static int unix_write(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polled, pa_bool_t on_timeout) { pa_bool_t work_done = FALSE; pa_usec_t max_sleep_usec = 0, process_usec = 0; size_t left_to_play; @@ -591,7 +661,8 @@ static int unix_write(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polle } n_bytes = (size_t) n * u->frame_size; - left_to_play = check_left_to_play(u, n_bytes); + left_to_play = check_left_to_play(u, n_bytes, on_timeout); + on_timeout = FALSE; if (u->use_tsched) @@ -1278,15 +1349,16 @@ static void thread_func(void *userdata) { if (PA_SINK_IS_OPENED(u->sink->thread_info.state)) { int work_done; pa_usec_t sleep_usec = 0; + pa_bool_t on_timeout = pa_rtpoll_timer_elapsed(u->rtpoll); if (PA_UNLIKELY(u->sink->thread_info.rewind_requested)) if (process_rewind(u) < 0) goto fail; if (u->use_mmap) - work_done = mmap_write(u, &sleep_usec, revents & POLLOUT); + work_done = mmap_write(u, &sleep_usec, revents & POLLOUT, on_timeout); else - work_done = 
unix_write(u, &sleep_usec, revents & POLLOUT); + work_done = unix_write(u, &sleep_usec, revents & POLLOUT, on_timeout); if (work_done < 0) goto fail; @@ -1787,7 +1859,6 @@ pa_sink *pa_alsa_sink_new(pa_module *m, pa_modargs *ma, const char*driver, pa_ca u->fragment_size = frag_size = (uint32_t) (period_frames * frame_size); u->nfragments = nfrags; u->hwbuf_size = u->fragment_size * nfrags; - u->tsched_watermark = pa_usec_to_bytes_round_up(pa_bytes_to_usec_round_up(tsched_watermark, &requested_ss), &u->sink->sample_spec); pa_cvolume_mute(&u->hardware_volume, u->sink->sample_spec.channels); pa_log_info("Using %u fragments of size %lu bytes, buffer time is %0.2fms", @@ -1798,7 +1869,13 @@ pa_sink *pa_alsa_sink_new(pa_module *m, pa_modargs *ma, const char*driver, pa_ca pa_sink_set_max_rewind(u->sink, u->hwbuf_size); if (u->use_tsched) { - u->watermark_step = pa_usec_to_bytes(TSCHED_WATERMARK_STEP_USEC, &u->sink->sample_spec); + u->tsched_watermark = pa_usec_to_bytes_round_up(pa_bytes_to_usec_round_up(tsched_watermark, &requested_ss), &u->sink->sample_spec); + + u->watermark_inc_step = pa_usec_to_bytes(TSCHED_WATERMARK_INC_STEP_USEC, &u->sink->sample_spec); + u->watermark_dec_step = pa_usec_to_bytes(TSCHED_WATERMARK_DEC_STEP_USEC, &u->sink->sample_spec); + + u->watermark_inc_threshold = pa_usec_to_bytes_round_up(TSCHED_WATERMARK_INC_THRESHOLD_USEC, &u->sink->sample_spec); + u->watermark_dec_threshold = pa_usec_to_bytes_round_up(TSCHED_WATERMARK_DEC_THRESHOLD_USEC, &u->sink->sample_spec); fix_min_sleep_wakeup(u); fix_tsched_watermark(u); @@ -1812,6 +1889,7 @@ pa_sink *pa_alsa_sink_new(pa_module *m, pa_modargs *ma, const char*driver, pa_ca } else pa_sink_set_fixed_latency(u->sink, pa_bytes_to_usec(u->hwbuf_size, &ss)); + reserve_update(u); if (update_sw_params(u) < 0) diff --git a/src/modules/alsa/alsa-source.c b/src/modules/alsa/alsa-source.c index 336027a2..165b2e3b 100644 --- a/src/modules/alsa/alsa-source.c +++ b/src/modules/alsa/alsa-source.c @@ -59,14 +59,22 @@ /* 
#define DEBUG_TIMING */ #define DEFAULT_DEVICE "default" -#define DEFAULT_TSCHED_BUFFER_USEC (2*PA_USEC_PER_SEC) /* 2s */ -#define DEFAULT_TSCHED_WATERMARK_USEC (20*PA_USEC_PER_MSEC) /* 20ms */ -#define TSCHED_WATERMARK_STEP_USEC (10*PA_USEC_PER_MSEC) /* 10ms */ -#define TSCHED_MIN_SLEEP_USEC (10*PA_USEC_PER_MSEC) /* 10ms */ -#define TSCHED_MIN_WAKEUP_USEC (4*PA_USEC_PER_MSEC) /* 4ms */ -#define SMOOTHER_MIN_INTERVAL (2*PA_USEC_PER_MSEC) /* 2ms */ -#define SMOOTHER_MAX_INTERVAL (200*PA_USEC_PER_MSEC) /* 200ms */ +#define DEFAULT_TSCHED_BUFFER_USEC (2*PA_USEC_PER_SEC) /* 2s */ +#define DEFAULT_TSCHED_WATERMARK_USEC (20*PA_USEC_PER_MSEC) /* 20ms */ + +#define TSCHED_WATERMARK_INC_STEP_USEC (10*PA_USEC_PER_MSEC) /* 10ms */ +#define TSCHED_WATERMARK_DEC_STEP_USEC (5*PA_USEC_PER_MSEC) /* 5ms */ +#define TSCHED_WATERMARK_VERIFY_AFTER_USEC (20*PA_USEC_PER_SEC) /* 20s */ +#define TSCHED_WATERMARK_INC_THRESHOLD_USEC (1*PA_USEC_PER_MSEC) /* 1ms */ +#define TSCHED_WATERMARK_DEC_THRESHOLD_USEC (100*PA_USEC_PER_MSEC) /* 100ms */ +#define TSCHED_WATERMARK_STEP_USEC (10*PA_USEC_PER_MSEC) /* 10ms */ + +#define TSCHED_MIN_SLEEP_USEC (10*PA_USEC_PER_MSEC) /* 10ms */ +#define TSCHED_MIN_WAKEUP_USEC (4*PA_USEC_PER_MSEC) /* 4ms */ + +#define SMOOTHER_MIN_INTERVAL (2*PA_USEC_PER_MSEC) /* 2ms */ +#define SMOOTHER_MAX_INTERVAL (200*PA_USEC_PER_MSEC) /* 200ms */ #define VOLUME_ACCURACY (PA_VOLUME_NORM/100) @@ -96,7 +104,12 @@ struct userdata { hwbuf_unused, min_sleep, min_wakeup, - watermark_step; + watermark_inc_step, + watermark_dec_step, + watermark_inc_threshold, + watermark_dec_threshold; + + pa_usec_t watermark_dec_not_before; unsigned nfragments; @@ -241,6 +254,7 @@ static int reserve_monitor_init(struct userdata *u, const char *dname) { static void fix_min_sleep_wakeup(struct userdata *u) { size_t max_use, max_use_2; pa_assert(u); + pa_assert(u->use_tsched); max_use = u->hwbuf_size - u->hwbuf_unused; max_use_2 = pa_frame_align(max_use/2, &u->source->sample_spec); @@ -255,6 +269,7
@@ static void fix_min_sleep_wakeup(struct userdata *u) { static void fix_tsched_watermark(struct userdata *u) { size_t max_use; pa_assert(u); + pa_assert(u->use_tsched); max_use = u->hwbuf_size - u->hwbuf_unused; @@ -265,7 +280,7 @@ static void fix_tsched_watermark(struct userdata *u) { u->tsched_watermark = u->min_wakeup; } -static void adjust_after_overrun(struct userdata *u) { +static void increase_watermark(struct userdata *u) { size_t old_watermark; pa_usec_t old_min_latency, new_min_latency; @@ -274,36 +289,72 @@ static void adjust_after_overrun(struct userdata *u) { /* First, just try to increase the watermark */ old_watermark = u->tsched_watermark; - u->tsched_watermark = PA_MIN(u->tsched_watermark * 2, u->tsched_watermark + u->watermark_step); - + u->tsched_watermark = PA_MIN(u->tsched_watermark * 2, u->tsched_watermark + u->watermark_inc_step); fix_tsched_watermark(u); if (old_watermark != u->tsched_watermark) { - pa_log_notice("Increasing wakeup watermark to %0.2f ms", - (double) pa_bytes_to_usec(u->tsched_watermark, &u->source->sample_spec) / PA_USEC_PER_MSEC); + pa_log_info("Increasing wakeup watermark to %0.2f ms", + (double) pa_bytes_to_usec(u->tsched_watermark, &u->source->sample_spec) / PA_USEC_PER_MSEC); return; } /* Hmm, we cannot increase the watermark any further, hence let's raise the latency */ old_min_latency = u->source->thread_info.min_latency; - new_min_latency = PA_MIN(old_min_latency * 2, old_min_latency + TSCHED_WATERMARK_STEP_USEC); + new_min_latency = PA_MIN(old_min_latency * 2, old_min_latency + TSCHED_WATERMARK_INC_STEP_USEC); new_min_latency = PA_MIN(new_min_latency, u->source->thread_info.max_latency); if (old_min_latency != new_min_latency) { - pa_log_notice("Increasing minimal latency to %0.2f ms", - (double) new_min_latency / PA_USEC_PER_MSEC); + pa_log_info("Increasing minimal latency to %0.2f ms", + (double) new_min_latency / PA_USEC_PER_MSEC); pa_source_set_latency_range_within_thread(u->source, new_min_latency, 
u->source->thread_info.max_latency); - return; } /* When we reach this we're officialy fucked! */ } +static void decrease_watermark(struct userdata *u) { + size_t old_watermark; + pa_usec_t now; + + pa_assert(u); + pa_assert(u->use_tsched); + + now = pa_rtclock_now(); + + if (u->watermark_dec_not_before <= 0) + goto restart; + + if (u->watermark_dec_not_before > now) + return; + + old_watermark = u->tsched_watermark; + + if (u->tsched_watermark < u->watermark_dec_step) + u->tsched_watermark = u->tsched_watermark / 2; + else + u->tsched_watermark = PA_MAX(u->tsched_watermark / 2, u->tsched_watermark - u->watermark_dec_step); + + fix_tsched_watermark(u); + + if (old_watermark != u->tsched_watermark) + pa_log_info("Decreasing wakeup watermark to %0.2f ms", + (double) pa_bytes_to_usec(u->tsched_watermark, &u->source->sample_spec) / PA_USEC_PER_MSEC); + + /* We don't change the latency range*/ + +restart: + u->watermark_dec_not_before = now + TSCHED_WATERMARK_VERIFY_AFTER_USEC; +} + static pa_usec_t hw_sleep_time(struct userdata *u, pa_usec_t *sleep_usec, pa_usec_t*process_usec) { pa_usec_t wm, usec; + pa_assert(sleep_usec); + pa_assert(process_usec); + pa_assert(u); + pa_assert(u->use_tsched); usec = pa_source_get_requested_latency_within_thread(u->source); @@ -352,7 +403,7 @@ static int try_recover(struct userdata *u, const char *call, int err) { return 0; } -static size_t check_left_to_record(struct userdata *u, size_t n_bytes) { +static size_t check_left_to_record(struct userdata *u, size_t n_bytes, pa_bool_t on_timeout) { size_t left_to_record; size_t rec_space = u->hwbuf_size - u->hwbuf_unused; @@ -361,14 +412,11 @@ static size_t check_left_to_record(struct userdata *u, size_t n_bytes) { * it is removed from the buffer. This is particularly important * when block transfer is used. 
*/ - if (n_bytes <= rec_space) { + if (n_bytes <= rec_space) left_to_record = rec_space - n_bytes; + else { -#ifdef DEBUG_TIMING - pa_log_debug("%0.2f ms left to record", (double) pa_bytes_to_usec(left_to_record, &u->source->sample_spec) / PA_USEC_PER_MSEC); -#endif - - } else { + /* We got a dropout. What a mess! */ left_to_record = 0; #ifdef DEBUG_TIMING @@ -377,15 +425,36 @@ static size_t check_left_to_record(struct userdata *u, size_t n_bytes) { if (pa_log_ratelimit()) pa_log_info("Overrun!"); + } - if (u->use_tsched) - adjust_after_overrun(u); +#ifdef DEBUG_TIMING + pa_log_debug("%0.2f ms left to record", (double) pa_bytes_to_usec(left_to_record, &u->source->sample_spec) / PA_USEC_PER_MSEC); +#endif + + if (u->use_tsched) { + pa_bool_t reset_not_before = TRUE; + + if (left_to_record < u->watermark_inc_threshold) + increase_watermark(u); + else if (left_to_record > u->watermark_dec_threshold) { + reset_not_before = FALSE; + + /* We decrease the watermark only if have actually been + * woken up by a timeout. If something else woke us up + * it's too easy to fulfill the deadlines... */ + + if (on_timeout) + decrease_watermark(u); + } + + if (reset_not_before) + u->watermark_dec_not_before = 0; } return left_to_record; } -static int mmap_read(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polled) { +static int mmap_read(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polled, pa_bool_t on_timeout) { pa_bool_t work_done = FALSE; pa_usec_t max_sleep_usec = 0, process_usec = 0; size_t left_to_record; @@ -417,7 +486,8 @@ static int mmap_read(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polled pa_log_debug("avail: %lu", (unsigned long) n_bytes); #endif - left_to_record = check_left_to_record(u, n_bytes); + left_to_record = check_left_to_record(u, n_bytes, on_timeout); + on_timeout = FALSE; if (u->use_tsched) if (!polled && @@ -543,7 +613,7 @@ static int mmap_read(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polled return work_done ? 
1 : 0; } -static int unix_read(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polled) { +static int unix_read(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polled, pa_bool_t on_timeout) { int work_done = FALSE; pa_usec_t max_sleep_usec = 0, process_usec = 0; size_t left_to_record; @@ -570,7 +640,8 @@ static int unix_read(struct userdata *u, pa_usec_t *sleep_usec, pa_bool_t polled } n_bytes = (size_t) n * u->frame_size; - left_to_record = check_left_to_record(u, n_bytes); + left_to_record = check_left_to_record(u, n_bytes, on_timeout); + on_timeout = FALSE; if (u->use_tsched) if (!polled && @@ -1158,11 +1229,12 @@ static void thread_func(void *userdata) { if (PA_SOURCE_IS_OPENED(u->source->thread_info.state)) { int work_done; pa_usec_t sleep_usec = 0; + pa_bool_t on_timeout = pa_rtpoll_timer_elapsed(u->rtpoll); if (u->use_mmap) - work_done = mmap_read(u, &sleep_usec, revents & POLLIN); + work_done = mmap_read(u, &sleep_usec, revents & POLLIN, on_timeout); else - work_done = unix_read(u, &sleep_usec, revents & POLLIN); + work_done = unix_read(u, &sleep_usec, revents & POLLIN, on_timeout); if (work_done < 0) goto fail; @@ -1632,7 +1704,6 @@ pa_source *pa_alsa_source_new(pa_module *m, pa_modargs *ma, const char*driver, p u->fragment_size = frag_size = (uint32_t) (period_frames * frame_size); u->nfragments = nfrags; u->hwbuf_size = u->fragment_size * nfrags; - u->tsched_watermark = pa_usec_to_bytes_round_up(pa_bytes_to_usec_round_up(tsched_watermark, &requested_ss), &u->source->sample_spec); pa_cvolume_mute(&u->hardware_volume, u->source->sample_spec.channels); pa_log_info("Using %u fragments of size %lu bytes, buffer time is %0.2fms", @@ -1640,7 +1711,13 @@ pa_source *pa_alsa_source_new(pa_module *m, pa_modargs *ma, const char*driver, p (double) pa_bytes_to_usec(u->hwbuf_size, &ss) / PA_USEC_PER_MSEC); if (u->use_tsched) { - u->watermark_step = pa_usec_to_bytes(TSCHED_WATERMARK_STEP_USEC, &u->source->sample_spec); + u->tsched_watermark = 
pa_usec_to_bytes_round_up(pa_bytes_to_usec_round_up(tsched_watermark, &requested_ss), &u->source->sample_spec); + + u->watermark_inc_step = pa_usec_to_bytes(TSCHED_WATERMARK_INC_STEP_USEC, &u->source->sample_spec); + u->watermark_dec_step = pa_usec_to_bytes(TSCHED_WATERMARK_DEC_STEP_USEC, &u->source->sample_spec); + + u->watermark_inc_threshold = pa_usec_to_bytes_round_up(TSCHED_WATERMARK_INC_THRESHOLD_USEC, &u->source->sample_spec); + u->watermark_dec_threshold = pa_usec_to_bytes_round_up(TSCHED_WATERMARK_DEC_THRESHOLD_USEC, &u->source->sample_spec); fix_min_sleep_wakeup(u); fix_tsched_watermark(u); diff --git a/src/pulsecore/rtpoll.c b/src/pulsecore/rtpoll.c index 42708a8a..666cbc98 100644 --- a/src/pulsecore/rtpoll.c +++ b/src/pulsecore/rtpoll.c @@ -63,6 +63,7 @@ struct pa_rtpoll { pa_bool_t running:1; pa_bool_t rebuild_needed:1; pa_bool_t quit:1; + pa_bool_t timer_elapsed:1; #ifdef DEBUG_TIMING pa_usec_t timestamp; @@ -94,26 +95,14 @@ PA_STATIC_FLIST_DECLARE(items, 0, pa_xfree); pa_rtpoll *pa_rtpoll_new(void) { pa_rtpoll *p; - p = pa_xnew(pa_rtpoll, 1); + p = pa_xnew0(pa_rtpoll, 1); p->n_pollfd_alloc = 32; p->pollfd = pa_xnew(struct pollfd, p->n_pollfd_alloc); p->pollfd2 = pa_xnew(struct pollfd, p->n_pollfd_alloc); - p->n_pollfd_used = 0; - - pa_zero(p->next_elapse); - p->timer_enabled = FALSE; - - p->running = FALSE; - p->scan_for_dead = FALSE; - p->rebuild_needed = FALSE; - p->quit = FALSE; - - PA_LLIST_HEAD_INIT(pa_rtpoll_item, p->items); #ifdef DEBUG_TIMING p->timestamp = pa_rtclock_now(); - p->slept = p->awake = 0; #endif return p; @@ -229,6 +218,7 @@ int pa_rtpoll_run(pa_rtpoll *p, pa_bool_t wait_op) { pa_assert(!p->running); p->running = TRUE; + p->timer_elapsed = FALSE; /* First, let's do some work */ for (i = p->items; i && i->priority < PA_RTPOLL_NEVER; i = i->next) { @@ -286,7 +276,7 @@ int pa_rtpoll_run(pa_rtpoll *p, pa_bool_t wait_op) { if (p->rebuild_needed) rtpoll_rebuild(p); - memset(&timeout, 0, sizeof(timeout)); + pa_zero(timeout); /* 
Calculate timeout */ if (wait_op && !p->quit && p->timer_enabled) { @@ -314,9 +304,11 @@ int pa_rtpoll_run(pa_rtpoll *p, pa_bool_t wait_op) { r = ppoll(p->pollfd, p->n_pollfd_used, (!wait_op || p->quit || p->timer_enabled) ? &ts : NULL, NULL); } #else - r = poll(p->pollfd, p->n_pollfd_used, (!wait_op || p->quit || p->timer_enabled) ? (int) ((timeout.tv_sec*1000) + (timeout.tv_usec / 1000)) : -1); + r = poll(p->pollfd, p->n_pollfd_used, (!wait_op || p->quit || p->timer_enabled) ? (int) ((timeout.tv_sec*1000) + (timeout.tv_usec / 1000)) : -1); #endif + p->timer_elapsed = r == 0; + #ifdef DEBUG_TIMING { pa_usec_t now = pa_rtclock_now(); @@ -628,3 +620,9 @@ void pa_rtpoll_quit(pa_rtpoll *p) { p->quit = TRUE; } + +pa_bool_t pa_rtpoll_timer_elapsed(pa_rtpoll *p) { + pa_assert(p); + + return p->timer_elapsed; +} diff --git a/src/pulsecore/rtpoll.h b/src/pulsecore/rtpoll.h index d2d69cad..b2a87fca 100644 --- a/src/pulsecore/rtpoll.h +++ b/src/pulsecore/rtpoll.h @@ -73,6 +73,10 @@ void pa_rtpoll_set_timer_absolute(pa_rtpoll *p, pa_usec_t usec); void pa_rtpoll_set_timer_relative(pa_rtpoll *p, pa_usec_t usec); void pa_rtpoll_set_timer_disabled(pa_rtpoll *p); +/* Return TRUE when the elapsed timer was the reason for + * the last pa_rtpoll_run() invocation to finish */ +pa_bool_t pa_rtpoll_timer_elapsed(pa_rtpoll *p); + /* A new fd wakeup item for pa_rtpoll */ pa_rtpoll_item *pa_rtpoll_item_new(pa_rtpoll *p, pa_rtpoll_priority_t prio, unsigned n_fds); void pa_rtpoll_item_free(pa_rtpoll_item *i); -- cgit