From a8d76e99ff0b13f424b44433fb169df6735a5842 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Fri, 8 Oct 2010 18:47:00 +0200 Subject: SSE/MMX/ARM: Fix high frequency noise with unusual number of channels In the assembly optimized versions of SSE, a noise could occur when the number of channels were 3,5,6 or 7. For MMX and ARM, this could occur when the number of channels were 3. Signed-off-by: David Henningsson --- src/pulsecore/svolume_arm.c | 5 ++++- src/pulsecore/svolume_mmx.c | 14 ++++++++------ src/pulsecore/svolume_sse.c | 19 +++++++++++++------ 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/pulsecore/svolume_arm.c b/src/pulsecore/svolume_arm.c index 5bd1448f..fdd8f09a 100644 --- a/src/pulsecore/svolume_arm.c +++ b/src/pulsecore/svolume_arm.c @@ -47,7 +47,10 @@ pa_volume_s16ne_arm (int16_t *samples, int32_t *volumes, unsigned channels, unsi { int32_t *ve; - channels = PA_MAX (4U, channels); + /* Channels must be at least 4, and always a multiple of the original number. + * This is also the max amount we overread the volume array, which should + * have enough padding. */ + channels = channels == 3 ? 6 : PA_MAX (4U, channels); ve = volumes + channels; __asm__ __volatile__ ( diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index e50ebee8..a71a39ba 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -98,9 +98,10 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi { pa_reg_x86 channel, temp; - /* the max number of samples we process at a time, this is also the max amount - * we overread the volume array, which should have enough padding. */ - channels = PA_MAX (4U, channels); + /* Channels must be at least 4, and always a multiple of the original number. + * This is also the max amount we overread the volume array, which should + * have enough padding. */ + channels = channels == 3 ? 6 : PA_MAX (4U, channels); __asm__ __volatile__ ( " xor %3, %3 \n\t" @@ -164,9 +165,10 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi { pa_reg_x86 channel, temp; - /* the max number of samples we process at a time, this is also the max amount - * we overread the volume array, which should have enough padding. */ - channels = PA_MAX (4U, channels); + /* Channels must be at least 4, and always a multiple of the original number. + * This is also the max amount we overread the volume array, which should + * have enough padding. */ + channels = channels == 3 ? 6 : PA_MAX (4U, channels); __asm__ __volatile__ ( " xor %3, %3 \n\t" diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c index 1cc4e0aa..5983164b 100644 --- a/src/pulsecore/svolume_sse.c +++ b/src/pulsecore/svolume_sse.c @@ -74,14 +74,19 @@ " por %%xmm4, "#s1" \n\t" /* .. | l h | */ \ " por %%xmm5, "#s2" \n\t" + +static int channel_overread_table[8] = {8,8,8,12,8,10,12,14}; + static void pa_volume_s16ne_sse2 (int16_t *samples, int32_t *volumes, unsigned channels, unsigned length) { pa_reg_x86 channel, temp; - /* the max number of samples we process at a time, this is also the max amount - * we overread the volume array, which should have enough padding. */ - channels = PA_MAX (8U, channels); + /* Channels must be at least 8 and always a multiple of the original number. + * This is also the max amount we overread the volume array, which should + * have enough padding. */ + if (channels < 8) + channels = channel_overread_table[channels]; __asm__ __volatile__ ( " xor %3, %3 \n\t" @@ -159,9 +164,11 @@ pa_volume_s16re_sse2 (int16_t *samples, int32_t *volumes, unsigned channels, uns { pa_reg_x86 channel, temp; - /* the max number of samples we process at a time, this is also the max amount - * we overread the volume array, which should have enough padding. */ - channels = PA_MAX (8U, channels); + /* Channels must be at least 8 and always a multiple of the original number. + * This is also the max amount we overread the volume array, which should + * have enough padding. */ + if (channels < 8) + channels = channel_overread_table[channels]; __asm__ __volatile__ ( " xor %3, %3 \n\t" -- cgit