From 71e066c873e5bd31bd446ac0f8d0e97cc0b12ace Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 9 Sep 2009 04:28:22 +0200 Subject: simd: be more precise which SIMD optimizations we activate --- src/pulsecore/svolume_mmx.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/pulsecore/svolume_mmx.c') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index 8510b0c4..74918e78 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -301,13 +301,16 @@ static void run_test (void) { void pa_volume_func_init_mmx (pa_cpu_x86_flag_t flags) { #if defined (__i386__) || defined (__amd64__) - pa_log_info("Initialising MMX optimized functions."); #ifdef RUN_TEST run_test (); #endif - pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx); - pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx); + if (flags & PA_CPU_X86_MMX) { + pa_log_info("Initialising MMX optimized functions."); + + pa_set_volume_func (PA_SAMPLE_S16NE, (pa_do_volume_func_t) pa_volume_s16ne_mmx); + pa_set_volume_func (PA_SAMPLE_S16RE, (pa_do_volume_func_t) pa_volume_s16re_mmx); + } #endif /* defined (__i386__) || defined (__amd64__) */ } -- cgit From 3d5a572694388008fd4cf522699fc6c431f97325 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Sat, 12 Sep 2009 12:02:59 +0200 Subject: svolume_mmx: optimize some more We can reorder the algorithm a bit like we do for sse so that we don't need the constants and masking instructions. Saves 2 instructions for the mmx code. --- src/pulsecore/svolume_mmx.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'src/pulsecore/svolume_mmx.c') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index 74918e78..170f01d4 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -57,14 +57,12 @@ " punpcklwd %%mm4, "#s" \n\t" /* .. | 0 | p0 | */ \ " pcmpgtw "#v", %%mm4 \n\t" /* .. 
| 0 | s(vl) | */ \ " pand "#s", %%mm4 \n\t" /* .. | 0 | (p0) | (vl >> 15) & p */ \ - " movq %%mm6, %%mm5 \n\t" /* .. | ffff | 0 | */ \ - " pand "#v", %%mm5 \n\t" /* .. | vh | 0 | */ \ - " por %%mm5, %%mm4 \n\t" /* .. | vh | (p0) | */ \ - " pmulhw "#s", "#v" \n\t" /* .. | 0 | vl*p0 | */ \ - " paddw %%mm4, "#v" \n\t" /* .. | vh | vl*p0 | vh + sign correct */ \ - " pslld $16, "#s" \n\t" /* .. | p0 | 0 | */ \ - " por %%mm7, "#s" \n\t" /* .. | p0 | 1 | */ \ - " pmaddwd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \ + " movq "#s", %%mm5 \n\t" \ + " pmulhw "#v", "#s" \n\t" /* .. | 0 | vl*p0 | */ \ + " paddw %%mm4, "#s" \n\t" /* .. | 0 | vl*p0 | + sign correct */ \ + " psrld $16, "#v" \n\t" /* .. | 0 | vh | */ \ + " pmaddwd %%mm5, "#v" \n\t" /* .. | p0 * vh | */ \ + " paddd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \ " packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */ /* approximately advances %3 = (%3 + a) % b. This function requires that @@ -105,10 +103,6 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi __asm__ __volatile__ ( " xor %3, %3 \n\t" " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ - " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */ - " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */ - " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */ - " psrld $31, %%mm7 \n\t" /* .. 
| 0 | 1 | */ " test $1, %2 \n\t" /* check for odd samples */ " je 2f \n\t" @@ -239,7 +233,7 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi ); } -#undef RUN_TEST +#define RUN_TEST #ifdef RUN_TEST #define CHANNELS 2 -- cgit From 231c17be0330a3621f5249c5c7ea0ce521085c61 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Thu, 17 Sep 2009 01:34:02 +0200 Subject: svolume_mmx: disable test accidentally left on --- src/pulsecore/svolume_mmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/pulsecore/svolume_mmx.c') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index 170f01d4..62f3397e 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -233,7 +233,7 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi ); } -#define RUN_TEST +#undef RUN_TEST #ifdef RUN_TEST #define CHANNELS 2 -- cgit From 5eecd8ea7dcaca7536240b8a5800c686db51eee5 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Wed, 23 Sep 2009 17:16:04 +0200 Subject: svolume: tweak constraints for 32 bits Tweak the constraints a little so that register starved 32bit systems can select a stack variable for the channel parameter instead of reusing one of the registers we're using in the code. 
--- src/pulsecore/svolume_mmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/pulsecore/svolume_mmx.c') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index 62f3397e..1768eb50 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -152,7 +152,7 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi " emms \n\t" : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) - : "r" ((pa_reg_x86)channels) + : "X" ((pa_reg_x86)channels) : "cc" ); } @@ -228,7 +228,7 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi " emms \n\t" : "+r" (samples), "+r" (volumes), "+r" (length), "=D" ((pa_reg_x86)channel), "=&r" (temp) - : "r" ((pa_reg_x86)channels) + : "X" ((pa_reg_x86)channels) : "cc" ); } -- cgit