From 3d5a572694388008fd4cf522699fc6c431f97325 Mon Sep 17 00:00:00 2001 From: Wim Taymans Date: Sat, 12 Sep 2009 12:02:59 +0200 Subject: svolume_mmx: optimize some more We can reorder the algortihm a bit like we do for sse so that we don't need the contants and masking instructions. Saves 2 instructions for the mmx code. --- src/pulsecore/svolume_mmx.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'src/pulsecore/svolume_mmx.c') diff --git a/src/pulsecore/svolume_mmx.c b/src/pulsecore/svolume_mmx.c index 74918e78..170f01d4 100644 --- a/src/pulsecore/svolume_mmx.c +++ b/src/pulsecore/svolume_mmx.c @@ -57,14 +57,12 @@ " punpcklwd %%mm4, "#s" \n\t" /* .. | 0 | p0 | */ \ " pcmpgtw "#v", %%mm4 \n\t" /* .. | 0 | s(vl) | */ \ " pand "#s", %%mm4 \n\t" /* .. | 0 | (p0) | (vl >> 15) & p */ \ - " movq %%mm6, %%mm5 \n\t" /* .. | ffff | 0 | */ \ - " pand "#v", %%mm5 \n\t" /* .. | vh | 0 | */ \ - " por %%mm5, %%mm4 \n\t" /* .. | vh | (p0) | */ \ - " pmulhw "#s", "#v" \n\t" /* .. | 0 | vl*p0 | */ \ - " paddw %%mm4, "#v" \n\t" /* .. | vh | vl*p0 | vh + sign correct */ \ - " pslld $16, "#s" \n\t" /* .. | p0 | 0 | */ \ - " por %%mm7, "#s" \n\t" /* .. | p0 | 1 | */ \ - " pmaddwd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \ + " movq "#s", %%mm5 \n\t" \ + " pmulhw "#v", "#s" \n\t" /* .. | 0 | vl*p0 | */ \ + " paddw %%mm4, "#s" \n\t" /* .. | 0 | vl*p0 | + sign correct */ \ + " psrld $16, "#v" \n\t" /* .. | 0 | vh | */ \ + " pmaddwd %%mm5, "#v" \n\t" /* .. | p0 * vh | */ \ + " paddd "#s", "#v" \n\t" /* .. | p0 * v0 | */ \ " packssdw "#v", "#v" \n\t" /* .. | p1*v1 | p0*v0 | */ /* approximately advances %3 = (%3 + a) % b. This function requires that @@ -105,10 +103,6 @@ pa_volume_s16ne_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi __asm__ __volatile__ ( " xor %3, %3 \n\t" " sar $1, %2 \n\t" /* length /= sizeof (int16_t) */ - " pcmpeqw %%mm6, %%mm6 \n\t" /* .. | ffff | ffff | */ - " pcmpeqw %%mm7, %%mm7 \n\t" /* .. | ffff | ffff | */ - " pslld $16, %%mm6 \n\t" /* .. | ffff | 0 | */ - " psrld $31, %%mm7 \n\t" /* .. | 0 | 1 | */ " test $1, %2 \n\t" /* check for odd samples */ " je 2f \n\t" @@ -239,7 +233,7 @@ pa_volume_s16re_mmx (int16_t *samples, int32_t *volumes, unsigned channels, unsi ); } -#undef RUN_TEST +#define RUN_TEST #ifdef RUN_TEST #define CHANNELS 2 -- cgit