From 7fbcefdd87d222c04023dcb1b3ff3cf5aeae8613 Mon Sep 17 00:00:00 2001 From: Brad Midgley Date: Thu, 4 Oct 2007 18:28:17 +0000 Subject: enable 4 subband lower half optimizations --- sbc/sbc.c | 55 +++++++++++++++++++++++-------------------------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/sbc/sbc.c b/sbc/sbc.c index dcff5382..a5cbf803 100644 --- a/sbc/sbc.c +++ b/sbc/sbc.c @@ -700,12 +700,7 @@ static inline void _sbc_analyze_four(const int32_t *in, int32_t *out) sbc_extended_t res; sbc_extended_t t[8]; - -#if 0 - /* temporary results */ - sbc_extended_t s[2], p[6], d[4]; -#endif - out[0] = out[1] = out[2] = out[3] = 0; + sbc_extended_t s[4]; MUL(res, _sbc_proto_4[0], (in[8] - in[32])); // Q18 MULA(res, _sbc_proto_4[1], (in[16] - in[24])); @@ -744,14 +739,15 @@ static inline void _sbc_analyze_four(const int32_t *in, int32_t *out) MULA(res, _sbc_proto_4[13], in[29]); MULA(res, _sbc_proto_4[12], in[37]); t[5] = SCALE4_STAGE1(res); - +#if 0 + /* don't compute... this term always multiplies with cos(pi/2) = 0 */ MUL(res, _sbc_proto_4[11], in[6]); MULA(res, _sbc_proto_4[10], in[14]); MULA(res, _sbc_proto_4[9], in[22]); MULA(res, _sbc_proto_4[8], in[30]); MULA(res, _sbc_proto_4[7], in[38]); t[6] = SCALE4_STAGE1(res); - +#endif MUL(res, _sbc_proto_4[6], in[7]); MULA(res, _sbc_proto_4[5], in[15]); MULA(res, _sbc_proto_4[4], in[23]); @@ -759,7 +755,7 @@ static inline void _sbc_analyze_four(const int32_t *in, int32_t *out) MULA(res, _sbc_proto_4[2], in[39]); t[7] = SCALE4_STAGE1(res); -#if 1 +#if 0 MUL(res, _anamatrix4[0], t[0]); MULA(res, _anamatrix4[1], t[1]); MULA(res, _anamatrix4[2], t[2]); @@ -796,24 +792,24 @@ static inline void _sbc_analyze_four(const int32_t *in, int32_t *out) MULA(res, -_anamatrix4[3], t[5]); MULA(res, _anamatrix4[3], t[7]); out[3] = SCALE4_STAGE2(res); -#endif -#if 0 - s[0] = t[1] + t[3]; - s[1] = t[5] - t[6]; - MUL(p[0], _anamatrix4[0], t[0] + t[4]); - MUL(p[1], _anamatrix4[1], s[0]); - MUL(p[2], _anamatrix4[2], t[2]); - 
MUL(p[3], _anamatrix4[3], s[0]); - MUL(p[4], _anamatrix4[3], s[1]); - MUL(p[5], _anamatrix4[1], s[1]); - d[0] = p[0] + p[2]; - d[1] = p[2] - p[0]; - d[2] = p[1] + p[4]; - d[3] = p[3] - p[5]; - out[0] = SCALE4_STAGE2(d[0] + d[2]); - out[1] = SCALE4_STAGE2(d[1] + d[3]); - out[2] = SCALE4_STAGE2(d[1] - d[3]); - out[3] = SCALE4_STAGE2(d[0] - d[2]); +#else + /* some of these multiplies could be factored more but something overflows */ + /* eg replace the first two lines with MUL(s[0], _anamatrix4[0], t[0] + t[4]) */ + MUL(s[0], _anamatrix4[0], t[0]); + MULA(s[0], _anamatrix4[0], t[4]); + MUL(s[1], _anamatrix4[2], t[2]); + MUL(s[2], _anamatrix4[1], t[1]); + MULA(s[2], _anamatrix4[1], t[3]); + MULA(s[2], _anamatrix4[3], t[5]); + MULA(s[2], -_anamatrix4[3], t[7]); + MUL(s[3], _anamatrix4[3], t[1]); + MULA(s[3], _anamatrix4[3], t[3]); + MULA(s[3], -_anamatrix4[1], t[5]); + MULA(s[3], _anamatrix4[1], t[7]); + out[0] = SCALE4_STAGE2( s[0] + s[1] + s[2]); + out[1] = SCALE4_STAGE2(-s[0] + s[1] + s[3]); + out[2] = SCALE4_STAGE2(-s[0] + s[1] - s[3]); + out[3] = SCALE4_STAGE2( s[0] + s[1] - s[2]); #endif } static inline void sbc_analyze_four(struct sbc_encoder_state *state, @@ -832,12 +828,7 @@ static inline void _sbc_analyze_eight(const int32_t *in, int32_t *out) { sbc_extended_t res; sbc_extended_t t[8]; - -#if 1 - /* temporary results */ sbc_extended_t s[8]; -#endif - out[0] = out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0; MUL(res, _sbc_proto_8[0], (in[16] - in[64])); // Q18 = Q18 * Q0 MULA(res, _sbc_proto_8[1], (in[32] - in[48])); -- cgit