summary | refs | log | tree | commit | diff | stats
path: root/sbc
diff options
context:
space:
mode:
author Brad Midgley <bmidgley@xmission.com> 2008-01-28 17:26:22 +0000
committer Brad Midgley <bmidgley@xmission.com> 2008-01-28 17:26:22 +0000
commit 38158dc5dd8e7c62ad2decfec395e3ec2c7e280b (patch)
tree c39ccd661cecba1ac5bea491d66d8ad8ca836fde /sbc
parent ba255beb79afb9c00ae5b71821f84f911aa8d1fe (diff)
remove 16x16 mult optimization--gcc actually generates more costly code
Diffstat (limited to 'sbc')
-rw-r--r-- sbc/sbc.c 186
-rw-r--r-- sbc/sbc_math.h 10
-rw-r--r-- sbc/sbc_tables.h 8
3 files changed, 101 insertions, 103 deletions
diff --git a/sbc/sbc.c b/sbc/sbc.c
index c9ea5b5c..97614fb9 100644
--- a/sbc/sbc.c
+++ b/sbc/sbc.c
@@ -719,47 +719,47 @@ static inline void _sbc_analyze_four(const int16_t *in, int32_t *out)
MULA(res, _sbc_proto_4[1], in[16] - in[24]);
t[0] = SCALE4_STAGE1(res); /* Q8 */
- MUL32(res, _sbc_proto_4[2], in[1]);
- MULA32(res, _sbc_proto_4[3], in[9]);
- MULA32(res, _sbc_proto_4[4], in[17]);
- MULA32(res, _sbc_proto_4[5], in[25]);
- MULA32(res, _sbc_proto_4[6], in[33]);
+ MUL(res, _sbc_proto_4[2], in[1]);
+ MULA(res, _sbc_proto_4[3], in[9]);
+ MULA(res, _sbc_proto_4[4], in[17]);
+ MULA(res, _sbc_proto_4[5], in[25]);
+ MULA(res, _sbc_proto_4[6], in[33]);
t[1] = SCALE4_STAGE1(res);
- MUL32(res, _sbc_proto_4[7], in[2]);
- MULA32(res, _sbc_proto_4[8], in[10]);
- MULA32(res, _sbc_proto_4[9], in[18]);
- MULA32(res, _sbc_proto_4[10], in[26]);
- MULA32(res, _sbc_proto_4[11], in[34]);
+ MUL(res, _sbc_proto_4[7], in[2]);
+ MULA(res, _sbc_proto_4[8], in[10]);
+ MULA(res, _sbc_proto_4[9], in[18]);
+ MULA(res, _sbc_proto_4[10], in[26]);
+ MULA(res, _sbc_proto_4[11], in[34]);
t[2] = SCALE4_STAGE1(res);
- MUL32(res, _sbc_proto_4[12], in[3]);
- MULA32(res, _sbc_proto_4[13], in[11]);
- MULA32(res, _sbc_proto_4[14], in[19]);
- MULA32(res, _sbc_proto_4[15], in[27]);
- MULA32(res, _sbc_proto_4[16], in[35]);
+ MUL(res, _sbc_proto_4[12], in[3]);
+ MULA(res, _sbc_proto_4[13], in[11]);
+ MULA(res, _sbc_proto_4[14], in[19]);
+ MULA(res, _sbc_proto_4[15], in[27]);
+ MULA(res, _sbc_proto_4[16], in[35]);
t[3] = SCALE4_STAGE1(res);
MUL(res, _sbc_proto_4[17], in[4] + in[36]);
MULA(res, _sbc_proto_4[18], in[12] + in[28]);
- MULA32(res, _sbc_proto_4[19], in[20]);
+ MULA(res, _sbc_proto_4[19], in[20]);
t[4] = SCALE4_STAGE1(res);
- MUL32(res, _sbc_proto_4[16], in[5]);
- MULA32(res, _sbc_proto_4[15], in[13]);
- MULA32(res, _sbc_proto_4[14], in[21]);
- MULA32(res, _sbc_proto_4[13], in[29]);
- MULA32(res, _sbc_proto_4[12], in[37]);
+ MUL(res, _sbc_proto_4[16], in[5]);
+ MULA(res, _sbc_proto_4[15], in[13]);
+ MULA(res, _sbc_proto_4[14], in[21]);
+ MULA(res, _sbc_proto_4[13], in[29]);
+ MULA(res, _sbc_proto_4[12], in[37]);
t[5] = SCALE4_STAGE1(res);
/* don't compute t[6]... this term always multiplies
* with cos(pi/2) = 0 */
- MUL32(res, _sbc_proto_4[6], in[7]);
- MULA32(res, _sbc_proto_4[5], in[15]);
- MULA32(res, _sbc_proto_4[4], in[23]);
- MULA32(res, _sbc_proto_4[3], in[31]);
- MULA32(res, _sbc_proto_4[2], in[39]);
+ MUL(res, _sbc_proto_4[6], in[7]);
+ MULA(res, _sbc_proto_4[5], in[15]);
+ MULA(res, _sbc_proto_4[4], in[23]);
+ MULA(res, _sbc_proto_4[3], in[31]);
+ MULA(res, _sbc_proto_4[2], in[39]);
t[7] = SCALE4_STAGE1(res);
MUL(s[0], _anamatrix4[0], t[0] + t[4]);
@@ -800,89 +800,89 @@ static inline void _sbc_analyze_eight(const int16_t *in, int32_t *out)
sbc_fixed_t t[8];
sbc_extended_t s[8];
- MUL32(res, _sbc_proto_8[0], (in[16] - in[64])); /* Q18 = Q18 * Q0 */
- MULA32(res, _sbc_proto_8[1], (in[32] - in[48]));
- MULA32(res, _sbc_proto_8[2], in[4]);
- MULA32(res, _sbc_proto_8[3], in[20]);
- MULA32(res, _sbc_proto_8[4], in[36]);
- MULA32(res, _sbc_proto_8[5], in[52]);
+ MUL(res, _sbc_proto_8[0], (in[16] - in[64])); /* Q18 = Q18 * Q0 */
+ MULA(res, _sbc_proto_8[1], (in[32] - in[48]));
+ MULA(res, _sbc_proto_8[2], in[4]);
+ MULA(res, _sbc_proto_8[3], in[20]);
+ MULA(res, _sbc_proto_8[4], in[36]);
+ MULA(res, _sbc_proto_8[5], in[52]);
t[0] = SCALE8_STAGE1(res); /* Q10 */
- MUL32(res, _sbc_proto_8[6], in[2]);
- MULA32(res, _sbc_proto_8[7], in[18]);
- MULA32(res, _sbc_proto_8[8], in[34]);
- MULA32(res, _sbc_proto_8[9], in[50]);
- MULA32(res, _sbc_proto_8[10], in[66]);
+ MUL(res, _sbc_proto_8[6], in[2]);
+ MULA(res, _sbc_proto_8[7], in[18]);
+ MULA(res, _sbc_proto_8[8], in[34]);
+ MULA(res, _sbc_proto_8[9], in[50]);
+ MULA(res, _sbc_proto_8[10], in[66]);
t[1] = SCALE8_STAGE1(res);
- MUL32(res, _sbc_proto_8[11], in[1]);
- MULA32(res, _sbc_proto_8[12], in[17]);
- MULA32(res, _sbc_proto_8[13], in[33]);
- MULA32(res, _sbc_proto_8[14], in[49]);
- MULA32(res, _sbc_proto_8[15], in[65]);
- MULA32(res, _sbc_proto_8[16], in[3]);
- MULA32(res, _sbc_proto_8[17], in[19]);
- MULA32(res, _sbc_proto_8[18], in[35]);
- MULA32(res, _sbc_proto_8[19], in[51]);
- MULA32(res, _sbc_proto_8[20], in[67]);
+ MUL(res, _sbc_proto_8[11], in[1]);
+ MULA(res, _sbc_proto_8[12], in[17]);
+ MULA(res, _sbc_proto_8[13], in[33]);
+ MULA(res, _sbc_proto_8[14], in[49]);
+ MULA(res, _sbc_proto_8[15], in[65]);
+ MULA(res, _sbc_proto_8[16], in[3]);
+ MULA(res, _sbc_proto_8[17], in[19]);
+ MULA(res, _sbc_proto_8[18], in[35]);
+ MULA(res, _sbc_proto_8[19], in[51]);
+ MULA(res, _sbc_proto_8[20], in[67]);
t[2] = SCALE8_STAGE1(res);
- MUL32(res, _sbc_proto_8[21], in[5]);
- MULA32(res, _sbc_proto_8[22], in[21]);
- MULA32(res, _sbc_proto_8[23], in[37]);
- MULA32(res, _sbc_proto_8[24], in[53]);
- MULA32(res, _sbc_proto_8[25], in[69]);
- MULA32(res, -_sbc_proto_8[15], in[15]);
- MULA32(res, -_sbc_proto_8[14], in[31]);
- MULA32(res, -_sbc_proto_8[13], in[47]);
- MULA32(res, -_sbc_proto_8[12], in[63]);
- MULA32(res, -_sbc_proto_8[11], in[79]);
+ MUL(res, _sbc_proto_8[21], in[5]);
+ MULA(res, _sbc_proto_8[22], in[21]);
+ MULA(res, _sbc_proto_8[23], in[37]);
+ MULA(res, _sbc_proto_8[24], in[53]);
+ MULA(res, _sbc_proto_8[25], in[69]);
+ MULA(res, -_sbc_proto_8[15], in[15]);
+ MULA(res, -_sbc_proto_8[14], in[31]);
+ MULA(res, -_sbc_proto_8[13], in[47]);
+ MULA(res, -_sbc_proto_8[12], in[63]);
+ MULA(res, -_sbc_proto_8[11], in[79]);
t[3] = SCALE8_STAGE1(res);
- MUL32(res, _sbc_proto_8[26], in[6]);
- MULA32(res, _sbc_proto_8[27], in[22]);
- MULA32(res, _sbc_proto_8[28], in[38]);
- MULA32(res, _sbc_proto_8[29], in[54]);
- MULA32(res, _sbc_proto_8[30], in[70]);
- MULA32(res, -_sbc_proto_8[10], in[14]);
- MULA32(res, -_sbc_proto_8[9], in[30]);
- MULA32(res, -_sbc_proto_8[8], in[46]);
- MULA32(res, -_sbc_proto_8[7], in[62]);
- MULA32(res, -_sbc_proto_8[6], in[78]);
+ MUL(res, _sbc_proto_8[26], in[6]);
+ MULA(res, _sbc_proto_8[27], in[22]);
+ MULA(res, _sbc_proto_8[28], in[38]);
+ MULA(res, _sbc_proto_8[29], in[54]);
+ MULA(res, _sbc_proto_8[30], in[70]);
+ MULA(res, -_sbc_proto_8[10], in[14]);
+ MULA(res, -_sbc_proto_8[9], in[30]);
+ MULA(res, -_sbc_proto_8[8], in[46]);
+ MULA(res, -_sbc_proto_8[7], in[62]);
+ MULA(res, -_sbc_proto_8[6], in[78]);
t[4] = SCALE8_STAGE1(res);
- MUL32(res, _sbc_proto_8[31], in[7]);
- MULA32(res, _sbc_proto_8[32], in[23]);
- MULA32(res, _sbc_proto_8[33], in[39]);
- MULA32(res, _sbc_proto_8[34], in[55]);
- MULA32(res, _sbc_proto_8[35], in[71]);
- MULA32(res, -_sbc_proto_8[20], in[13]);
- MULA32(res, -_sbc_proto_8[19], in[29]);
- MULA32(res, -_sbc_proto_8[18], in[45]);
- MULA32(res, -_sbc_proto_8[17], in[61]);
- MULA32(res, -_sbc_proto_8[16], in[77]);
+ MUL(res, _sbc_proto_8[31], in[7]);
+ MULA(res, _sbc_proto_8[32], in[23]);
+ MULA(res, _sbc_proto_8[33], in[39]);
+ MULA(res, _sbc_proto_8[34], in[55]);
+ MULA(res, _sbc_proto_8[35], in[71]);
+ MULA(res, -_sbc_proto_8[20], in[13]);
+ MULA(res, -_sbc_proto_8[19], in[29]);
+ MULA(res, -_sbc_proto_8[18], in[45]);
+ MULA(res, -_sbc_proto_8[17], in[61]);
+ MULA(res, -_sbc_proto_8[16], in[77]);
t[5] = SCALE8_STAGE1(res);
MUL(res, _sbc_proto_8[36], in[8] + in[72]);
MULA(res, _sbc_proto_8[37], in[24] + in[56]);
- MULA32(res, _sbc_proto_8[38], in[40]);
- MULA32(res, -_sbc_proto_8[39], in[12]);
- MULA32(res, -_sbc_proto_8[5], in[28]);
- MULA32(res, -_sbc_proto_8[4], in[44]);
- MULA32(res, -_sbc_proto_8[3], in[60]);
- MULA32(res, -_sbc_proto_8[2], in[76]);
+ MULA(res, _sbc_proto_8[38], in[40]);
+ MULA(res, -_sbc_proto_8[39], in[12]);
+ MULA(res, -_sbc_proto_8[5], in[28]);
+ MULA(res, -_sbc_proto_8[4], in[44]);
+ MULA(res, -_sbc_proto_8[3], in[60]);
+ MULA(res, -_sbc_proto_8[2], in[76]);
t[6] = SCALE8_STAGE1(res);
- MUL32(res, _sbc_proto_8[35], in[9]);
- MULA32(res, _sbc_proto_8[34], in[25]);
- MULA32(res, _sbc_proto_8[33], in[41]);
- MULA32(res, _sbc_proto_8[32], in[57]);
- MULA32(res, _sbc_proto_8[31], in[73]);
- MULA32(res, -_sbc_proto_8[25], in[11]);
- MULA32(res, -_sbc_proto_8[24], in[27]);
- MULA32(res, -_sbc_proto_8[23], in[43]);
- MULA32(res, -_sbc_proto_8[22], in[59]);
- MULA32(res, -_sbc_proto_8[21], in[75]);
+ MUL(res, _sbc_proto_8[35], in[9]);
+ MULA(res, _sbc_proto_8[34], in[25]);
+ MULA(res, _sbc_proto_8[33], in[41]);
+ MULA(res, _sbc_proto_8[32], in[57]);
+ MULA(res, _sbc_proto_8[31], in[73]);
+ MULA(res, -_sbc_proto_8[25], in[11]);
+ MULA(res, -_sbc_proto_8[24], in[27]);
+ MULA(res, -_sbc_proto_8[23], in[43]);
+ MULA(res, -_sbc_proto_8[22], in[59]);
+ MULA(res, -_sbc_proto_8[21], in[75]);
t[7] = SCALE8_STAGE1(res);
MUL(s[0], _anamatrix8[0], t[0]); /* = Q14 * Q10 */
diff --git a/sbc/sbc_math.h b/sbc/sbc_math.h
index c8c72c75..625d4dd0 100644
--- a/sbc/sbc_math.h
+++ b/sbc/sbc_math.h
@@ -31,20 +31,20 @@
#define ASR_64(val, bits) ((-2 >> 1 == -1) ? \
((long long)(val)) >> (bits) : ((long long) (val)) / (1 << (bits)))
-#define SCALE_PROTO4_TBL 16
+#define SCALE_PROTO4_TBL 15
#define SCALE_ANA4_TBL 16
-#define SCALE_PROTO8_TBL 16
+#define SCALE_PROTO8_TBL 15
#define SCALE_ANA8_TBL 16
#define SCALE_SPROTO4_TBL 16
#define SCALE_SPROTO8_TBL 16
#define SCALE_NPROTO4_TBL 10
#define SCALE_NPROTO8_TBL 12
#define SCALE_SAMPLES 14
-#define SCALE4_STAGE1_BITS 9
+#define SCALE4_STAGE1_BITS 10
#define SCALE4_STAGE2_BITS 21
#define SCALE4_STAGED1_BITS 18
#define SCALE4_STAGED2_BITS 23
-#define SCALE8_STAGE1_BITS 7
+#define SCALE8_STAGE1_BITS 8
#define SCALE8_STAGE2_BITS 24
#define SCALE8_STAGED1_BITS 18
#define SCALE8_STAGED2_BITS 23
@@ -64,8 +64,6 @@ typedef long long sbc_extended_t;
#define SBC_FIXED_0(val) { val = 0; }
#define ADD(dst, src) { dst += src; }
#define SUB(dst, src) { dst -= src; }
-#define MUL32(dst, a, b) { dst = (sbc_fixed_t) (a) * (b); }
-#define MULA32(dst, a, b) { dst += (sbc_fixed_t) (a) * (b); }
#define MUL(dst, a, b) { dst = (sbc_extended_t) (a) * (b); }
#define MULA(dst, a, b) { dst += (sbc_extended_t) (a) * (b); }
#define DIV2(dst, src) { dst = ASR(src, 1); }
diff --git a/sbc/sbc_tables.h b/sbc/sbc_tables.h
index 0da2a2d3..5e00caca 100644
--- a/sbc/sbc_tables.h
+++ b/sbc/sbc_tables.h
@@ -48,7 +48,7 @@ static const int sbc_offset8[4][8] = {
#define SN4(val) ASR(val, SCALE_NPROTO4_TBL)
#define SN8(val) ASR(val, SCALE_NPROTO8_TBL)
-static const int16_t _sbc_proto_4[20] = {
+static const int32_t _sbc_proto_4[20] = {
SP4(0x02cb3e8c), SP4(0x22b63dc0), SP4(0x002329cc), SP4(0x053b7548),
SP4(0x31eab940), SP4(0xec1f5e60), SP4(0xff3773a8), SP4(0x0061c5a7),
SP4(0x07646680), SP4(0x3f239480), SP4(0xf89f23a8), SP4(0x007a4737),
@@ -56,11 +56,11 @@ static const int16_t _sbc_proto_4[20] = {
SP4(0x00ff11ca), SP4(0x00fb7991), SP4(0x069fdc58), SP4(0x4b584000)
};
-static const int16_t _anamatrix4[4] = {
+static const int32_t _anamatrix4[4] = {
SA4(0x2d413cc0), SA4(0x3b20d780), SA4(0x40000000), SA4(0x187de2a0)
};
-static const int16_t _sbc_proto_8[40] = {
+static const int32_t _sbc_proto_8[40] = {
SP8(0x02e5cd20), SP8(0x22d0c200), SP8(0x006bfe27), SP8(0x07808930),
SP8(0x3f1c8800), SP8(0xf8810d70), SP8(0x002cfdc6), SP8(0x055acf28),
SP8(0x31f566c0), SP8(0xebfe57e0), SP8(0xff27c437), SP8(0x001485cc),
@@ -115,7 +115,7 @@ static const int32_t sbc_proto_8_80m1[] = {
SS8(0x0d9daee0), SS8(0xeac182c0), SS8(0xfdf1c8d4), SS8(0xfff5bd1a)
};
-static const int16_t _anamatrix8[8] = {
+static const int32_t _anamatrix8[8] = {
SA8(0x3b20d780), SA8(0x187de2a0), SA8(0x3ec52f80), SA8(0x3536cc40),
SA8(0x238e7680), SA8(0x0c7c5c20), SA8(0x2d413cc0), SA8(0x40000000)
};