From 51a5483169faa3b64b50970891a545192836ee9b Mon Sep 17 00:00:00 2001 From: Brad Midgley Date: Thu, 6 Mar 2008 14:04:43 +0000 Subject: decoder optimization, now using nested multiply calls --- sbc/sbc.c | 99 ++++++++++++++++++++++++++++++++------------------------------- 1 file changed, 51 insertions(+), 48 deletions(-) (limited to 'sbc') diff --git a/sbc/sbc.c b/sbc/sbc.c index 5fc4752f..beacfd6e 100644 --- a/sbc/sbc.c +++ b/sbc/sbc.c @@ -550,41 +550,42 @@ static void sbc_decoder_init(struct sbc_decoder_state *state, static inline void sbc_synthesize_four(struct sbc_decoder_state *state, struct sbc_frame *frame, int ch, int blk) { - int i, j, k, idx; - sbc_fixed_t res; + int i, k, idx; + int32_t *v = state->V[ch]; + int *offset = state->offset[ch]; for (i = 0; i < 8; i++) { /* Shifting */ - state->offset[ch][i]--; - if (state->offset[ch][i] < 0) { - state->offset[ch][i] = 79; - for (j = 0; j < 9; j++) - state->V[ch][j+80] = state->V[ch][j]; + offset[i]--; + if (offset[i] < 0) { + offset[i] = 79; + memcpy(v + 80, v, 9 * sizeof(*v)); } - } - for (i = 0; i < 8; i++) { /* Distribute the new matrix value to the shifted position */ - SBC_FIXED_0(res); - for (j = 0; j < 4; j++) - res = MULA(synmatrix4[i][j], - frame->sb_sample[blk][ch][j], res); - state->V[ch][state->offset[ch][i]] = SCALE4_STAGED1(res); + v[offset[i]] = SCALE4_STAGED1( + MULA(synmatrix4[i][0], frame->sb_sample[blk][ch][0], + MULA(synmatrix4[i][1], frame->sb_sample[blk][ch][1], + MULA(synmatrix4[i][2], frame->sb_sample[blk][ch][2], + MUL (synmatrix4[i][3], frame->sb_sample[blk][ch][3]))))); } /* Compute the samples */ - for (idx = 0, i = 0; i < 4; i++) { + for (idx = 0, i = 0; i < 4; i++, idx += 5) { k = (i + 4) & 0xf; - SBC_FIXED_0(res); - for (j = 0; j < 10; idx++) { - res = MULA(state->V[ch][state->offset[ch][i]+j++], - sbc_proto_4_40m0[idx], res); - res = MULA(state->V[ch][state->offset[ch][k]+j++], - sbc_proto_4_40m1[idx], res); - } /* Store in output, Q0 */ - frame->pcm_sample[ch][blk * 4 + i] = SCALE4_STAGED2(res); + frame->pcm_sample[ch][blk * 4 + i] = SCALE4_STAGED2( + MULA(v[offset[i] + 0], sbc_proto_4_40m0[idx + 0], + MULA(v[offset[k] + 1], sbc_proto_4_40m1[idx + 0], + MULA(v[offset[i] + 2], sbc_proto_4_40m0[idx + 1], + MULA(v[offset[k] + 3], sbc_proto_4_40m1[idx + 1], + MULA(v[offset[i] + 4], sbc_proto_4_40m0[idx + 2], + MULA(v[offset[k] + 5], sbc_proto_4_40m1[idx + 2], + MULA(v[offset[i] + 6], sbc_proto_4_40m0[idx + 3], + MULA(v[offset[k] + 7], sbc_proto_4_40m1[idx + 3], + MULA(v[offset[i] + 8], sbc_proto_4_40m0[idx + 4], + MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4]))))))))))); } } @@ -592,43 +593,45 @@ static inline void sbc_synthesize_eight(struct sbc_decoder_state *state, struct sbc_frame *frame, int ch, int blk) { int i, j, k, idx; - sbc_fixed_t res; + int *offset = state->offset[ch]; for (i = 0; i < 16; i++) { /* Shifting */ - state->offset[ch][i]--; - if (state->offset[ch][i] < 0) { - state->offset[ch][i] = 159; + offset[i]--; + if (offset[i] < 0) { + offset[i] = 159; for (j = 0; j < 9; j++) - state->V[ch][j+160] = state->V[ch][j]; + state->V[ch][j + 160] = state->V[ch][j]; } - } - for (i = 0; i < 16; i++) { /* Distribute the new matrix value to the shifted position */ - SBC_FIXED_0(res); - for (j = 0; j < 8; j++) { - /* Q28 = Q15 * Q13 */ - res = MULA(synmatrix8[i][j], - frame->sb_sample[blk][ch][j], res); - } - /* Q10 */ - state->V[ch][state->offset[ch][i]] = SCALE8_STAGED1(res); + state->V[ch][offset[i]] = SCALE8_STAGED1( + MULA(synmatrix8[i][0], frame->sb_sample[blk][ch][0], + MULA(synmatrix8[i][1], frame->sb_sample[blk][ch][1], + MULA(synmatrix8[i][2], frame->sb_sample[blk][ch][2], + MULA(synmatrix8[i][3], frame->sb_sample[blk][ch][3], + MULA(synmatrix8[i][4], frame->sb_sample[blk][ch][4], + MULA(synmatrix8[i][5], frame->sb_sample[blk][ch][5], + MULA(synmatrix8[i][6], frame->sb_sample[blk][ch][6], + MUL( synmatrix8[i][7], frame->sb_sample[blk][ch][7]))))))))); } /* Compute the samples */ - for (idx = 0, i = 0; i < 8; i++) { + for (idx = 0, i = 0; i < 8; i++, idx += 5) { k = (i + 8) & 0xf; - SBC_FIXED_0(res); - for (j = 0; j < 10; idx++) { - res = MULA(state->V[ch][state->offset[ch][i]+j++], - sbc_proto_8_80m0[idx], res); - res = MULA(state->V[ch][state->offset[ch][k]+j++], - sbc_proto_8_80m1[idx], res); - } - /* Store in output */ - frame->pcm_sample[ch][blk * 8 + i] = SCALE8_STAGED2(res); // Q0 + /* Store in output */ + frame->pcm_sample[ch][blk * 8 + i] = SCALE8_STAGED2( // Q0 + MULA(state->V[ch][offset[i] + 0], sbc_proto_8_80m0[idx + 0], + MULA(state->V[ch][offset[k] + 1], sbc_proto_8_80m1[idx + 0], + MULA(state->V[ch][offset[i] + 2], sbc_proto_8_80m0[idx + 1], + MULA(state->V[ch][offset[k] + 3], sbc_proto_8_80m1[idx + 1], + MULA(state->V[ch][offset[i] + 4], sbc_proto_8_80m0[idx + 2], + MULA(state->V[ch][offset[k] + 5], sbc_proto_8_80m1[idx + 2], + MULA(state->V[ch][offset[i] + 6], sbc_proto_8_80m0[idx + 3], + MULA(state->V[ch][offset[k] + 7], sbc_proto_8_80m1[idx + 3], + MULA(state->V[ch][offset[i] + 8], sbc_proto_8_80m0[idx + 4], + MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4]))))))))))); } } -- cgit