From a563c8ed5a5d5004f4270dd8836f4257c1da2fe8 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka Date: Thu, 29 Jan 2009 02:17:36 +0200 Subject: SBC encoder scale factors calculation optimized with __builtin_clz The count-leading-zeros operation is often implemented with a dedicated instruction on various architectures (at least this is true for ARM and x86). Using the gcc intrinsic __builtin_clz eliminates the innermost loop in the scale factor calculation and improves performance. The scale factor calculation can also be optimized further using SIMD instructions. --- sbc/sbc.c | 21 +++++---------------- sbc/sbc_primitives.c | 41 +++++++++++++++++++++++++++++++++++++++++ sbc/sbc_primitives.h | 4 ++++ 3 files changed, 50 insertions(+), 16 deletions(-) (limited to 'sbc') diff --git a/sbc/sbc.c b/sbc/sbc.c index 365ee1ff..8a2d7825 100644 --- a/sbc/sbc.c +++ b/sbc/sbc.c @@ -77,7 +77,7 @@ struct sbc_frame { uint8_t joint; /* only the lower 4 bits of every element are to be used */ - uint8_t scale_factor[2][8]; + uint32_t scale_factor[2][8]; /* raw integer subband samples in the frame */ int32_t SBC_ALIGNED sb_sample_f[16][2][8]; @@ -745,8 +745,6 @@ static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( uint32_t levels[2][8]; /* levels are derived from that */ uint32_t sb_sample_delta[2][8]; - u_int32_t scalefactor[2][8]; /* derived from frame->scale_factor */ - data[0] = SBC_SYNCWORD; data[1] = (frame->frequency & 0x03) << 6; @@ -785,19 +783,6 @@ static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( crc_header[1] = data[2]; crc_pos = 16; - for (ch = 0; ch < frame_channels; ch++) { - for (sb = 0; sb < frame_subbands; sb++) { - frame->scale_factor[ch][sb] = 0; - scalefactor[ch][sb] = 2 << SCALE_OUT_BITS; - for (blk = 0; blk < frame->blocks; blk++) { - while (scalefactor[ch][sb] < fabs(frame->sb_sample_f[blk][ch][sb])) { - frame->scale_factor[ch][sb]++; - scalefactor[ch][sb] *= 2; - } - } - } - } - if (frame->mode == JOINT_STEREO) { /* like frame->sb_sample but joint stereo */ 
int32_t sb_sample_j[16][2]; @@ -1115,6 +1100,10 @@ int sbc_encode(sbc_t *sbc, void *input, int input_len, void *output, samples = sbc_analyze_audio(&priv->enc_state, &priv->frame); + priv->enc_state.sbc_calc_scalefactors( + priv->frame.sb_sample_f, priv->frame.scale_factor, + priv->frame.blocks, priv->frame.channels, priv->frame.subbands); + framelen = sbc_pack_frame(output, &priv->frame, output_len); if (written) diff --git a/sbc/sbc_primitives.c b/sbc/sbc_primitives.c index 338feb96..303f3fee 100644 --- a/sbc/sbc_primitives.c +++ b/sbc/sbc_primitives.c @@ -401,6 +401,44 @@ static int sbc_enc_process_input_8s_be(int position, position, pcm, X, nsamples, 1, 1); } +/* Supplementary function to count the number of leading zeros */ + +static inline int sbc_clz(uint32_t x) +{ +#ifdef __GNUC__ + return __builtin_clz(x); +#else + /* TODO: this should be replaced with something better if good + * performance is wanted when using compilers other than gcc */ + int cnt = 0; + while (x) { + cnt++; + x >>= 1; + } + return 32 - cnt; +#endif +} + +static void sbc_calc_scalefactors( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands) +{ + int ch, sb, blk; + for (ch = 0; ch < channels; ch++) { + for (sb = 0; sb < subbands; sb++) { + uint32_t x = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + int32_t tmp = fabs(sb_sample_f[blk][ch][sb]); + if (tmp != 0) + x |= tmp - 1; + } + scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - + sbc_clz(x); + } + } +} + /* * Detect CPU features and setup function pointers */ @@ -416,6 +454,9 @@ void sbc_init_primitives(struct sbc_encoder_state *state) state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le; state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be; + /* Default implementation for scale factors calculation */ + state->sbc_calc_scalefactors = sbc_calc_scalefactors; + /* X86/AMD64 optimizations */ #ifdef SBC_BUILD_WITH_MMX_SUPPORT 
sbc_init_primitives_mmx(state); diff --git a/sbc/sbc_primitives.h b/sbc/sbc_primitives.h index 5b7c9acb..2708c829 100644 --- a/sbc/sbc_primitives.h +++ b/sbc/sbc_primitives.h @@ -58,6 +58,10 @@ struct sbc_encoder_state { int (*sbc_enc_process_input_8s_be)(int position, const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], int nsamples, int nchannels); + /* Scale factors calculation */ + void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands); }; /* -- cgit