From e4eb4670108ad2b4a0d9c3044e12ed0d933f834e Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 14 Mar 2011 14:46:10 -0300 Subject: build: move sbc related files to its own directory This should make it easier to apply patches from BlueZ which also uses sbc subdir for this files. --- src/modules/bluetooth/sbc/sbc.c | 1252 +++++++++++++++++++++++ src/modules/bluetooth/sbc/sbc.h | 111 ++ src/modules/bluetooth/sbc/sbc_math.h | 60 ++ src/modules/bluetooth/sbc/sbc_primitives.c | 470 +++++++++ src/modules/bluetooth/sbc/sbc_primitives.h | 75 ++ src/modules/bluetooth/sbc/sbc_primitives_mmx.c | 320 ++++++ src/modules/bluetooth/sbc/sbc_primitives_mmx.h | 40 + src/modules/bluetooth/sbc/sbc_primitives_neon.c | 246 +++++ src/modules/bluetooth/sbc/sbc_primitives_neon.h | 40 + src/modules/bluetooth/sbc/sbc_tables.h | 659 ++++++++++++ 10 files changed, 3273 insertions(+) create mode 100644 src/modules/bluetooth/sbc/sbc.c create mode 100644 src/modules/bluetooth/sbc/sbc.h create mode 100644 src/modules/bluetooth/sbc/sbc_math.h create mode 100644 src/modules/bluetooth/sbc/sbc_primitives.c create mode 100644 src/modules/bluetooth/sbc/sbc_primitives.h create mode 100644 src/modules/bluetooth/sbc/sbc_primitives_mmx.c create mode 100644 src/modules/bluetooth/sbc/sbc_primitives_mmx.h create mode 100644 src/modules/bluetooth/sbc/sbc_primitives_neon.c create mode 100644 src/modules/bluetooth/sbc/sbc_primitives_neon.h create mode 100644 src/modules/bluetooth/sbc/sbc_tables.h (limited to 'src/modules/bluetooth/sbc') diff --git a/src/modules/bluetooth/sbc/sbc.c b/src/modules/bluetooth/sbc/sbc.c new file mode 100644 index 00000000..5157c70f --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc.c @@ -0,0 +1,1252 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2008 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* todo items: + + use a log2 table for byte integer scale factors calculation (sum log2 results + for high and low bytes) fill bitpool by 16 bits instead of one at a time in + bits allocation/bitpool generation port to the dsp + +*/ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include +#include + +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc.h" +#include "sbc_primitives.h" + +#define SBC_SYNCWORD 0x9C + +/* This structure contains an unpacked SBC frame. + Yes, there is probably quite some unused space herein */ +struct sbc_frame { + uint8_t frequency; + uint8_t block_mode; + uint8_t blocks; + enum { + MONO = SBC_MODE_MONO, + DUAL_CHANNEL = SBC_MODE_DUAL_CHANNEL, + STEREO = SBC_MODE_STEREO, + JOINT_STEREO = SBC_MODE_JOINT_STEREO + } mode; + uint8_t channels; + enum { + LOUDNESS = SBC_AM_LOUDNESS, + SNR = SBC_AM_SNR + } allocation; + uint8_t subband_mode; + uint8_t subbands; + uint8_t bitpool; + uint16_t codesize; + uint8_t length; + + /* bit number x set means joint stereo has been used in subband x */ + uint8_t joint; + + /* only the lower 4 bits of every element are to be used */ + uint32_t scale_factor[2][8]; + + /* raw integer subband samples in the frame */ + int32_t SBC_ALIGNED sb_sample_f[16][2][8]; + + /* modified subband samples */ + int32_t SBC_ALIGNED sb_sample[16][2][8]; + + /* original pcm audio samples */ + int16_t SBC_ALIGNED pcm_sample[2][16*8]; +}; + +struct sbc_decoder_state { + int subbands; + int32_t V[2][170]; + int offset[2][16]; +}; + +/* + * Calculates the CRC-8 of the first len bits in data + */ +static const uint8_t crc_table[256] = { + 0x00, 0x1D, 0x3A, 0x27, 0x74, 0x69, 0x4E, 0x53, + 0xE8, 0xF5, 0xD2, 0xCF, 0x9C, 0x81, 0xA6, 0xBB, + 0xCD, 0xD0, 0xF7, 0xEA, 0xB9, 0xA4, 0x83, 0x9E, + 0x25, 0x38, 0x1F, 0x02, 0x51, 0x4C, 0x6B, 0x76, + 0x87, 0x9A, 0xBD, 0xA0, 0xF3, 0xEE, 0xC9, 0xD4, + 0x6F, 0x72, 0x55, 0x48, 0x1B, 0x06, 0x21, 0x3C, + 0x4A, 0x57, 0x70, 0x6D, 0x3E, 0x23, 0x04, 0x19, + 0xA2, 0xBF, 0x98, 0x85, 0xD6, 0xCB, 0xEC, 0xF1, + 0x13, 0x0E, 0x29, 0x34, 0x67, 0x7A, 0x5D, 0x40, + 0xFB, 0xE6, 0xC1, 0xDC, 0x8F, 0x92, 0xB5, 0xA8, + 0xDE, 0xC3, 0xE4, 0xF9, 0xAA, 0xB7, 0x90, 0x8D, + 0x36, 0x2B, 0x0C, 0x11, 0x42, 0x5F, 0x78, 0x65, + 0x94, 0x89, 0xAE, 0xB3, 0xE0, 0xFD, 0xDA, 0xC7, + 0x7C, 0x61, 0x46, 0x5B, 0x08, 0x15, 0x32, 0x2F, + 0x59, 0x44, 0x63, 0x7E, 0x2D, 0x30, 0x17, 0x0A, + 0xB1, 0xAC, 0x8B, 0x96, 0xC5, 0xD8, 0xFF, 0xE2, + 0x26, 0x3B, 0x1C, 0x01, 0x52, 0x4F, 0x68, 0x75, + 0xCE, 0xD3, 0xF4, 0xE9, 0xBA, 0xA7, 0x80, 0x9D, + 0xEB, 0xF6, 0xD1, 0xCC, 0x9F, 0x82, 0xA5, 0xB8, + 0x03, 0x1E, 0x39, 0x24, 0x77, 0x6A, 0x4D, 0x50, + 0xA1, 0xBC, 0x9B, 0x86, 0xD5, 0xC8, 0xEF, 0xF2, + 0x49, 0x54, 0x73, 0x6E, 0x3D, 0x20, 0x07, 0x1A, + 0x6C, 0x71, 0x56, 0x4B, 0x18, 0x05, 0x22, 0x3F, + 0x84, 0x99, 0xBE, 0xA3, 0xF0, 0xED, 0xCA, 0xD7, + 0x35, 0x28, 0x0F, 0x12, 0x41, 0x5C, 0x7B, 0x66, + 0xDD, 0xC0, 0xE7, 0xFA, 0xA9, 0xB4, 0x93, 0x8E, + 0xF8, 0xE5, 0xC2, 0xDF, 0x8C, 0x91, 0xB6, 0xAB, + 0x10, 0x0D, 0x2A, 0x37, 0x64, 0x79, 0x5E, 0x43, + 0xB2, 0xAF, 0x88, 0x95, 0xC6, 0xDB, 0xFC, 0xE1, + 0x5A, 0x47, 0x60, 0x7D, 0x2E, 0x33, 0x14, 0x09, + 0x7F, 0x62, 0x45, 0x58, 0x0B, 0x16, 0x31, 0x2C, + 0x97, 0x8A, 0xAD, 0xB0, 0xE3, 0xFE, 0xD9, 0xC4 +}; + +static uint8_t sbc_crc8(const uint8_t *data, size_t len) +{ + uint8_t crc = 0x0f; + size_t i; + uint8_t octet; + + for (i = 0; i < len / 8; i++) + crc = crc_table[crc ^ data[i]]; + + octet = data[i]; + for (i = 0; i < len % 8; i++) { + char bit = ((octet ^ crc) & 0x80) >> 7; + + crc = ((crc & 0x7f) << 1) ^ (bit ? 0x1d : 0); + + octet = octet << 1; + } + + return crc; +} + +/* + * Code straight from the spec to calculate the bits array + * Takes a pointer to the frame in question, a pointer to the bits array and + * the sampling frequency (as 2 bit integer) + */ +static void sbc_calculate_bits(const struct sbc_frame *frame, int (*bits)[8]) +{ + uint8_t sf = frame->frequency; + + if (frame->mode == MONO || frame->mode == DUAL_CHANNEL) { + int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice; + int ch, sb; + + for (ch = 0; ch < frame->channels; ch++) { + max_bitneed = 0; + if (frame->allocation == SNR) { + for (sb = 0; sb < frame->subbands; sb++) { + bitneed[ch][sb] = frame->scale_factor[ch][sb]; + if (bitneed[ch][sb] > max_bitneed) + max_bitneed = bitneed[ch][sb]; + } + } else { + for (sb = 0; sb < frame->subbands; sb++) { + if (frame->scale_factor[ch][sb] == 0) + bitneed[ch][sb] = -5; + else { + if (frame->subbands == 4) + loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; + else + loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; + if (loudness > 0) + bitneed[ch][sb] = loudness / 2; + else + bitneed[ch][sb] = loudness; + } + if (bitneed[ch][sb] > max_bitneed) + max_bitneed = bitneed[ch][sb]; + } + } + + bitcount = 0; + slicecount = 0; + bitslice = max_bitneed + 1; + do { + bitslice--; + bitcount += slicecount; + slicecount = 0; + for (sb = 0; sb < frame->subbands; sb++) { + if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) + slicecount++; + else if (bitneed[ch][sb] == bitslice + 1) + slicecount += 2; + } + } while (bitcount + slicecount < frame->bitpool); + + if (bitcount + slicecount == frame->bitpool) { + bitcount += slicecount; + bitslice--; + } + + for (sb = 0; sb < frame->subbands; sb++) { + if (bitneed[ch][sb] < bitslice + 2) + bits[ch][sb] = 0; + else { + bits[ch][sb] = bitneed[ch][sb] - bitslice; + if (bits[ch][sb] > 16) + bits[ch][sb] = 16; + } + } + + for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) { + if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) { + bits[ch][sb]++; + bitcount++; + } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) { + bits[ch][sb] = 2; + bitcount += 2; + } + } + + for (sb = 0; bitcount < frame->bitpool && sb < frame->subbands; sb++) { + if (bits[ch][sb] < 16) { + bits[ch][sb]++; + bitcount++; + } + } + + } + + } else if (frame->mode == STEREO || frame->mode == JOINT_STEREO) { + int bitneed[2][8], loudness, max_bitneed, bitcount, slicecount, bitslice; + int ch, sb; + + max_bitneed = 0; + if (frame->allocation == SNR) { + for (ch = 0; ch < 2; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + bitneed[ch][sb] = frame->scale_factor[ch][sb]; + if (bitneed[ch][sb] > max_bitneed) + max_bitneed = bitneed[ch][sb]; + } + } + } else { + for (ch = 0; ch < 2; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + if (frame->scale_factor[ch][sb] == 0) + bitneed[ch][sb] = -5; + else { + if (frame->subbands == 4) + loudness = frame->scale_factor[ch][sb] - sbc_offset4[sf][sb]; + else + loudness = frame->scale_factor[ch][sb] - sbc_offset8[sf][sb]; + if (loudness > 0) + bitneed[ch][sb] = loudness / 2; + else + bitneed[ch][sb] = loudness; + } + if (bitneed[ch][sb] > max_bitneed) + max_bitneed = bitneed[ch][sb]; + } + } + } + + bitcount = 0; + slicecount = 0; + bitslice = max_bitneed + 1; + do { + bitslice--; + bitcount += slicecount; + slicecount = 0; + for (ch = 0; ch < 2; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + if ((bitneed[ch][sb] > bitslice + 1) && (bitneed[ch][sb] < bitslice + 16)) + slicecount++; + else if (bitneed[ch][sb] == bitslice + 1) + slicecount += 2; + } + } + } while (bitcount + slicecount < frame->bitpool); + + if (bitcount + slicecount == frame->bitpool) { + bitcount += slicecount; + bitslice--; + } + + for (ch = 0; ch < 2; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + if (bitneed[ch][sb] < bitslice + 2) { + bits[ch][sb] = 0; + } else { + bits[ch][sb] = bitneed[ch][sb] - bitslice; + if (bits[ch][sb] > 16) + bits[ch][sb] = 16; + } + } + } + + ch = 0; + sb = 0; + while (bitcount < frame->bitpool) { + if ((bits[ch][sb] >= 2) && (bits[ch][sb] < 16)) { + bits[ch][sb]++; + bitcount++; + } else if ((bitneed[ch][sb] == bitslice + 1) && (frame->bitpool > bitcount + 1)) { + bits[ch][sb] = 2; + bitcount += 2; + } + if (ch == 1) { + ch = 0; + sb++; + if (sb >= frame->subbands) break; + } else + ch = 1; + } + + ch = 0; + sb = 0; + while (bitcount < frame->bitpool) { + if (bits[ch][sb] < 16) { + bits[ch][sb]++; + bitcount++; + } + if (ch == 1) { + ch = 0; + sb++; + if (sb >= frame->subbands) break; + } else + ch = 1; + } + + } + +} + +/* + * Unpacks a SBC frame at the beginning of the stream in data, + * which has at most len bytes into frame. + * Returns the length in bytes of the packed frame, or a negative + * value on error. The error codes are: + * + * -1 Data stream too short + * -2 Sync byte incorrect + * -3 CRC8 incorrect + * -4 Bitpool value out of bounds + */ +static int sbc_unpack_frame(const uint8_t *data, struct sbc_frame *frame, + size_t len) +{ + unsigned int consumed; + /* Will copy the parts of the header that are relevant to crc + * calculation here */ + uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int crc_pos = 0; + int32_t temp; + + int audio_sample; + int ch, sb, blk, bit; /* channel, subband, block and bit standard + counters */ + int bits[2][8]; /* bits distribution */ + uint32_t levels[2][8]; /* levels derived from that */ + + if (len < 4) + return -1; + + if (data[0] != SBC_SYNCWORD) + return -2; + + frame->frequency = (data[1] >> 6) & 0x03; + + frame->block_mode = (data[1] >> 4) & 0x03; + switch (frame->block_mode) { + case SBC_BLK_4: + frame->blocks = 4; + break; + case SBC_BLK_8: + frame->blocks = 8; + break; + case SBC_BLK_12: + frame->blocks = 12; + break; + case SBC_BLK_16: + frame->blocks = 16; + break; + } + + frame->mode = (data[1] >> 2) & 0x03; + switch (frame->mode) { + case MONO: + frame->channels = 1; + break; + case DUAL_CHANNEL: /* fall-through */ + case STEREO: + case JOINT_STEREO: + frame->channels = 2; + break; + } + + frame->allocation = (data[1] >> 1) & 0x01; + + frame->subband_mode = (data[1] & 0x01); + frame->subbands = frame->subband_mode ? 8 : 4; + + frame->bitpool = data[2]; + + if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) && + frame->bitpool > 16 * frame->subbands) + return -4; + + if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) && + frame->bitpool > 32 * frame->subbands) + return -4; + + /* data[3] is crc, we're checking it later */ + + consumed = 32; + + crc_header[0] = data[1]; + crc_header[1] = data[2]; + crc_pos = 16; + + if (frame->mode == JOINT_STEREO) { + if (len * 8 < consumed + frame->subbands) + return -1; + + frame->joint = 0x00; + for (sb = 0; sb < frame->subbands - 1; sb++) + frame->joint |= ((data[4] >> (7 - sb)) & 0x01) << sb; + if (frame->subbands == 4) + crc_header[crc_pos / 8] = data[4] & 0xf0; + else + crc_header[crc_pos / 8] = data[4]; + + consumed += frame->subbands; + crc_pos += frame->subbands; + } + + if (len * 8 < consumed + (4 * frame->subbands * frame->channels)) + return -1; + + for (ch = 0; ch < frame->channels; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + /* FIXME assert(consumed % 4 == 0); */ + frame->scale_factor[ch][sb] = + (data[consumed >> 3] >> (4 - (consumed & 0x7))) & 0x0F; + crc_header[crc_pos >> 3] |= + frame->scale_factor[ch][sb] << (4 - (crc_pos & 0x7)); + + consumed += 4; + crc_pos += 4; + } + } + + if (data[3] != sbc_crc8(crc_header, crc_pos)) + return -3; + + sbc_calculate_bits(frame, bits); + + for (ch = 0; ch < frame->channels; ch++) { + for (sb = 0; sb < frame->subbands; sb++) + levels[ch][sb] = (1 << bits[ch][sb]) - 1; + } + + for (blk = 0; blk < frame->blocks; blk++) { + for (ch = 0; ch < frame->channels; ch++) { + for (sb = 0; sb < frame->subbands; sb++) { + if (levels[ch][sb] > 0) { + audio_sample = 0; + for (bit = 0; bit < bits[ch][sb]; bit++) { + if (consumed > len * 8) + return -1; + + if ((data[consumed >> 3] >> (7 - (consumed & 0x7))) & 0x01) + audio_sample |= 1 << (bits[ch][sb] - bit - 1); + + consumed++; + } + + frame->sb_sample[blk][ch][sb] = + (((audio_sample << 1) | 1) << frame->scale_factor[ch][sb]) / + levels[ch][sb] - (1 << frame->scale_factor[ch][sb]); + } else + frame->sb_sample[blk][ch][sb] = 0; + } + } + } + + if (frame->mode == JOINT_STEREO) { + for (blk = 0; blk < frame->blocks; blk++) { + for (sb = 0; sb < frame->subbands; sb++) { + if (frame->joint & (0x01 << sb)) { + temp = frame->sb_sample[blk][0][sb] + + frame->sb_sample[blk][1][sb]; + frame->sb_sample[blk][1][sb] = + frame->sb_sample[blk][0][sb] - + frame->sb_sample[blk][1][sb]; + frame->sb_sample[blk][0][sb] = temp; + } + } + } + } + + if ((consumed & 0x7) != 0) + consumed += 8 - (consumed & 0x7); + + return consumed >> 3; +} + +static void sbc_decoder_init(struct sbc_decoder_state *state, + const struct sbc_frame *frame) +{ + int i, ch; + + memset(state->V, 0, sizeof(state->V)); + state->subbands = frame->subbands; + + for (ch = 0; ch < 2; ch++) + for (i = 0; i < frame->subbands * 2; i++) + state->offset[ch][i] = (10 * i + 10); +} + +static inline void sbc_synthesize_four(struct sbc_decoder_state *state, + struct sbc_frame *frame, int ch, int blk) +{ + int i, k, idx; + int32_t *v = state->V[ch]; + int *offset = state->offset[ch]; + + for (i = 0; i < 8; i++) { + /* Shifting */ + offset[i]--; + if (offset[i] < 0) { + offset[i] = 79; + memcpy(v + 80, v, 9 * sizeof(*v)); + } + + /* Distribute the new matrix value to the shifted position */ + v[offset[i]] = SCALE4_STAGED1( + MULA(synmatrix4[i][0], frame->sb_sample[blk][ch][0], + MULA(synmatrix4[i][1], frame->sb_sample[blk][ch][1], + MULA(synmatrix4[i][2], frame->sb_sample[blk][ch][2], + MUL (synmatrix4[i][3], frame->sb_sample[blk][ch][3]))))); + } + + /* Compute the samples */ + for (idx = 0, i = 0; i < 4; i++, idx += 5) { + k = (i + 4) & 0xf; + + /* Store in output, Q0 */ + frame->pcm_sample[ch][blk * 4 + i] = SCALE4_STAGED1( + MULA(v[offset[i] + 0], sbc_proto_4_40m0[idx + 0], + MULA(v[offset[k] + 1], sbc_proto_4_40m1[idx + 0], + MULA(v[offset[i] + 2], sbc_proto_4_40m0[idx + 1], + MULA(v[offset[k] + 3], sbc_proto_4_40m1[idx + 1], + MULA(v[offset[i] + 4], sbc_proto_4_40m0[idx + 2], + MULA(v[offset[k] + 5], sbc_proto_4_40m1[idx + 2], + MULA(v[offset[i] + 6], sbc_proto_4_40m0[idx + 3], + MULA(v[offset[k] + 7], sbc_proto_4_40m1[idx + 3], + MULA(v[offset[i] + 8], sbc_proto_4_40m0[idx + 4], + MUL( v[offset[k] + 9], sbc_proto_4_40m1[idx + 4]))))))))))); + } +} + +static inline void sbc_synthesize_eight(struct sbc_decoder_state *state, + struct sbc_frame *frame, int ch, int blk) +{ + int i, j, k, idx; + int *offset = state->offset[ch]; + + for (i = 0; i < 16; i++) { + /* Shifting */ + offset[i]--; + if (offset[i] < 0) { + offset[i] = 159; + for (j = 0; j < 9; j++) + state->V[ch][j + 160] = state->V[ch][j]; + } + + /* Distribute the new matrix value to the shifted position */ + state->V[ch][offset[i]] = SCALE8_STAGED1( + MULA(synmatrix8[i][0], frame->sb_sample[blk][ch][0], + MULA(synmatrix8[i][1], frame->sb_sample[blk][ch][1], + MULA(synmatrix8[i][2], frame->sb_sample[blk][ch][2], + MULA(synmatrix8[i][3], frame->sb_sample[blk][ch][3], + MULA(synmatrix8[i][4], frame->sb_sample[blk][ch][4], + MULA(synmatrix8[i][5], frame->sb_sample[blk][ch][5], + MULA(synmatrix8[i][6], frame->sb_sample[blk][ch][6], + MUL( synmatrix8[i][7], frame->sb_sample[blk][ch][7]))))))))); + } + + /* Compute the samples */ + for (idx = 0, i = 0; i < 8; i++, idx += 5) { + k = (i + 8) & 0xf; + + /* Store in output */ + frame->pcm_sample[ch][blk * 8 + i] = SCALE8_STAGED1( // Q0 + MULA(state->V[ch][offset[i] + 0], sbc_proto_8_80m0[idx + 0], + MULA(state->V[ch][offset[k] + 1], sbc_proto_8_80m1[idx + 0], + MULA(state->V[ch][offset[i] + 2], sbc_proto_8_80m0[idx + 1], + MULA(state->V[ch][offset[k] + 3], sbc_proto_8_80m1[idx + 1], + MULA(state->V[ch][offset[i] + 4], sbc_proto_8_80m0[idx + 2], + MULA(state->V[ch][offset[k] + 5], sbc_proto_8_80m1[idx + 2], + MULA(state->V[ch][offset[i] + 6], sbc_proto_8_80m0[idx + 3], + MULA(state->V[ch][offset[k] + 7], sbc_proto_8_80m1[idx + 3], + MULA(state->V[ch][offset[i] + 8], sbc_proto_8_80m0[idx + 4], + MUL( state->V[ch][offset[k] + 9], sbc_proto_8_80m1[idx + 4]))))))))))); + } +} + +static int sbc_synthesize_audio(struct sbc_decoder_state *state, + struct sbc_frame *frame) +{ + int ch, blk; + + switch (frame->subbands) { + case 4: + for (ch = 0; ch < frame->channels; ch++) { + for (blk = 0; blk < frame->blocks; blk++) + sbc_synthesize_four(state, frame, ch, blk); + } + return frame->blocks * 4; + + case 8: + for (ch = 0; ch < frame->channels; ch++) { + for (blk = 0; blk < frame->blocks; blk++) + sbc_synthesize_eight(state, frame, ch, blk); + } + return frame->blocks * 8; + + default: + return -EIO; + } +} + +static int sbc_analyze_audio(struct sbc_encoder_state *state, + struct sbc_frame *frame) +{ + int ch, blk; + int16_t *x; + + switch (frame->subbands) { + case 4: + for (ch = 0; ch < frame->channels; ch++) { + x = &state->X[ch][state->position - 16 + + frame->blocks * 4]; + for (blk = 0; blk < frame->blocks; blk += 4) { + state->sbc_analyze_4b_4s( + x, + frame->sb_sample_f[blk][ch], + frame->sb_sample_f[blk + 1][ch] - + frame->sb_sample_f[blk][ch]); + x -= 16; + } + } + return frame->blocks * 4; + + case 8: + for (ch = 0; ch < frame->channels; ch++) { + x = &state->X[ch][state->position - 32 + + frame->blocks * 8]; + for (blk = 0; blk < frame->blocks; blk += 4) { + state->sbc_analyze_4b_8s( + x, + frame->sb_sample_f[blk][ch], + frame->sb_sample_f[blk + 1][ch] - + frame->sb_sample_f[blk][ch]); + x -= 32; + } + } + return frame->blocks * 8; + + default: + return -EIO; + } +} + +/* Supplementary bitstream writing macros for 'sbc_pack_frame' */ + +#define PUT_BITS(data_ptr, bits_cache, bits_count, v, n) \ + do { \ + bits_cache = (v) | (bits_cache << (n)); \ + bits_count += (n); \ + if (bits_count >= 16) { \ + bits_count -= 8; \ + *data_ptr++ = (uint8_t) \ + (bits_cache >> bits_count); \ + bits_count -= 8; \ + *data_ptr++ = (uint8_t) \ + (bits_cache >> bits_count); \ + } \ + } while (0) + +#define FLUSH_BITS(data_ptr, bits_cache, bits_count) \ + do { \ + while (bits_count >= 8) { \ + bits_count -= 8; \ + *data_ptr++ = (uint8_t) \ + (bits_cache >> bits_count); \ + } \ + if (bits_count > 0) \ + *data_ptr++ = (uint8_t) \ + (bits_cache << (8 - bits_count)); \ + } while (0) + +/* + * Packs the SBC frame from frame into the memory at data. At most len + * bytes will be used, should more memory be needed an appropriate + * error code will be returned. Returns the length of the packed frame + * on success or a negative value on error. + * + * The error codes are: + * -1 Not enough memory reserved + * -2 Unsupported sampling rate + * -3 Unsupported number of blocks + * -4 Unsupported number of subbands + * -5 Bitpool value out of bounds + * -99 not implemented + */ + +static SBC_ALWAYS_INLINE int sbc_pack_frame_internal( + uint8_t *data, struct sbc_frame *frame, size_t len, + int frame_subbands, int frame_channels) +{ + /* Bitstream writer starts from the fourth byte */ + uint8_t *data_ptr = data + 4; + uint32_t bits_cache = 0; + uint32_t bits_count = 0; + + /* Will copy the header parts for CRC-8 calculation here */ + uint8_t crc_header[11] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int crc_pos = 0; + + uint32_t audio_sample; + + int ch, sb, blk; /* channel, subband, block and bit counters */ + int bits[2][8]; /* bits distribution */ + uint32_t levels[2][8]; /* levels are derived from that */ + uint32_t sb_sample_delta[2][8]; + + data[0] = SBC_SYNCWORD; + + data[1] = (frame->frequency & 0x03) << 6; + + data[1] |= (frame->block_mode & 0x03) << 4; + + data[1] |= (frame->mode & 0x03) << 2; + + data[1] |= (frame->allocation & 0x01) << 1; + + switch (frame_subbands) { + case 4: + /* Nothing to do */ + break; + case 8: + data[1] |= 0x01; + break; + default: + return -4; + break; + } + + data[2] = frame->bitpool; + + if ((frame->mode == MONO || frame->mode == DUAL_CHANNEL) && + frame->bitpool > frame_subbands << 4) + return -5; + + if ((frame->mode == STEREO || frame->mode == JOINT_STEREO) && + frame->bitpool > frame_subbands << 5) + return -5; + + /* Can't fill in crc yet */ + + crc_header[0] = data[1]; + crc_header[1] = data[2]; + crc_pos = 16; + + if (frame->mode == JOINT_STEREO) { + /* like frame->sb_sample but joint stereo */ + int32_t sb_sample_j[16][2]; + /* scalefactor and scale_factor in joint case */ + uint32_t scalefactor_j[2]; + uint8_t scale_factor_j[2]; + + uint8_t joint = 0; + frame->joint = 0; + + for (sb = 0; sb < frame_subbands - 1; sb++) { + scale_factor_j[0] = 0; + scalefactor_j[0] = 2 << SCALE_OUT_BITS; + scale_factor_j[1] = 0; + scalefactor_j[1] = 2 << SCALE_OUT_BITS; + + for (blk = 0; blk < frame->blocks; blk++) { + uint32_t tmp; + /* Calculate joint stereo signal */ + sb_sample_j[blk][0] = + ASR(frame->sb_sample_f[blk][0][sb], 1) + + ASR(frame->sb_sample_f[blk][1][sb], 1); + sb_sample_j[blk][1] = + ASR(frame->sb_sample_f[blk][0][sb], 1) - + ASR(frame->sb_sample_f[blk][1][sb], 1); + + /* calculate scale_factor_j and scalefactor_j for joint case */ + tmp = fabs(sb_sample_j[blk][0]); + while (scalefactor_j[0] < tmp) { + scale_factor_j[0]++; + scalefactor_j[0] *= 2; + } + tmp = fabs(sb_sample_j[blk][1]); + while (scalefactor_j[1] < tmp) { + scale_factor_j[1]++; + scalefactor_j[1] *= 2; + } + } + + /* decide whether to join this subband */ + if ((frame->scale_factor[0][sb] + + frame->scale_factor[1][sb]) > + (scale_factor_j[0] + + scale_factor_j[1])) { + /* use joint stereo for this subband */ + joint |= 1 << (frame_subbands - 1 - sb); + frame->joint |= 1 << sb; + frame->scale_factor[0][sb] = scale_factor_j[0]; + frame->scale_factor[1][sb] = scale_factor_j[1]; + for (blk = 0; blk < frame->blocks; blk++) { + frame->sb_sample_f[blk][0][sb] = + sb_sample_j[blk][0]; + frame->sb_sample_f[blk][1][sb] = + sb_sample_j[blk][1]; + } + } + } + + PUT_BITS(data_ptr, bits_cache, bits_count, + joint, frame_subbands); + crc_header[crc_pos >> 3] = joint; + crc_pos += frame_subbands; + } + + for (ch = 0; ch < frame_channels; ch++) { + for (sb = 0; sb < frame_subbands; sb++) { + PUT_BITS(data_ptr, bits_cache, bits_count, + frame->scale_factor[ch][sb] & 0x0F, 4); + crc_header[crc_pos >> 3] <<= 4; + crc_header[crc_pos >> 3] |= frame->scale_factor[ch][sb] & 0x0F; + crc_pos += 4; + } + } + + /* align the last crc byte */ + if (crc_pos % 8) + crc_header[crc_pos >> 3] <<= 8 - (crc_pos % 8); + + data[3] = sbc_crc8(crc_header, crc_pos); + + sbc_calculate_bits(frame, bits); + + for (ch = 0; ch < frame_channels; ch++) { + for (sb = 0; sb < frame_subbands; sb++) { + levels[ch][sb] = ((1 << bits[ch][sb]) - 1) << + (32 - (frame->scale_factor[ch][sb] + + SCALE_OUT_BITS + 2)); + sb_sample_delta[ch][sb] = (uint32_t) 1 << + (frame->scale_factor[ch][sb] + + SCALE_OUT_BITS + 1); + } + } + + for (blk = 0; blk < frame->blocks; blk++) { + for (ch = 0; ch < frame_channels; ch++) { + for (sb = 0; sb < frame_subbands; sb++) { + + if (bits[ch][sb] == 0) + continue; + + audio_sample = ((uint64_t) levels[ch][sb] * + (sb_sample_delta[ch][sb] + + frame->sb_sample_f[blk][ch][sb])) >> 32; + + PUT_BITS(data_ptr, bits_cache, bits_count, + audio_sample, bits[ch][sb]); + } + } + } + + FLUSH_BITS(data_ptr, bits_cache, bits_count); + + return data_ptr - data; +} + +static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len) +{ + if (frame->subbands == 4) { + if (frame->channels == 1) + return sbc_pack_frame_internal(data, frame, len, 4, 1); + else + return sbc_pack_frame_internal(data, frame, len, 4, 2); + } else { + if (frame->channels == 1) + return sbc_pack_frame_internal(data, frame, len, 8, 1); + else + return sbc_pack_frame_internal(data, frame, len, 8, 2); + } +} + +static void sbc_encoder_init(struct sbc_encoder_state *state, + const struct sbc_frame *frame) +{ + memset(&state->X, 0, sizeof(state->X)); + state->position = SBC_X_BUFFER_SIZE - frame->subbands * 9; + + sbc_init_primitives(state); +} + +struct sbc_priv { + int init; + struct SBC_ALIGNED sbc_frame frame; + struct SBC_ALIGNED sbc_decoder_state dec_state; + struct SBC_ALIGNED sbc_encoder_state enc_state; +}; + +static void sbc_set_defaults(sbc_t *sbc, unsigned long flags) +{ + sbc->frequency = SBC_FREQ_44100; + sbc->mode = SBC_MODE_STEREO; + sbc->subbands = SBC_SB_8; + sbc->blocks = SBC_BLK_16; + sbc->bitpool = 32; +#if __BYTE_ORDER == __LITTLE_ENDIAN + sbc->endian = SBC_LE; +#elif __BYTE_ORDER == __BIG_ENDIAN + sbc->endian = SBC_BE; +#else +#error "Unknown byte order" +#endif +} + +int sbc_init(sbc_t *sbc, unsigned long flags) +{ + if (!sbc) + return -EIO; + + memset(sbc, 0, sizeof(sbc_t)); + + sbc->priv_alloc_base = malloc(sizeof(struct sbc_priv) + SBC_ALIGN_MASK); + if (!sbc->priv_alloc_base) + return -ENOMEM; + + sbc->priv = (void *) (((uintptr_t) sbc->priv_alloc_base + + SBC_ALIGN_MASK) & ~((uintptr_t) SBC_ALIGN_MASK)); + + memset(sbc->priv, 0, sizeof(struct sbc_priv)); + + sbc_set_defaults(sbc, flags); + + return 0; +} + +ssize_t sbc_parse(sbc_t *sbc, const void *input, size_t input_len) +{ + return sbc_decode(sbc, input, input_len, NULL, 0, NULL); +} + +ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, + void *output, size_t output_len, size_t *written) +{ + struct sbc_priv *priv; + char *ptr; + int i, ch, framelen, samples; + + if (!sbc || !input) + return -EIO; + + priv = sbc->priv; + + framelen = sbc_unpack_frame(input, &priv->frame, input_len); + + if (!priv->init) { + sbc_decoder_init(&priv->dec_state, &priv->frame); + priv->init = 1; + + sbc->frequency = priv->frame.frequency; + sbc->mode = priv->frame.mode; + sbc->subbands = priv->frame.subband_mode; + sbc->blocks = priv->frame.block_mode; + sbc->allocation = priv->frame.allocation; + sbc->bitpool = priv->frame.bitpool; + + priv->frame.codesize = sbc_get_codesize(sbc); + priv->frame.length = framelen; + } else if (priv->frame.bitpool != sbc->bitpool) + sbc->bitpool = priv->frame.bitpool; + + if (!output) + return framelen; + + if (written) + *written = 0; + + if (framelen <= 0) + return framelen; + + samples = sbc_synthesize_audio(&priv->dec_state, &priv->frame); + + ptr = output; + + if (output_len < (size_t) (samples * priv->frame.channels * 2)) + samples = output_len / (priv->frame.channels * 2); + + for (i = 0; i < samples; i++) { + for (ch = 0; ch < priv->frame.channels; ch++) { + int16_t s; + s = priv->frame.pcm_sample[ch][i]; + + if (sbc->endian == SBC_BE) { + *ptr++ = (s & 0xff00) >> 8; + *ptr++ = (s & 0x00ff); + } else { + *ptr++ = (s & 0x00ff); + *ptr++ = (s & 0xff00) >> 8; + } + } + } + + if (written) + *written = samples * priv->frame.channels * 2; + + return framelen; +} + +ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, + void *output, size_t output_len, size_t *written) +{ + struct sbc_priv *priv; + int framelen, samples; + int (*sbc_enc_process_input)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + + if (!sbc || !input) + return -EIO; + + priv = sbc->priv; + + if (written) + *written = 0; + + if (!priv->init) { + priv->frame.frequency = sbc->frequency; + priv->frame.mode = sbc->mode; + priv->frame.channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; + priv->frame.allocation = sbc->allocation; + priv->frame.subband_mode = sbc->subbands; + priv->frame.subbands = sbc->subbands ? 8 : 4; + priv->frame.block_mode = sbc->blocks; + priv->frame.blocks = 4 + (sbc->blocks * 4); + priv->frame.bitpool = sbc->bitpool; + priv->frame.codesize = sbc_get_codesize(sbc); + priv->frame.length = sbc_get_frame_length(sbc); + + sbc_encoder_init(&priv->enc_state, &priv->frame); + priv->init = 1; + } else if (priv->frame.bitpool != sbc->bitpool) { + priv->frame.length = sbc_get_frame_length(sbc); + priv->frame.bitpool = sbc->bitpool; + } + + /* input must be large enough to encode a complete frame */ + if (input_len < priv->frame.codesize) + return 0; + + /* output must be large enough to receive the encoded frame */ + if (!output || output_len < priv->frame.length) + return -ENOSPC; + + /* Select the needed input data processing function and call it */ + if (priv->frame.subbands == 8) { + if (sbc->endian == SBC_BE) + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_8s_be; + else + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_8s_le; + } else { + if (sbc->endian == SBC_BE) + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_4s_be; + else + sbc_enc_process_input = + priv->enc_state.sbc_enc_process_input_4s_le; + } + + priv->enc_state.position = sbc_enc_process_input( + priv->enc_state.position, (const uint8_t *) input, + priv->enc_state.X, priv->frame.subbands * priv->frame.blocks, + priv->frame.channels); + + samples = sbc_analyze_audio(&priv->enc_state, &priv->frame); + + priv->enc_state.sbc_calc_scalefactors( + priv->frame.sb_sample_f, priv->frame.scale_factor, + priv->frame.blocks, priv->frame.channels, priv->frame.subbands); + + framelen = sbc_pack_frame(output, &priv->frame, output_len); + + if (written) + *written = framelen; + + return samples * priv->frame.channels * 2; +} + +void sbc_finish(sbc_t *sbc) +{ + if (!sbc) + return; + + if (sbc->priv_alloc_base) + free(sbc->priv_alloc_base); + + memset(sbc, 0, sizeof(sbc_t)); +} + +size_t sbc_get_frame_length(sbc_t *sbc) +{ + size_t ret; + uint8_t subbands, channels, blocks, joint, bitpool; + struct sbc_priv *priv; + + priv = sbc->priv; + if (priv->init && priv->frame.bitpool == sbc->bitpool) + return priv->frame.length; + + subbands = sbc->subbands ? 8 : 4; + blocks = 4 + (sbc->blocks * 4); + channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; + joint = sbc->mode == SBC_MODE_JOINT_STEREO ? 1 : 0; + bitpool = sbc->bitpool; + + ret = 4 + (4 * subbands * channels) / 8; + /* This term is not always evenly divide so we round it up */ + if (channels == 1) + ret += ((blocks * channels * bitpool) + 7) / 8; + else + ret += (((joint ? subbands : 0) + blocks * bitpool) + 7) / 8; + + return ret; +} + +unsigned sbc_get_frame_duration(sbc_t *sbc) +{ + uint8_t subbands, blocks; + uint16_t frequency; + struct sbc_priv *priv; + + priv = sbc->priv; + if (!priv->init) { + subbands = sbc->subbands ? 8 : 4; + blocks = 4 + (sbc->blocks * 4); + } else { + subbands = priv->frame.subbands; + blocks = priv->frame.blocks; + } + + switch (sbc->frequency) { + case SBC_FREQ_16000: + frequency = 16000; + break; + + case SBC_FREQ_32000: + frequency = 32000; + break; + + case SBC_FREQ_44100: + frequency = 44100; + break; + + case SBC_FREQ_48000: + frequency = 48000; + break; + default: + return 0; + } + + return (1000000 * blocks * subbands) / frequency; +} + +size_t sbc_get_codesize(sbc_t *sbc) +{ + uint16_t subbands, channels, blocks; + struct sbc_priv *priv; + + priv = sbc->priv; + if (!priv->init) { + subbands = sbc->subbands ? 8 : 4; + blocks = 4 + (sbc->blocks * 4); + channels = sbc->mode == SBC_MODE_MONO ? 1 : 2; + } else { + subbands = priv->frame.subbands; + blocks = priv->frame.blocks; + channels = priv->frame.channels; + } + + return subbands * blocks * channels * 2; +} + +const char *sbc_get_implementation_info(sbc_t *sbc) +{ + struct sbc_priv *priv; + + if (!sbc) + return NULL; + + priv = sbc->priv; + if (!priv) + return NULL; + + return priv->enc_state.implementation_info; +} + +int sbc_reinit(sbc_t *sbc, unsigned long flags) +{ + struct sbc_priv *priv; + + if (!sbc || !sbc->priv) + return -EIO; + + priv = sbc->priv; + + if (priv->init == 1) + memset(sbc->priv, 0, sizeof(struct sbc_priv)); + + sbc_set_defaults(sbc, flags); + + return 0; +} diff --git a/src/modules/bluetooth/sbc/sbc.h b/src/modules/bluetooth/sbc/sbc.h new file mode 100644 index 00000000..65435884 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc.h @@ -0,0 +1,111 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_H +#define __SBC_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/* sampling frequency */ +#define SBC_FREQ_16000 0x00 +#define SBC_FREQ_32000 0x01 +#define SBC_FREQ_44100 0x02 +#define SBC_FREQ_48000 0x03 + +/* blocks */ +#define SBC_BLK_4 0x00 +#define SBC_BLK_8 0x01 +#define SBC_BLK_12 0x02 +#define SBC_BLK_16 0x03 + +/* channel mode */ +#define SBC_MODE_MONO 0x00 +#define SBC_MODE_DUAL_CHANNEL 0x01 +#define SBC_MODE_STEREO 0x02 +#define SBC_MODE_JOINT_STEREO 0x03 + +/* allocation method */ +#define SBC_AM_LOUDNESS 0x00 +#define SBC_AM_SNR 0x01 + +/* subbands */ +#define SBC_SB_4 0x00 +#define SBC_SB_8 0x01 + +/* Data endianess */ +#define SBC_LE 0x00 +#define SBC_BE 0x01 + +struct sbc_struct { + unsigned long flags; + + uint8_t frequency; + uint8_t blocks; + uint8_t subbands; + uint8_t mode; + uint8_t allocation; + uint8_t bitpool; + uint8_t endian; + + void *priv; + void *priv_alloc_base; +}; + +typedef struct sbc_struct sbc_t; + +int sbc_init(sbc_t *sbc, unsigned long flags); +int sbc_reinit(sbc_t *sbc, unsigned long flags); + +ssize_t sbc_parse(sbc_t *sbc, const void *input, size_t input_len); + +ssize_t sbc_decode(sbc_t *sbc, const void *input, size_t input_len, + void *output, size_t output_len, size_t *written); + +/* Encodes ONE input block into ONE output block */ +ssize_t sbc_encode(sbc_t *sbc, const void *input, size_t input_len, + void *output, size_t output_len, size_t *written); + +/* Returns the output block size in bytes */ +size_t sbc_get_frame_length(sbc_t *sbc); + +/* Returns the time one input/output block takes to play in msec*/ +unsigned sbc_get_frame_duration(sbc_t *sbc); + +/* Returns the input block size in bytes */ +size_t sbc_get_codesize(sbc_t *sbc); + +const char *sbc_get_implementation_info(sbc_t *sbc); +void sbc_finish(sbc_t *sbc); + +#ifdef __cplusplus +} +#endif + +#endif /* __SBC_H */ diff --git a/src/modules/bluetooth/sbc/sbc_math.h b/src/modules/bluetooth/sbc/sbc_math.h new file mode 100644 index 00000000..b87bc81c --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_math.h @@ -0,0 +1,60 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2008 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#define fabs(x) ((x) < 0 ? -(x) : (x)) +/* C does not provide an explicit arithmetic shift right but this will + always be correct and every compiler *should* generate optimal code */ +#define ASR(val, bits) ((-2 >> 1 == -1) ? \ + ((int32_t)(val)) >> (bits) : ((int32_t) (val)) / (1 << (bits))) + +#define SCALE_SPROTO4_TBL 12 +#define SCALE_SPROTO8_TBL 14 +#define SCALE_NPROTO4_TBL 11 +#define SCALE_NPROTO8_TBL 11 +#define SCALE4_STAGED1_BITS 15 +#define SCALE4_STAGED2_BITS 16 +#define SCALE8_STAGED1_BITS 15 +#define SCALE8_STAGED2_BITS 16 + +typedef int32_t sbc_fixed_t; + +#define SCALE4_STAGED1(src) ASR(src, SCALE4_STAGED1_BITS) +#define SCALE4_STAGED2(src) ASR(src, SCALE4_STAGED2_BITS) +#define SCALE8_STAGED1(src) ASR(src, SCALE8_STAGED1_BITS) +#define SCALE8_STAGED2(src) ASR(src, SCALE8_STAGED2_BITS) + +#define SBC_FIXED_0(val) { val = 0; } +#define MUL(a, b) ((a) * (b)) +#ifdef __arm__ +#define MULA(a, b, res) ({ \ + int tmp = res; \ + __asm__( \ + "mla %0, %2, %3, %0" \ + : "=&r" (tmp) \ + : "0" (tmp), "r" (a), "r" (b)); \ + tmp; }) +#else +#define MULA(a, b, res) ((a) * (b) + (res)) +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives.c b/src/modules/bluetooth/sbc/sbc_primitives.c new file mode 100644 index 00000000..6b0be3f5 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives.c @@ -0,0 +1,470 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives.h" +#include "sbc_primitives_mmx.h" +#include "sbc_primitives_neon.h" + +/* + * A reference C code of analysis filter with SIMD-friendly tables + * reordering and code layout. This code can be used to develop platform + * specific SIMD optimizations. Also it may be used as some kind of test + * for compiler autovectorization capabilities (who knows, if the compiler + * is very good at this stuff, hand optimized assembly may be not strictly + * needed for some platform). + * + * Note: It is also possible to make a simple variant of analysis filter, + * which needs only a single constants table without taking care about + * even/odd cases. This simple variant of filter can be implemented without + * input data permutation. The only thing that would be lost is the + * possibility to use pairwise SIMD multiplications. But for some simple + * CPU cores without SIMD extensions it can be useful. If anybody is + * interested in implementing such variant of a filter, sourcecode from + * bluez versions 4.26/4.27 can be used as a reference and the history of + * the changes in git repository done around that time may be worth checking. + */ + +static inline void sbc_analyze_four_simd(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + FIXED_A t1[4]; + FIXED_T t2[4]; + int hop = 0; + + /* rounding coefficient */ + t1[0] = t1[1] = t1[2] = t1[3] = + (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); + + /* low pass polyphase filter */ + for (hop = 0; hop < 40; hop += 8) { + t1[0] += (FIXED_A) in[hop] * consts[hop]; + t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; + t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; + t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; + t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; + t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; + t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; + t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; + } + + /* scaling */ + t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; + t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; + t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; + t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; + + /* do the cos transform */ + t1[0] = (FIXED_A) t2[0] * consts[40 + 0]; + t1[0] += (FIXED_A) t2[1] * consts[40 + 1]; + t1[1] = (FIXED_A) t2[0] * consts[40 + 2]; + t1[1] += (FIXED_A) t2[1] * consts[40 + 3]; + t1[2] = (FIXED_A) t2[0] * consts[40 + 4]; + t1[2] += (FIXED_A) t2[1] * consts[40 + 5]; + t1[3] = (FIXED_A) t2[0] * consts[40 + 6]; + t1[3] += (FIXED_A) t2[1] * consts[40 + 7]; + + t1[0] += (FIXED_A) t2[2] * consts[40 + 8]; + t1[0] += (FIXED_A) t2[3] * consts[40 + 9]; + t1[1] += (FIXED_A) t2[2] * consts[40 + 10]; + t1[1] += (FIXED_A) t2[3] * consts[40 + 11]; + t1[2] += (FIXED_A) t2[2] * consts[40 + 12]; + t1[2] += (FIXED_A) t2[3] * consts[40 + 13]; + t1[3] += (FIXED_A) t2[2] * consts[40 + 14]; + t1[3] += (FIXED_A) t2[3] * consts[40 + 15]; + + out[0] = t1[0] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + out[1] = t1[1] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + out[2] = t1[2] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); + out[3] = t1[3] >> + (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); +} + +static inline void sbc_analyze_eight_simd(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + FIXED_A t1[8]; + FIXED_T t2[8]; + int i, hop; + + /* rounding coefficient */ + t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = + (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); + + /* low pass polyphase filter */ + for (hop = 0; hop < 80; hop += 16) { + t1[0] += (FIXED_A) in[hop] * consts[hop]; + t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; + t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; + t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; + t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; + t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; + t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; + t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; + t1[4] += (FIXED_A) in[hop + 8] * consts[hop + 8]; + t1[4] += (FIXED_A) in[hop + 9] * consts[hop + 9]; + t1[5] += (FIXED_A) in[hop + 10] * consts[hop + 10]; + t1[5] += (FIXED_A) in[hop + 11] * consts[hop + 11]; + t1[6] += (FIXED_A) in[hop + 12] * consts[hop + 12]; + t1[6] += (FIXED_A) in[hop + 13] * consts[hop + 13]; + t1[7] += (FIXED_A) in[hop + 14] * consts[hop + 14]; + t1[7] += (FIXED_A) in[hop + 15] * consts[hop + 15]; + } + + /* scaling */ + t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; + t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; + t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; + t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; + t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; + t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; + t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; + t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; + + + /* do the cos transform */ + t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0; + + for (i = 0; i < 4; i++) { + t1[0] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 0]; + t1[0] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 1]; + t1[1] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 2]; + t1[1] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 3]; + t1[2] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 4]; + t1[2] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 5]; + t1[3] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 6]; + t1[3] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 7]; + t1[4] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 8]; + t1[4] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 9]; + t1[5] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 10]; + t1[5] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 11]; + t1[6] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 12]; + t1[6] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 13]; + t1[7] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 14]; + t1[7] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 15]; + } + + for (i = 0; i < 8; i++) + out[i] = t1[i] >> + (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); +} + +static inline void sbc_analyze_4b_4s_simd(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four_simd(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_simd(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four_simd(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_simd(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static inline void sbc_analyze_4b_8s_simd(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight_simd(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_simd(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight_simd(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_simd(x + 0, out, analysis_consts_fixed8_simd_even); +} + +static inline int16_t unaligned16_be(const uint8_t *ptr) +{ + return (int16_t) ((ptr[0] << 8) | ptr[1]); +} + +static inline int16_t unaligned16_le(const uint8_t *ptr) +{ + return (int16_t) (ptr[0] | (ptr[1] << 8)); +} + +/* + * Internal helper functions for input data processing. In order to get + * optimal performance, it is important to have "nsamples", "nchannels" + * and "big_endian" arguments used with this inline function as compile + * time constants. + */ + +static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s4_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + /* handle X buffer wraparound */ + if (position < nsamples) { + if (nchannels > 0) + memcpy(&X[0][SBC_X_BUFFER_SIZE - 36], &X[0][position], + 36 * sizeof(int16_t)); + if (nchannels > 1) + memcpy(&X[1][SBC_X_BUFFER_SIZE - 36], &X[1][position], + 36 * sizeof(int16_t)); + position = SBC_X_BUFFER_SIZE - 36; + } + + #define PCM(i) (big_endian ? \ + unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) + + /* copy/permutate audio samples */ + while ((nsamples -= 8) >= 0) { + position -= 8; + if (nchannels > 0) { + int16_t *x = &X[0][position]; + x[0] = PCM(0 + 7 * nchannels); + x[1] = PCM(0 + 3 * nchannels); + x[2] = PCM(0 + 6 * nchannels); + x[3] = PCM(0 + 4 * nchannels); + x[4] = PCM(0 + 0 * nchannels); + x[5] = PCM(0 + 2 * nchannels); + x[6] = PCM(0 + 1 * nchannels); + x[7] = PCM(0 + 5 * nchannels); + } + if (nchannels > 1) { + int16_t *x = &X[1][position]; + x[0] = PCM(1 + 7 * nchannels); + x[1] = PCM(1 + 3 * nchannels); + x[2] = PCM(1 + 6 * nchannels); + x[3] = PCM(1 + 4 * nchannels); + x[4] = PCM(1 + 0 * nchannels); + x[5] = PCM(1 + 2 * nchannels); + x[6] = PCM(1 + 1 * nchannels); + x[7] = PCM(1 + 5 * nchannels); + } + pcm += 16 * nchannels; + } + #undef PCM + + return position; +} + +static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s8_internal( + int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels, int big_endian) +{ + /* handle X buffer wraparound */ + if (position < nsamples) { + if (nchannels > 0) + memcpy(&X[0][SBC_X_BUFFER_SIZE - 72], &X[0][position], + 72 * sizeof(int16_t)); + if (nchannels > 1) + memcpy(&X[1][SBC_X_BUFFER_SIZE - 72], &X[1][position], + 72 * sizeof(int16_t)); + position = SBC_X_BUFFER_SIZE - 72; + } + + #define PCM(i) (big_endian ? \ + unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) + + /* copy/permutate audio samples */ + while ((nsamples -= 16) >= 0) { + position -= 16; + if (nchannels > 0) { + int16_t *x = &X[0][position]; + x[0] = PCM(0 + 15 * nchannels); + x[1] = PCM(0 + 7 * nchannels); + x[2] = PCM(0 + 14 * nchannels); + x[3] = PCM(0 + 8 * nchannels); + x[4] = PCM(0 + 13 * nchannels); + x[5] = PCM(0 + 9 * nchannels); + x[6] = PCM(0 + 12 * nchannels); + x[7] = PCM(0 + 10 * nchannels); + x[8] = PCM(0 + 11 * nchannels); + x[9] = PCM(0 + 3 * nchannels); + x[10] = PCM(0 + 6 * nchannels); + x[11] = PCM(0 + 0 * nchannels); + x[12] = PCM(0 + 5 * nchannels); + x[13] = PCM(0 + 1 * nchannels); + x[14] = PCM(0 + 4 * nchannels); + x[15] = PCM(0 + 2 * nchannels); + } + if (nchannels > 1) { + int16_t *x = &X[1][position]; + x[0] = PCM(1 + 15 * nchannels); + x[1] = PCM(1 + 7 * nchannels); + x[2] = PCM(1 + 14 * nchannels); + x[3] = PCM(1 + 8 * nchannels); + x[4] = PCM(1 + 13 * nchannels); + x[5] = PCM(1 + 9 * nchannels); + x[6] = PCM(1 + 12 * nchannels); + x[7] = PCM(1 + 10 * nchannels); + x[8] = PCM(1 + 11 * nchannels); + x[9] = PCM(1 + 3 * nchannels); + x[10] = PCM(1 + 6 * nchannels); + x[11] = PCM(1 + 0 * nchannels); + x[12] = PCM(1 + 5 * nchannels); + x[13] = PCM(1 + 1 * nchannels); + x[14] = PCM(1 + 4 * nchannels); + x[15] = PCM(1 + 2 * nchannels); + } + pcm += 32 * nchannels; + } + #undef PCM + + return position; +} + +/* + * Input data processing functions. The data is endian converted if needed, + * channels are deintrleaved and audio samples are reordered for use in + * SIMD-friendly analysis filter function. The results are put into "X" + * array, getting appended to the previous data (or it is better to say + * prepended, as the buffer is filled from top to bottom). Old data is + * discarded when neededed, but availability of (10 * nrof_subbands) + * contiguous samples is always guaranteed for the input to the analysis + * filter. This is achieved by copying a sufficient part of old data + * to the top of the buffer on buffer wraparound. + */ + +static int sbc_enc_process_input_4s_le(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 2, 0); + else + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 1, 0); +} + +static int sbc_enc_process_input_4s_be(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 2, 1); + else + return sbc_encoder_process_input_s4_internal( + position, pcm, X, nsamples, 1, 1); +} + +static int sbc_enc_process_input_8s_le(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 2, 0); + else + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 1, 0); +} + +static int sbc_enc_process_input_8s_be(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels) +{ + if (nchannels > 1) + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 2, 1); + else + return sbc_encoder_process_input_s8_internal( + position, pcm, X, nsamples, 1, 1); +} + +/* Supplementary function to count the number of leading zeros */ + +static inline int sbc_clz(uint32_t x) +{ +#ifdef __GNUC__ + return __builtin_clz(x); +#else + /* TODO: this should be replaced with something better if good + * performance is wanted when using compilers other than gcc */ + int cnt = 0; + while (x) { + cnt++; + x >>= 1; + } + return 32 - cnt; +#endif +} + +static void sbc_calc_scalefactors( + int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands) +{ + int ch, sb, blk; + for (ch = 0; ch < channels; ch++) { + for (sb = 0; sb < subbands; sb++) { + uint32_t x = 1 << SCALE_OUT_BITS; + for (blk = 0; blk < blocks; blk++) { + int32_t tmp = fabs(sb_sample_f[blk][ch][sb]); + if (tmp != 0) + x |= tmp - 1; + } + scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - + sbc_clz(x); + } + } +} + +/* + * Detect CPU features and setup function pointers + */ +void sbc_init_primitives(struct sbc_encoder_state *state) +{ + /* Default implementation for analyze functions */ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_simd; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_simd; + + /* Default implementation for input reordering / deinterleaving */ + state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le; + state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be; + state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le; + state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be; + + /* Default implementation for scale factors calculation */ + state->sbc_calc_scalefactors = sbc_calc_scalefactors; + state->implementation_info = "Generic C"; + + /* X86/AMD64 optimizations */ +#ifdef SBC_BUILD_WITH_MMX_SUPPORT + sbc_init_primitives_mmx(state); +#endif + + /* ARM optimizations */ +#ifdef SBC_BUILD_WITH_NEON_SUPPORT + sbc_init_primitives_neon(state); +#endif +} diff --git a/src/modules/bluetooth/sbc/sbc_primitives.h b/src/modules/bluetooth/sbc/sbc_primitives.h new file mode 100644 index 00000000..3d01c115 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives.h @@ -0,0 +1,75 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_H +#define __SBC_PRIMITIVES_H + +#define SCALE_OUT_BITS 15 +#define SBC_X_BUFFER_SIZE 328 + +#ifdef __GNUC__ +#define SBC_ALWAYS_INLINE __attribute__((always_inline)) +#else +#define SBC_ALWAYS_INLINE inline +#endif + +struct sbc_encoder_state { + int position; + int16_t SBC_ALIGNED X[2][SBC_X_BUFFER_SIZE]; + /* Polyphase analysis filter for 4 subbands configuration, + * it handles 4 blocks at once */ + void (*sbc_analyze_4b_4s)(int16_t *x, int32_t *out, int out_stride); + /* Polyphase analysis filter for 8 subbands configuration, + * it handles 4 blocks at once */ + void (*sbc_analyze_4b_8s)(int16_t *x, int32_t *out, int out_stride); + /* Process input data (deinterleave, endian conversion, reordering), + * depending on the number of subbands and input data byte order */ + int (*sbc_enc_process_input_4s_le)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + int (*sbc_enc_process_input_4s_be)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + int (*sbc_enc_process_input_8s_le)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + int (*sbc_enc_process_input_8s_be)(int position, + const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], + int nsamples, int nchannels); + /* Scale factors calculation */ + void (*sbc_calc_scalefactors)(int32_t sb_sample_f[16][2][8], + uint32_t scale_factor[2][8], + int blocks, int channels, int subbands); + const char *implementation_info; +}; + +/* + * Initialize pointers to the functions which are the basic "building bricks" + * of SBC codec. Best implementation is selected based on target CPU + * capabilities. + */ +void sbc_init_primitives(struct sbc_encoder_state *encoder_state); + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_mmx.c b/src/modules/bluetooth/sbc/sbc_primitives_mmx.c new file mode 100644 index 00000000..08e9ca28 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_mmx.c @@ -0,0 +1,320 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_mmx.h" + +/* + * MMX optimizations + */ + +#ifdef SBC_BUILD_WITH_MMX_SUPPORT + +static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + static const SBC_ALIGNED int32_t round_c[2] = { + 1 << (SBC_PROTO_FIXED4_SCALE - 1), + 1 << (SBC_PROTO_FIXED4_SCALE - 1), + }; + asm volatile ( + "movq (%0), %%mm0\n" + "movq 8(%0), %%mm1\n" + "pmaddwd (%1), %%mm0\n" + "pmaddwd 8(%1), %%mm1\n" + "paddd (%2), %%mm0\n" + "paddd (%2), %%mm1\n" + "\n" + "movq 16(%0), %%mm2\n" + "movq 24(%0), %%mm3\n" + "pmaddwd 16(%1), %%mm2\n" + "pmaddwd 24(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "movq 32(%0), %%mm2\n" + "movq 40(%0), %%mm3\n" + "pmaddwd 32(%1), %%mm2\n" + "pmaddwd 40(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "movq 48(%0), %%mm2\n" + "movq 56(%0), %%mm3\n" + "pmaddwd 48(%1), %%mm2\n" + "pmaddwd 56(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "movq 64(%0), %%mm2\n" + "movq 72(%0), %%mm3\n" + "pmaddwd 64(%1), %%mm2\n" + "pmaddwd 72(%1), %%mm3\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm3, %%mm1\n" + "\n" + "psrad %4, %%mm0\n" + "psrad %4, %%mm1\n" + "packssdw %%mm0, %%mm0\n" + "packssdw %%mm1, %%mm1\n" + "\n" + "movq %%mm0, %%mm2\n" + "pmaddwd 80(%1), %%mm0\n" + "pmaddwd 88(%1), %%mm2\n" + "\n" + "movq %%mm1, %%mm3\n" + "pmaddwd 96(%1), %%mm1\n" + "pmaddwd 104(%1), %%mm3\n" + "paddd %%mm1, %%mm0\n" + "paddd %%mm3, %%mm2\n" + "\n" + "movq %%mm0, (%3)\n" + "movq %%mm2, 8(%3)\n" + : + : "r" (in), "r" (consts), "r" (&round_c), "r" (out), + "i" (SBC_PROTO_FIXED4_SCALE) + : "memory"); +} + +static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + static const SBC_ALIGNED int32_t round_c[2] = { + 1 << (SBC_PROTO_FIXED8_SCALE - 1), + 1 << (SBC_PROTO_FIXED8_SCALE - 1), + }; + asm volatile ( + "movq (%0), %%mm0\n" + "movq 8(%0), %%mm1\n" + "movq 16(%0), %%mm2\n" + "movq 24(%0), %%mm3\n" + "pmaddwd (%1), %%mm0\n" + "pmaddwd 8(%1), %%mm1\n" + "pmaddwd 16(%1), %%mm2\n" + "pmaddwd 24(%1), %%mm3\n" + "paddd (%2), %%mm0\n" + "paddd (%2), %%mm1\n" + "paddd (%2), %%mm2\n" + "paddd (%2), %%mm3\n" + "\n" + "movq 32(%0), %%mm4\n" + "movq 40(%0), %%mm5\n" + "movq 48(%0), %%mm6\n" + "movq 56(%0), %%mm7\n" + "pmaddwd 32(%1), %%mm4\n" + "pmaddwd 40(%1), %%mm5\n" + "pmaddwd 48(%1), %%mm6\n" + "pmaddwd 56(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "movq 64(%0), %%mm4\n" + "movq 72(%0), %%mm5\n" + "movq 80(%0), %%mm6\n" + "movq 88(%0), %%mm7\n" + "pmaddwd 64(%1), %%mm4\n" + "pmaddwd 72(%1), %%mm5\n" + "pmaddwd 80(%1), %%mm6\n" + "pmaddwd 88(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "movq 96(%0), %%mm4\n" + "movq 104(%0), %%mm5\n" + "movq 112(%0), %%mm6\n" + "movq 120(%0), %%mm7\n" + "pmaddwd 96(%1), %%mm4\n" + "pmaddwd 104(%1), %%mm5\n" + "pmaddwd 112(%1), %%mm6\n" + "pmaddwd 120(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "movq 128(%0), %%mm4\n" + "movq 136(%0), %%mm5\n" + "movq 144(%0), %%mm6\n" + "movq 152(%0), %%mm7\n" + "pmaddwd 128(%1), %%mm4\n" + "pmaddwd 136(%1), %%mm5\n" + "pmaddwd 144(%1), %%mm6\n" + "pmaddwd 152(%1), %%mm7\n" + "paddd %%mm4, %%mm0\n" + "paddd %%mm5, %%mm1\n" + "paddd %%mm6, %%mm2\n" + "paddd %%mm7, %%mm3\n" + "\n" + "psrad %4, %%mm0\n" + "psrad %4, %%mm1\n" + "psrad %4, %%mm2\n" + "psrad %4, %%mm3\n" + "\n" + "packssdw %%mm0, %%mm0\n" + "packssdw %%mm1, %%mm1\n" + "packssdw %%mm2, %%mm2\n" + "packssdw %%mm3, %%mm3\n" + "\n" + "movq %%mm0, %%mm4\n" + "movq %%mm0, %%mm5\n" + "pmaddwd 160(%1), %%mm4\n" + "pmaddwd 168(%1), %%mm5\n" + "\n" + "movq %%mm1, %%mm6\n" + "movq %%mm1, %%mm7\n" + "pmaddwd 192(%1), %%mm6\n" + "pmaddwd 200(%1), %%mm7\n" + "paddd %%mm6, %%mm4\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm2, %%mm6\n" + "movq %%mm2, %%mm7\n" + "pmaddwd 224(%1), %%mm6\n" + "pmaddwd 232(%1), %%mm7\n" + "paddd %%mm6, %%mm4\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm3, %%mm6\n" + "movq %%mm3, %%mm7\n" + "pmaddwd 256(%1), %%mm6\n" + "pmaddwd 264(%1), %%mm7\n" + "paddd %%mm6, %%mm4\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm4, (%3)\n" + "movq %%mm5, 8(%3)\n" + "\n" + "movq %%mm0, %%mm5\n" + "pmaddwd 176(%1), %%mm0\n" + "pmaddwd 184(%1), %%mm5\n" + "\n" + "movq %%mm1, %%mm7\n" + "pmaddwd 208(%1), %%mm1\n" + "pmaddwd 216(%1), %%mm7\n" + "paddd %%mm1, %%mm0\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm2, %%mm7\n" + "pmaddwd 240(%1), %%mm2\n" + "pmaddwd 248(%1), %%mm7\n" + "paddd %%mm2, %%mm0\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm3, %%mm7\n" + "pmaddwd 272(%1), %%mm3\n" + "pmaddwd 280(%1), %%mm7\n" + "paddd %%mm3, %%mm0\n" + "paddd %%mm7, %%mm5\n" + "\n" + "movq %%mm0, 16(%3)\n" + "movq %%mm5, 24(%3)\n" + : + : "r" (in), "r" (consts), "r" (&round_c), "r" (out), + "i" (SBC_PROTO_FIXED8_SCALE) + : "memory"); +} + +static inline void sbc_analyze_4b_4s_mmx(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_four_mmx(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_mmx(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + sbc_analyze_four_mmx(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + sbc_analyze_four_mmx(x + 0, out, analysis_consts_fixed4_simd_even); + + asm volatile ("emms\n"); +} + +static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out, + int out_stride) +{ + /* Analyze blocks */ + sbc_analyze_eight_mmx(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_mmx(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + sbc_analyze_eight_mmx(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + sbc_analyze_eight_mmx(x + 0, out, analysis_consts_fixed8_simd_even); + + asm volatile ("emms\n"); +} + +static int check_mmx_support(void) +{ +#ifdef __amd64__ + return 1; /* We assume that all 64-bit processors have MMX support */ +#else + int cpuid_feature_information; + asm volatile ( + /* According to Intel manual, CPUID instruction is supported + * if the value of ID bit (bit 21) in EFLAGS can be modified */ + "pushf\n" + "movl (%%esp), %0\n" + "xorl $0x200000, (%%esp)\n" /* try to modify ID bit */ + "popf\n" + "pushf\n" + "xorl (%%esp), %0\n" /* check if ID bit changed */ + "jz 1f\n" + "push %%eax\n" + "push %%ebx\n" + "push %%ecx\n" + "mov $1, %%eax\n" + "cpuid\n" + "pop %%ecx\n" + "pop %%ebx\n" + "pop %%eax\n" + "1:\n" + "popf\n" + : "=d" (cpuid_feature_information) + : + : "cc"); + return cpuid_feature_information & (1 << 23); +#endif +} + +void sbc_init_primitives_mmx(struct sbc_encoder_state *state) +{ + if (check_mmx_support()) { + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx; + state->implementation_info = "MMX"; + } +} + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_mmx.h b/src/modules/bluetooth/sbc/sbc_primitives_mmx.h new file mode 100644 index 00000000..c1e44a5d --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_mmx.h @@ -0,0 +1,40 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_MMX_H +#define __SBC_PRIMITIVES_MMX_H + +#include "sbc_primitives.h" + +#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__)) && \ + !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) + +#define SBC_BUILD_WITH_MMX_SUPPORT + +void sbc_init_primitives_mmx(struct sbc_encoder_state *encoder_state); + +#endif + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_neon.c b/src/modules/bluetooth/sbc/sbc_primitives_neon.c new file mode 100644 index 00000000..f1bc7b48 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_neon.c @@ -0,0 +1,246 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives_neon.h" + +/* + * ARM NEON optimizations + */ + +#ifdef SBC_BUILD_WITH_NEON_SUPPORT + +static inline void _sbc_analyze_four_neon(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + /* TODO: merge even and odd cases (or even merge all four calls to this + * function) in order to have only aligned reads from 'in' array + * and reduce number of load instructions */ + asm volatile ( + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmull.s16 q0, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmull.s16 q1, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q0, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q1, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q0, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q1, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q0, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q1, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q0, d4, d8\n" + "vmlal.s16 q1, d5, d9\n" + + "vpadd.s32 d0, d0, d1\n" + "vpadd.s32 d1, d2, d3\n" + + "vrshrn.s32 d0, q0, %3\n" + + "vld1.16 {d2, d3, d4, d5}, [%1, :128]!\n" + + "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ + + "vmull.s16 q3, d2, d0\n" + "vmull.s16 q4, d3, d0\n" + "vmlal.s16 q3, d4, d1\n" + "vmlal.s16 q4, d5, d1\n" + + "vpadd.s32 d0, d6, d7\n" /* TODO: can be eliminated */ + "vpadd.s32 d1, d8, d9\n" /* TODO: can be eliminated */ + + "vst1.32 {d0, d1}, [%2, :128]\n" + : "+r" (in), "+r" (consts) + : "r" (out), + "i" (SBC_PROTO_FIXED4_SCALE) + : "memory", + "d0", "d1", "d2", "d3", "d4", "d5", + "d6", "d7", "d8", "d9", "d10", "d11"); +} + +static inline void _sbc_analyze_eight_neon(const int16_t *in, int32_t *out, + const FIXED_T *consts) +{ + /* TODO: merge even and odd cases (or even merge all four calls to this + * function) in order to have only aligned reads from 'in' array + * and reduce number of load instructions */ + asm volatile ( + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmull.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmull.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmull.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmull.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + "vmlal.s16 q8, d6, d10\n" + "vld1.16 {d4, d5}, [%0, :64]!\n" + "vmlal.s16 q9, d7, d11\n" + "vld1.16 {d8, d9}, [%1, :128]!\n" + + "vmlal.s16 q6, d4, d8\n" + "vld1.16 {d6, d7}, [%0, :64]!\n" + "vmlal.s16 q7, d5, d9\n" + "vld1.16 {d10, d11}, [%1, :128]!\n" + + "vmlal.s16 q8, d6, d10\n" + "vmlal.s16 q9, d7, d11\n" + + "vpadd.s32 d0, d12, d13\n" + "vpadd.s32 d1, d14, d15\n" + "vpadd.s32 d2, d16, d17\n" + "vpadd.s32 d3, d18, d19\n" + + "vrshr.s32 q0, q0, %3\n" + "vrshr.s32 q1, q1, %3\n" + "vmovn.s32 d0, q0\n" + "vmovn.s32 d1, q1\n" + + "vdup.i32 d3, d1[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d2, d1[0]\n" /* TODO: can be eliminated */ + "vdup.i32 d1, d0[1]\n" /* TODO: can be eliminated */ + "vdup.i32 d0, d0[0]\n" /* TODO: can be eliminated */ + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmull.s16 q6, d4, d0\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmull.s16 q7, d5, d0\n" + "vmull.s16 q8, d6, d0\n" + "vmull.s16 q9, d7, d0\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d1\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d1\n" + "vmlal.s16 q8, d6, d1\n" + "vmlal.s16 q9, d7, d1\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d2\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d2\n" + "vmlal.s16 q8, d6, d2\n" + "vmlal.s16 q9, d7, d2\n" + + "vld1.16 {d4, d5}, [%1, :128]!\n" + "vmlal.s16 q6, d4, d3\n" + "vld1.16 {d6, d7}, [%1, :128]!\n" + "vmlal.s16 q7, d5, d3\n" + "vmlal.s16 q8, d6, d3\n" + "vmlal.s16 q9, d7, d3\n" + + "vpadd.s32 d0, d12, d13\n" /* TODO: can be eliminated */ + "vpadd.s32 d1, d14, d15\n" /* TODO: can be eliminated */ + "vpadd.s32 d2, d16, d17\n" /* TODO: can be eliminated */ + "vpadd.s32 d3, d18, d19\n" /* TODO: can be eliminated */ + + "vst1.32 {d0, d1, d2, d3}, [%2, :128]\n" + : "+r" (in), "+r" (consts) + : "r" (out), + "i" (SBC_PROTO_FIXED8_SCALE) + : "memory", + "d0", "d1", "d2", "d3", "d4", "d5", + "d6", "d7", "d8", "d9", "d10", "d11", + "d12", "d13", "d14", "d15", "d16", "d17", + "d18", "d19"); +} + +static inline void sbc_analyze_4b_4s_neon(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + _sbc_analyze_four_neon(x + 12, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + _sbc_analyze_four_neon(x + 8, out, analysis_consts_fixed4_simd_even); + out += out_stride; + _sbc_analyze_four_neon(x + 4, out, analysis_consts_fixed4_simd_odd); + out += out_stride; + _sbc_analyze_four_neon(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static inline void sbc_analyze_4b_8s_neon(int16_t *x, + int32_t *out, int out_stride) +{ + /* Analyze blocks */ + _sbc_analyze_eight_neon(x + 24, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + _sbc_analyze_eight_neon(x + 16, out, analysis_consts_fixed8_simd_even); + out += out_stride; + _sbc_analyze_eight_neon(x + 8, out, analysis_consts_fixed8_simd_odd); + out += out_stride; + _sbc_analyze_eight_neon(x + 0, out, analysis_consts_fixed8_simd_even); +} + +void sbc_init_primitives_neon(struct sbc_encoder_state *state) +{ + state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_neon; + state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_neon; + state->implementation_info = "NEON"; +} + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_primitives_neon.h b/src/modules/bluetooth/sbc/sbc_primitives_neon.h new file mode 100644 index 00000000..30766ed8 --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_primitives_neon.h @@ -0,0 +1,40 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef __SBC_PRIMITIVES_NEON_H +#define __SBC_PRIMITIVES_NEON_H + +#include "sbc_primitives.h" + +#if defined(__GNUC__) && defined(__ARM_NEON__) && \ + !defined(SBC_HIGH_PRECISION) && (SCALE_OUT_BITS == 15) + +#define SBC_BUILD_WITH_NEON_SUPPORT + +void sbc_init_primitives_neon(struct sbc_encoder_state *encoder_state); + +#endif + +#endif diff --git a/src/modules/bluetooth/sbc/sbc_tables.h b/src/modules/bluetooth/sbc/sbc_tables.h new file mode 100644 index 00000000..0057c73f --- /dev/null +++ b/src/modules/bluetooth/sbc/sbc_tables.h @@ -0,0 +1,659 @@ +/* + * + * Bluetooth low-complexity, subband codec (SBC) library + * + * Copyright (C) 2004-2009 Marcel Holtmann + * Copyright (C) 2004-2005 Henryk Ploetz + * Copyright (C) 2005-2006 Brad Midgley + * + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +/* A2DP specification: Appendix B, page 69 */ +static const int sbc_offset4[4][4] = { + { -1, 0, 0, 0 }, + { -2, 0, 0, 1 }, + { -2, 0, 0, 1 }, + { -2, 0, 0, 1 } +}; + +/* A2DP specification: Appendix B, page 69 */ +static const int sbc_offset8[4][8] = { + { -2, 0, 0, 0, 0, 0, 0, 1 }, + { -3, 0, 0, 0, 0, 0, 1, 2 }, + { -4, 0, 0, 0, 0, 0, 1, 2 }, + { -4, 0, 0, 0, 0, 0, 1, 2 } +}; + + +#define SS4(val) ASR(val, SCALE_SPROTO4_TBL) +#define SS8(val) ASR(val, SCALE_SPROTO8_TBL) +#define SN4(val) ASR(val, SCALE_NPROTO4_TBL) +#define SN8(val) ASR(val, SCALE_NPROTO8_TBL) + +static const int32_t sbc_proto_4_40m0[] = { + SS4(0x00000000), SS4(0xffa6982f), SS4(0xfba93848), SS4(0x0456c7b8), + SS4(0x005967d1), SS4(0xfffb9ac7), SS4(0xff589157), SS4(0xf9c2a8d8), + SS4(0x027c1434), SS4(0x0019118b), SS4(0xfff3c74c), SS4(0xff137330), + SS4(0xf81b8d70), SS4(0x00ec1b8b), SS4(0xfff0b71a), SS4(0xffe99b00), + SS4(0xfef84470), SS4(0xf6fb4370), SS4(0xffcdc351), SS4(0xffe01dc7) +}; + +static const int32_t sbc_proto_4_40m1[] = { + SS4(0xffe090ce), SS4(0xff2c0475), SS4(0xf694f800), SS4(0xff2c0475), + SS4(0xffe090ce), SS4(0xffe01dc7), SS4(0xffcdc351), SS4(0xf6fb4370), + SS4(0xfef84470), SS4(0xffe99b00), SS4(0xfff0b71a), SS4(0x00ec1b8b), + SS4(0xf81b8d70), SS4(0xff137330), SS4(0xfff3c74c), SS4(0x0019118b), + SS4(0x027c1434), SS4(0xf9c2a8d8), SS4(0xff589157), SS4(0xfffb9ac7) +}; + +static const int32_t sbc_proto_8_80m0[] = { + SS8(0x00000000), SS8(0xfe8d1970), SS8(0xee979f00), SS8(0x11686100), + SS8(0x0172e690), SS8(0xfff5bd1a), SS8(0xfdf1c8d4), SS8(0xeac182c0), + SS8(0x0d9daee0), SS8(0x00e530da), SS8(0xffe9811d), SS8(0xfd52986c), + SS8(0xe7054ca0), SS8(0x0a00d410), SS8(0x006c1de4), SS8(0xffdba705), + SS8(0xfcbc98e8), SS8(0xe3889d20), SS8(0x06af2308), SS8(0x000bb7db), + SS8(0xffca00ed), SS8(0xfc3fbb68), SS8(0xe071bc00), SS8(0x03bf7948), + SS8(0xffc4e05c), SS8(0xffb54b3b), SS8(0xfbedadc0), SS8(0xdde26200), + SS8(0x0142291c), SS8(0xff960e94), SS8(0xff9f3e17), SS8(0xfbd8f358), + SS8(0xdbf79400), SS8(0xff405e01), SS8(0xff7d4914), SS8(0xff8b1a31), + SS8(0xfc1417b8), SS8(0xdac7bb40), SS8(0xfdbb828c), SS8(0xff762170) +}; + +static const int32_t sbc_proto_8_80m1[] = { + SS8(0xff7c272c), SS8(0xfcb02620), SS8(0xda612700), SS8(0xfcb02620), + SS8(0xff7c272c), SS8(0xff762170), SS8(0xfdbb828c), SS8(0xdac7bb40), + SS8(0xfc1417b8), SS8(0xff8b1a31), SS8(0xff7d4914), SS8(0xff405e01), + SS8(0xdbf79400), SS8(0xfbd8f358), SS8(0xff9f3e17), SS8(0xff960e94), + SS8(0x0142291c), SS8(0xdde26200), SS8(0xfbedadc0), SS8(0xffb54b3b), + SS8(0xffc4e05c), SS8(0x03bf7948), SS8(0xe071bc00), SS8(0xfc3fbb68), + SS8(0xffca00ed), SS8(0x000bb7db), SS8(0x06af2308), SS8(0xe3889d20), + SS8(0xfcbc98e8), SS8(0xffdba705), SS8(0x006c1de4), SS8(0x0a00d410), + SS8(0xe7054ca0), SS8(0xfd52986c), SS8(0xffe9811d), SS8(0x00e530da), + SS8(0x0d9daee0), SS8(0xeac182c0), SS8(0xfdf1c8d4), SS8(0xfff5bd1a) +}; + +static const int32_t synmatrix4[8][4] = { + { SN4(0x05a82798), SN4(0xfa57d868), SN4(0xfa57d868), SN4(0x05a82798) }, + { SN4(0x030fbc54), SN4(0xf89be510), SN4(0x07641af0), SN4(0xfcf043ac) }, + { SN4(0x00000000), SN4(0x00000000), SN4(0x00000000), SN4(0x00000000) }, + { SN4(0xfcf043ac), SN4(0x07641af0), SN4(0xf89be510), SN4(0x030fbc54) }, + { SN4(0xfa57d868), SN4(0x05a82798), SN4(0x05a82798), SN4(0xfa57d868) }, + { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) }, + { SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000), SN4(0xf8000000) }, + { SN4(0xf89be510), SN4(0xfcf043ac), SN4(0x030fbc54), SN4(0x07641af0) } +}; + +static const int32_t synmatrix8[16][8] = { + { SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798), + SN8(0x05a82798), SN8(0xfa57d868), SN8(0xfa57d868), SN8(0x05a82798) }, + { SN8(0x0471ced0), SN8(0xf8275a10), SN8(0x018f8b84), SN8(0x06a6d988), + SN8(0xf9592678), SN8(0xfe70747c), SN8(0x07d8a5f0), SN8(0xfb8e3130) }, + { SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac), + SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54) }, + { SN8(0x018f8b84), SN8(0xfb8e3130), SN8(0x06a6d988), SN8(0xf8275a10), + SN8(0x07d8a5f0), SN8(0xf9592678), SN8(0x0471ced0), SN8(0xfe70747c) }, + { SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), + SN8(0x00000000), SN8(0x00000000), SN8(0x00000000), SN8(0x00000000) }, + { SN8(0xfe70747c), SN8(0x0471ced0), SN8(0xf9592678), SN8(0x07d8a5f0), + SN8(0xf8275a10), SN8(0x06a6d988), SN8(0xfb8e3130), SN8(0x018f8b84) }, + { SN8(0xfcf043ac), SN8(0x07641af0), SN8(0xf89be510), SN8(0x030fbc54), + SN8(0x030fbc54), SN8(0xf89be510), SN8(0x07641af0), SN8(0xfcf043ac) }, + { SN8(0xfb8e3130), SN8(0x07d8a5f0), SN8(0xfe70747c), SN8(0xf9592678), + SN8(0x06a6d988), SN8(0x018f8b84), SN8(0xf8275a10), SN8(0x0471ced0) }, + { SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868), + SN8(0xfa57d868), SN8(0x05a82798), SN8(0x05a82798), SN8(0xfa57d868) }, + { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0), + SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) }, + { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0), + SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) }, + { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c), + SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) }, + { SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), + SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000), SN8(0xf8000000) }, + { SN8(0xf8275a10), SN8(0xf9592678), SN8(0xfb8e3130), SN8(0xfe70747c), + SN8(0x018f8b84), SN8(0x0471ced0), SN8(0x06a6d988), SN8(0x07d8a5f0) }, + { SN8(0xf89be510), SN8(0xfcf043ac), SN8(0x030fbc54), SN8(0x07641af0), + SN8(0x07641af0), SN8(0x030fbc54), SN8(0xfcf043ac), SN8(0xf89be510) }, + { SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0), + SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) } +}; + +/* Uncomment the following line to enable high precision build of SBC encoder */ + +/* #define SBC_HIGH_PRECISION */ + +#ifdef SBC_HIGH_PRECISION +#define FIXED_A int64_t /* data type for fixed point accumulator */ +#define FIXED_T int32_t /* data type for fixed point constants */ +#define SBC_FIXED_EXTRA_BITS 16 +#else +#define FIXED_A int32_t /* data type for fixed point accumulator */ +#define FIXED_T int16_t /* data type for fixed point constants */ +#define SBC_FIXED_EXTRA_BITS 0 +#endif + +/* A2DP specification: Section 12.8 Tables + * + * Original values are premultiplied by 2 for better precision (that is the + * maximum which is possible without overflows) + * + * Note: in each block of 8 numbers sign was changed for elements 2 and 7 + * in order to compensate the same change applied to cos_table_fixed_4 + */ +#define SBC_PROTO_FIXED4_SCALE \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) +#define F_PROTO4(x) (FIXED_A) ((x * 2) * \ + ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_PROTO4(x) +static const FIXED_T _sbc_proto_fixed4[40] = { + F(0.00000000E+00), F(5.36548976E-04), + -F(1.49188357E-03), F(2.73370904E-03), + F(3.83720193E-03), F(3.89205149E-03), + F(1.86581691E-03), F(3.06012286E-03), + + F(1.09137620E-02), F(2.04385087E-02), + -F(2.88757392E-02), F(3.21939290E-02), + F(2.58767811E-02), F(6.13245186E-03), + -F(2.88217274E-02), F(7.76463494E-02), + + F(1.35593274E-01), F(1.94987841E-01), + -F(2.46636662E-01), F(2.81828203E-01), + F(2.94315332E-01), F(2.81828203E-01), + F(2.46636662E-01), -F(1.94987841E-01), + + -F(1.35593274E-01), -F(7.76463494E-02), + F(2.88217274E-02), F(6.13245186E-03), + F(2.58767811E-02), F(3.21939290E-02), + F(2.88757392E-02), -F(2.04385087E-02), + + -F(1.09137620E-02), -F(3.06012286E-03), + -F(1.86581691E-03), F(3.89205149E-03), + F(3.83720193E-03), F(2.73370904E-03), + F(1.49188357E-03), -F(5.36548976E-04), +}; +#undef F + +/* + * To produce this cosine matrix in Octave: + * + * b = zeros(4, 8); + * for i = 0:3 + * for j = 0:7 b(i+1, j+1) = cos((i + 0.5) * (j - 2) * (pi/4)) + * endfor + * endfor; + * printf("%.10f, ", b'); + * + * Note: in each block of 8 numbers sign was changed for elements 2 and 7 + * + * Change of sign for element 2 allows to replace constant 1.0 (not + * representable in Q15 format) with -1.0 (fine with Q15). + * Changed sign for element 7 allows to have more similar constants + * and simplify subband filter function code. + */ +#define SBC_COS_TABLE_FIXED4_SCALE \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) +#define F_COS4(x) (FIXED_A) ((x) * \ + ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_COS4(x) +static const FIXED_T cos_table_fixed_4[32] = { + F(0.7071067812), F(0.9238795325), -F(1.0000000000), F(0.9238795325), + F(0.7071067812), F(0.3826834324), F(0.0000000000), F(0.3826834324), + + -F(0.7071067812), F(0.3826834324), -F(1.0000000000), F(0.3826834324), + -F(0.7071067812), -F(0.9238795325), -F(0.0000000000), -F(0.9238795325), + + -F(0.7071067812), -F(0.3826834324), -F(1.0000000000), -F(0.3826834324), + -F(0.7071067812), F(0.9238795325), F(0.0000000000), F(0.9238795325), + + F(0.7071067812), -F(0.9238795325), -F(1.0000000000), -F(0.9238795325), + F(0.7071067812), -F(0.3826834324), -F(0.0000000000), -F(0.3826834324), +}; +#undef F + +/* A2DP specification: Section 12.8 Tables + * + * Original values are premultiplied by 4 for better precision (that is the + * maximum which is possible without overflows) + * + * Note: in each block of 16 numbers sign was changed for elements 4, 13, 14, 15 + * in order to compensate the same change applied to cos_table_fixed_8 + */ +#define SBC_PROTO_FIXED8_SCALE \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) +#define F_PROTO8(x) (FIXED_A) ((x * 2) * \ + ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_PROTO8(x) +static const FIXED_T _sbc_proto_fixed8[80] = { + F(0.00000000E+00), F(1.56575398E-04), + F(3.43256425E-04), F(5.54620202E-04), + -F(8.23919506E-04), F(1.13992507E-03), + F(1.47640169E-03), F(1.78371725E-03), + F(2.01182542E-03), F(2.10371989E-03), + F(1.99454554E-03), F(1.61656283E-03), + F(9.02154502E-04), F(1.78805361E-04), + F(1.64973098E-03), F(3.49717454E-03), + + F(5.65949473E-03), F(8.02941163E-03), + F(1.04584443E-02), F(1.27472335E-02), + -F(1.46525263E-02), F(1.59045603E-02), + F(1.62208471E-02), F(1.53184106E-02), + F(1.29371806E-02), F(8.85757540E-03), + F(2.92408442E-03), -F(4.91578024E-03), + -F(1.46404076E-02), F(2.61098752E-02), + F(3.90751381E-02), F(5.31873032E-02), + + F(6.79989431E-02), F(8.29847578E-02), + F(9.75753918E-02), F(1.11196689E-01), + -F(1.23264548E-01), F(1.33264415E-01), + F(1.40753505E-01), F(1.45389847E-01), + F(1.46955068E-01), F(1.45389847E-01), + F(1.40753505E-01), F(1.33264415E-01), + F(1.23264548E-01), -F(1.11196689E-01), + -F(9.75753918E-02), -F(8.29847578E-02), + + -F(6.79989431E-02), -F(5.31873032E-02), + -F(3.90751381E-02), -F(2.61098752E-02), + F(1.46404076E-02), -F(4.91578024E-03), + F(2.92408442E-03), F(8.85757540E-03), + F(1.29371806E-02), F(1.53184106E-02), + F(1.62208471E-02), F(1.59045603E-02), + F(1.46525263E-02), -F(1.27472335E-02), + -F(1.04584443E-02), -F(8.02941163E-03), + + -F(5.65949473E-03), -F(3.49717454E-03), + -F(1.64973098E-03), -F(1.78805361E-04), + -F(9.02154502E-04), F(1.61656283E-03), + F(1.99454554E-03), F(2.10371989E-03), + F(2.01182542E-03), F(1.78371725E-03), + F(1.47640169E-03), F(1.13992507E-03), + F(8.23919506E-04), -F(5.54620202E-04), + -F(3.43256425E-04), -F(1.56575398E-04), +}; +#undef F + +/* + * To produce this cosine matrix in Octave: + * + * b = zeros(8, 16); + * for i = 0:7 + * for j = 0:15 b(i+1, j+1) = cos((i + 0.5) * (j - 4) * (pi/8)) + * endfor endfor; + * printf("%.10f, ", b'); + * + * Note: in each block of 16 numbers sign was changed for elements 4, 13, 14, 15 + * + * Change of sign for element 4 allows to replace constant 1.0 (not + * representable in Q15 format) with -1.0 (fine with Q15). + * Changed signs for elements 13, 14, 15 allow to have more similar constants + * and simplify subband filter function code. + */ +#define SBC_COS_TABLE_FIXED8_SCALE \ + ((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) +#define F_COS8(x) (FIXED_A) ((x) * \ + ((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_COS8(x) +static const FIXED_T cos_table_fixed_8[128] = { + F(0.7071067812), F(0.8314696123), F(0.9238795325), F(0.9807852804), + -F(1.0000000000), F(0.9807852804), F(0.9238795325), F(0.8314696123), + F(0.7071067812), F(0.5555702330), F(0.3826834324), F(0.1950903220), + F(0.0000000000), F(0.1950903220), F(0.3826834324), F(0.5555702330), + + -F(0.7071067812), -F(0.1950903220), F(0.3826834324), F(0.8314696123), + -F(1.0000000000), F(0.8314696123), F(0.3826834324), -F(0.1950903220), + -F(0.7071067812), -F(0.9807852804), -F(0.9238795325), -F(0.5555702330), + -F(0.0000000000), -F(0.5555702330), -F(0.9238795325), -F(0.9807852804), + + -F(0.7071067812), -F(0.9807852804), -F(0.3826834324), F(0.5555702330), + -F(1.0000000000), F(0.5555702330), -F(0.3826834324), -F(0.9807852804), + -F(0.7071067812), F(0.1950903220), F(0.9238795325), F(0.8314696123), + F(0.0000000000), F(0.8314696123), F(0.9238795325), F(0.1950903220), + + F(0.7071067812), -F(0.5555702330), -F(0.9238795325), F(0.1950903220), + -F(1.0000000000), F(0.1950903220), -F(0.9238795325), -F(0.5555702330), + F(0.7071067812), F(0.8314696123), -F(0.3826834324), -F(0.9807852804), + -F(0.0000000000), -F(0.9807852804), -F(0.3826834324), F(0.8314696123), + + F(0.7071067812), F(0.5555702330), -F(0.9238795325), -F(0.1950903220), + -F(1.0000000000), -F(0.1950903220), -F(0.9238795325), F(0.5555702330), + F(0.7071067812), -F(0.8314696123), -F(0.3826834324), F(0.9807852804), + F(0.0000000000), F(0.9807852804), -F(0.3826834324), -F(0.8314696123), + + -F(0.7071067812), F(0.9807852804), -F(0.3826834324), -F(0.5555702330), + -F(1.0000000000), -F(0.5555702330), -F(0.3826834324), F(0.9807852804), + -F(0.7071067812), -F(0.1950903220), F(0.9238795325), -F(0.8314696123), + -F(0.0000000000), -F(0.8314696123), F(0.9238795325), -F(0.1950903220), + + -F(0.7071067812), F(0.1950903220), F(0.3826834324), -F(0.8314696123), + -F(1.0000000000), -F(0.8314696123), F(0.3826834324), F(0.1950903220), + -F(0.7071067812), F(0.9807852804), -F(0.9238795325), F(0.5555702330), + -F(0.0000000000), F(0.5555702330), -F(0.9238795325), F(0.9807852804), + + F(0.7071067812), -F(0.8314696123), F(0.9238795325), -F(0.9807852804), + -F(1.0000000000), -F(0.9807852804), F(0.9238795325), -F(0.8314696123), + F(0.7071067812), -F(0.5555702330), F(0.3826834324), -F(0.1950903220), + -F(0.0000000000), -F(0.1950903220), F(0.3826834324), -F(0.5555702330), +}; +#undef F + +/* + * Enforce 16 byte alignment for the data, which is supposed to be used + * with SIMD optimized code. + */ + +#define SBC_ALIGN_BITS 4 +#define SBC_ALIGN_MASK ((1 << (SBC_ALIGN_BITS)) - 1) + +#ifdef __GNUC__ +#define SBC_ALIGNED __attribute__((aligned(1 << (SBC_ALIGN_BITS)))) +#else +#define SBC_ALIGNED +#endif + +/* + * Constant tables for the use in SIMD optimized analysis filters + * Each table consists of two parts: + * 1. reordered "proto" table + * 2. reordered "cos" table + * + * Due to non-symmetrical reordering, separate tables for "even" + * and "odd" cases are needed + */ + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_even[40 + 16] = { +#define C0 1.0932568993 +#define C1 1.3056875580 +#define C2 1.3056875580 +#define C3 1.6772280856 + +#define F(x) F_PROTO4(x) + F(0.00000000E+00 * C0), F(3.83720193E-03 * C0), + F(5.36548976E-04 * C1), F(2.73370904E-03 * C1), + F(3.06012286E-03 * C2), F(3.89205149E-03 * C2), + F(0.00000000E+00 * C3), -F(1.49188357E-03 * C3), + F(1.09137620E-02 * C0), F(2.58767811E-02 * C0), + F(2.04385087E-02 * C1), F(3.21939290E-02 * C1), + F(7.76463494E-02 * C2), F(6.13245186E-03 * C2), + F(0.00000000E+00 * C3), -F(2.88757392E-02 * C3), + F(1.35593274E-01 * C0), F(2.94315332E-01 * C0), + F(1.94987841E-01 * C1), F(2.81828203E-01 * C1), + -F(1.94987841E-01 * C2), F(2.81828203E-01 * C2), + F(0.00000000E+00 * C3), -F(2.46636662E-01 * C3), + -F(1.35593274E-01 * C0), F(2.58767811E-02 * C0), + -F(7.76463494E-02 * C1), F(6.13245186E-03 * C1), + -F(2.04385087E-02 * C2), F(3.21939290E-02 * C2), + F(0.00000000E+00 * C3), F(2.88217274E-02 * C3), + -F(1.09137620E-02 * C0), F(3.83720193E-03 * C0), + -F(3.06012286E-03 * C1), F(3.89205149E-03 * C1), + -F(5.36548976E-04 * C2), F(2.73370904E-03 * C2), + F(0.00000000E+00 * C3), -F(1.86581691E-03 * C3), +#undef F +#define F(x) F_COS4(x) + F(0.7071067812 / C0), F(0.9238795325 / C1), + -F(0.7071067812 / C0), F(0.3826834324 / C1), + -F(0.7071067812 / C0), -F(0.3826834324 / C1), + F(0.7071067812 / C0), -F(0.9238795325 / C1), + F(0.3826834324 / C2), -F(1.0000000000 / C3), + -F(0.9238795325 / C2), -F(1.0000000000 / C3), + F(0.9238795325 / C2), -F(1.0000000000 / C3), + -F(0.3826834324 / C2), -F(1.0000000000 / C3), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +}; + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_odd[40 + 16] = { +#define C0 1.3056875580 +#define C1 1.6772280856 +#define C2 1.0932568993 +#define C3 1.3056875580 + +#define F(x) F_PROTO4(x) + F(2.73370904E-03 * C0), F(5.36548976E-04 * C0), + -F(1.49188357E-03 * C1), F(0.00000000E+00 * C1), + F(3.83720193E-03 * C2), F(1.09137620E-02 * C2), + F(3.89205149E-03 * C3), F(3.06012286E-03 * C3), + F(3.21939290E-02 * C0), F(2.04385087E-02 * C0), + -F(2.88757392E-02 * C1), F(0.00000000E+00 * C1), + F(2.58767811E-02 * C2), F(1.35593274E-01 * C2), + F(6.13245186E-03 * C3), F(7.76463494E-02 * C3), + F(2.81828203E-01 * C0), F(1.94987841E-01 * C0), + -F(2.46636662E-01 * C1), F(0.00000000E+00 * C1), + F(2.94315332E-01 * C2), -F(1.35593274E-01 * C2), + F(2.81828203E-01 * C3), -F(1.94987841E-01 * C3), + F(6.13245186E-03 * C0), -F(7.76463494E-02 * C0), + F(2.88217274E-02 * C1), F(0.00000000E+00 * C1), + F(2.58767811E-02 * C2), -F(1.09137620E-02 * C2), + F(3.21939290E-02 * C3), -F(2.04385087E-02 * C3), + F(3.89205149E-03 * C0), -F(3.06012286E-03 * C0), + -F(1.86581691E-03 * C1), F(0.00000000E+00 * C1), + F(3.83720193E-03 * C2), F(0.00000000E+00 * C2), + F(2.73370904E-03 * C3), -F(5.36548976E-04 * C3), +#undef F +#define F(x) F_COS4(x) + F(0.9238795325 / C0), -F(1.0000000000 / C1), + F(0.3826834324 / C0), -F(1.0000000000 / C1), + -F(0.3826834324 / C0), -F(1.0000000000 / C1), + -F(0.9238795325 / C0), -F(1.0000000000 / C1), + F(0.7071067812 / C2), F(0.3826834324 / C3), + -F(0.7071067812 / C2), -F(0.9238795325 / C3), + -F(0.7071067812 / C2), F(0.9238795325 / C3), + F(0.7071067812 / C2), -F(0.3826834324 / C3), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +}; + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_even[80 + 64] = { +#define C0 2.7906148894 +#define C1 2.4270044280 +#define C2 2.8015616024 +#define C3 3.1710363741 +#define C4 2.5377944043 +#define C5 2.4270044280 +#define C6 2.8015616024 +#define C7 3.1710363741 + +#define F(x) F_PROTO8(x) + F(0.00000000E+00 * C0), F(2.01182542E-03 * C0), + F(1.56575398E-04 * C1), F(1.78371725E-03 * C1), + F(3.43256425E-04 * C2), F(1.47640169E-03 * C2), + F(5.54620202E-04 * C3), F(1.13992507E-03 * C3), + -F(8.23919506E-04 * C4), F(0.00000000E+00 * C4), + F(2.10371989E-03 * C5), F(3.49717454E-03 * C5), + F(1.99454554E-03 * C6), F(1.64973098E-03 * C6), + F(1.61656283E-03 * C7), F(1.78805361E-04 * C7), + F(5.65949473E-03 * C0), F(1.29371806E-02 * C0), + F(8.02941163E-03 * C1), F(1.53184106E-02 * C1), + F(1.04584443E-02 * C2), F(1.62208471E-02 * C2), + F(1.27472335E-02 * C3), F(1.59045603E-02 * C3), + -F(1.46525263E-02 * C4), F(0.00000000E+00 * C4), + F(8.85757540E-03 * C5), F(5.31873032E-02 * C5), + F(2.92408442E-03 * C6), F(3.90751381E-02 * C6), + -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7), + F(6.79989431E-02 * C0), F(1.46955068E-01 * C0), + F(8.29847578E-02 * C1), F(1.45389847E-01 * C1), + F(9.75753918E-02 * C2), F(1.40753505E-01 * C2), + F(1.11196689E-01 * C3), F(1.33264415E-01 * C3), + -F(1.23264548E-01 * C4), F(0.00000000E+00 * C4), + F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5), + F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6), + F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7), + -F(6.79989431E-02 * C0), F(1.29371806E-02 * C0), + -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1), + -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2), + -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3), + F(1.46404076E-02 * C4), F(0.00000000E+00 * C4), + F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5), + F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6), + F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7), + -F(5.65949473E-03 * C0), F(2.01182542E-03 * C0), + -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1), + -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2), + -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3), + -F(9.02154502E-04 * C4), F(0.00000000E+00 * C4), + F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5), + F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6), + F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7), +#undef F +#define F(x) F_COS8(x) + F(0.7071067812 / C0), F(0.8314696123 / C1), + -F(0.7071067812 / C0), -F(0.1950903220 / C1), + -F(0.7071067812 / C0), -F(0.9807852804 / C1), + F(0.7071067812 / C0), -F(0.5555702330 / C1), + F(0.7071067812 / C0), F(0.5555702330 / C1), + -F(0.7071067812 / C0), F(0.9807852804 / C1), + -F(0.7071067812 / C0), F(0.1950903220 / C1), + F(0.7071067812 / C0), -F(0.8314696123 / C1), + F(0.9238795325 / C2), F(0.9807852804 / C3), + F(0.3826834324 / C2), F(0.8314696123 / C3), + -F(0.3826834324 / C2), F(0.5555702330 / C3), + -F(0.9238795325 / C2), F(0.1950903220 / C3), + -F(0.9238795325 / C2), -F(0.1950903220 / C3), + -F(0.3826834324 / C2), -F(0.5555702330 / C3), + F(0.3826834324 / C2), -F(0.8314696123 / C3), + F(0.9238795325 / C2), -F(0.9807852804 / C3), + -F(1.0000000000 / C4), F(0.5555702330 / C5), + -F(1.0000000000 / C4), -F(0.9807852804 / C5), + -F(1.0000000000 / C4), F(0.1950903220 / C5), + -F(1.0000000000 / C4), F(0.8314696123 / C5), + -F(1.0000000000 / C4), -F(0.8314696123 / C5), + -F(1.0000000000 / C4), -F(0.1950903220 / C5), + -F(1.0000000000 / C4), F(0.9807852804 / C5), + -F(1.0000000000 / C4), -F(0.5555702330 / C5), + F(0.3826834324 / C6), F(0.1950903220 / C7), + -F(0.9238795325 / C6), -F(0.5555702330 / C7), + F(0.9238795325 / C6), F(0.8314696123 / C7), + -F(0.3826834324 / C6), -F(0.9807852804 / C7), + -F(0.3826834324 / C6), F(0.9807852804 / C7), + F(0.9238795325 / C6), -F(0.8314696123 / C7), + -F(0.9238795325 / C6), F(0.5555702330 / C7), + F(0.3826834324 / C6), -F(0.1950903220 / C7), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +}; + +static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_odd[80 + 64] = { +#define C0 2.5377944043 +#define C1 2.4270044280 +#define C2 2.8015616024 +#define C3 3.1710363741 +#define C4 2.7906148894 +#define C5 2.4270044280 +#define C6 2.8015616024 +#define C7 3.1710363741 + +#define F(x) F_PROTO8(x) + F(0.00000000E+00 * C0), -F(8.23919506E-04 * C0), + F(1.56575398E-04 * C1), F(1.78371725E-03 * C1), + F(3.43256425E-04 * C2), F(1.47640169E-03 * C2), + F(5.54620202E-04 * C3), F(1.13992507E-03 * C3), + F(2.01182542E-03 * C4), F(5.65949473E-03 * C4), + F(2.10371989E-03 * C5), F(3.49717454E-03 * C5), + F(1.99454554E-03 * C6), F(1.64973098E-03 * C6), + F(1.61656283E-03 * C7), F(1.78805361E-04 * C7), + F(0.00000000E+00 * C0), -F(1.46525263E-02 * C0), + F(8.02941163E-03 * C1), F(1.53184106E-02 * C1), + F(1.04584443E-02 * C2), F(1.62208471E-02 * C2), + F(1.27472335E-02 * C3), F(1.59045603E-02 * C3), + F(1.29371806E-02 * C4), F(6.79989431E-02 * C4), + F(8.85757540E-03 * C5), F(5.31873032E-02 * C5), + F(2.92408442E-03 * C6), F(3.90751381E-02 * C6), + -F(4.91578024E-03 * C7), F(2.61098752E-02 * C7), + F(0.00000000E+00 * C0), -F(1.23264548E-01 * C0), + F(8.29847578E-02 * C1), F(1.45389847E-01 * C1), + F(9.75753918E-02 * C2), F(1.40753505E-01 * C2), + F(1.11196689E-01 * C3), F(1.33264415E-01 * C3), + F(1.46955068E-01 * C4), -F(6.79989431E-02 * C4), + F(1.45389847E-01 * C5), -F(8.29847578E-02 * C5), + F(1.40753505E-01 * C6), -F(9.75753918E-02 * C6), + F(1.33264415E-01 * C7), -F(1.11196689E-01 * C7), + F(0.00000000E+00 * C0), F(1.46404076E-02 * C0), + -F(5.31873032E-02 * C1), F(8.85757540E-03 * C1), + -F(3.90751381E-02 * C2), F(2.92408442E-03 * C2), + -F(2.61098752E-02 * C3), -F(4.91578024E-03 * C3), + F(1.29371806E-02 * C4), -F(5.65949473E-03 * C4), + F(1.53184106E-02 * C5), -F(8.02941163E-03 * C5), + F(1.62208471E-02 * C6), -F(1.04584443E-02 * C6), + F(1.59045603E-02 * C7), -F(1.27472335E-02 * C7), + F(0.00000000E+00 * C0), -F(9.02154502E-04 * C0), + -F(3.49717454E-03 * C1), F(2.10371989E-03 * C1), + -F(1.64973098E-03 * C2), F(1.99454554E-03 * C2), + -F(1.78805361E-04 * C3), F(1.61656283E-03 * C3), + F(2.01182542E-03 * C4), F(0.00000000E+00 * C4), + F(1.78371725E-03 * C5), -F(1.56575398E-04 * C5), + F(1.47640169E-03 * C6), -F(3.43256425E-04 * C6), + F(1.13992507E-03 * C7), -F(5.54620202E-04 * C7), +#undef F +#define F(x) F_COS8(x) + -F(1.0000000000 / C0), F(0.8314696123 / C1), + -F(1.0000000000 / C0), -F(0.1950903220 / C1), + -F(1.0000000000 / C0), -F(0.9807852804 / C1), + -F(1.0000000000 / C0), -F(0.5555702330 / C1), + -F(1.0000000000 / C0), F(0.5555702330 / C1), + -F(1.0000000000 / C0), F(0.9807852804 / C1), + -F(1.0000000000 / C0), F(0.1950903220 / C1), + -F(1.0000000000 / C0), -F(0.8314696123 / C1), + F(0.9238795325 / C2), F(0.9807852804 / C3), + F(0.3826834324 / C2), F(0.8314696123 / C3), + -F(0.3826834324 / C2), F(0.5555702330 / C3), + -F(0.9238795325 / C2), F(0.1950903220 / C3), + -F(0.9238795325 / C2), -F(0.1950903220 / C3), + -F(0.3826834324 / C2), -F(0.5555702330 / C3), + F(0.3826834324 / C2), -F(0.8314696123 / C3), + F(0.9238795325 / C2), -F(0.9807852804 / C3), + F(0.7071067812 / C4), F(0.5555702330 / C5), + -F(0.7071067812 / C4), -F(0.9807852804 / C5), + -F(0.7071067812 / C4), F(0.1950903220 / C5), + F(0.7071067812 / C4), F(0.8314696123 / C5), + F(0.7071067812 / C4), -F(0.8314696123 / C5), + -F(0.7071067812 / C4), -F(0.1950903220 / C5), + -F(0.7071067812 / C4), F(0.9807852804 / C5), + F(0.7071067812 / C4), -F(0.5555702330 / C5), + F(0.3826834324 / C6), F(0.1950903220 / C7), + -F(0.9238795325 / C6), -F(0.5555702330 / C7), + F(0.9238795325 / C6), F(0.8314696123 / C7), + -F(0.3826834324 / C6), -F(0.9807852804 / C7), + -F(0.3826834324 / C6), F(0.9807852804 / C7), + F(0.9238795325 / C6), -F(0.8314696123 / C7), + -F(0.9238795325 / C6), F(0.5555702330 / C7), + F(0.3826834324 / C6), -F(0.1950903220 / C7), +#undef F + +#undef C0 +#undef C1 +#undef C2 +#undef C3 +#undef C4 +#undef C5 +#undef C6 +#undef C7 +}; -- cgit