summaryrefslogtreecommitdiffstats
path: root/sbc/sbc_primitives.c
diff options
context:
space:
mode:
Diffstat (limited to 'sbc/sbc_primitives.c')
-rw-r--r--sbc/sbc_primitives.c260
1 files changed, 204 insertions, 56 deletions
diff --git a/sbc/sbc_primitives.c b/sbc/sbc_primitives.c
index 602b473d..338feb96 100644
--- a/sbc/sbc_primitives.c
+++ b/sbc/sbc_primitives.c
@@ -25,6 +25,7 @@
#include <stdint.h>
#include <limits.h>
+#include <string.h>
#include "sbc.h"
#include "sbc_math.h"
#include "sbc_tables.h"
@@ -179,28 +180,9 @@ static inline void sbc_analyze_eight_simd(const int16_t *in, int32_t *out,
(SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS);
}
-static inline void sbc_analyze_4b_4s_simd(int16_t *pcm, int16_t *x,
+static inline void sbc_analyze_4b_4s_simd(int16_t *x,
int32_t *out, int out_stride)
{
- /* Fetch audio samples and do input data reordering for SIMD */
- x[64] = x[0] = pcm[8 + 7];
- x[65] = x[1] = pcm[8 + 3];
- x[66] = x[2] = pcm[8 + 6];
- x[67] = x[3] = pcm[8 + 4];
- x[68] = x[4] = pcm[8 + 0];
- x[69] = x[5] = pcm[8 + 2];
- x[70] = x[6] = pcm[8 + 1];
- x[71] = x[7] = pcm[8 + 5];
-
- x[72] = x[8] = pcm[0 + 7];
- x[73] = x[9] = pcm[0 + 3];
- x[74] = x[10] = pcm[0 + 6];
- x[75] = x[11] = pcm[0 + 4];
- x[76] = x[12] = pcm[0 + 0];
- x[77] = x[13] = pcm[0 + 2];
- x[78] = x[14] = pcm[0 + 1];
- x[79] = x[15] = pcm[0 + 5];
-
/* Analyze blocks */
sbc_analyze_four_simd(x + 12, out, analysis_consts_fixed4_simd_odd);
out += out_stride;
@@ -211,44 +193,9 @@ static inline void sbc_analyze_4b_4s_simd(int16_t *pcm, int16_t *x,
sbc_analyze_four_simd(x + 0, out, analysis_consts_fixed4_simd_even);
}
-static inline void sbc_analyze_4b_8s_simd(int16_t *pcm, int16_t *x,
+static inline void sbc_analyze_4b_8s_simd(int16_t *x,
int32_t *out, int out_stride)
{
- /* Fetch audio samples and do input data reordering for SIMD */
- x[128] = x[0] = pcm[16 + 15];
- x[129] = x[1] = pcm[16 + 7];
- x[130] = x[2] = pcm[16 + 14];
- x[131] = x[3] = pcm[16 + 8];
- x[132] = x[4] = pcm[16 + 13];
- x[133] = x[5] = pcm[16 + 9];
- x[134] = x[6] = pcm[16 + 12];
- x[135] = x[7] = pcm[16 + 10];
- x[136] = x[8] = pcm[16 + 11];
- x[137] = x[9] = pcm[16 + 3];
- x[138] = x[10] = pcm[16 + 6];
- x[139] = x[11] = pcm[16 + 0];
- x[140] = x[12] = pcm[16 + 5];
- x[141] = x[13] = pcm[16 + 1];
- x[142] = x[14] = pcm[16 + 4];
- x[143] = x[15] = pcm[16 + 2];
-
- x[144] = x[16] = pcm[0 + 15];
- x[145] = x[17] = pcm[0 + 7];
- x[146] = x[18] = pcm[0 + 14];
- x[147] = x[19] = pcm[0 + 8];
- x[148] = x[20] = pcm[0 + 13];
- x[149] = x[21] = pcm[0 + 9];
- x[150] = x[22] = pcm[0 + 12];
- x[151] = x[23] = pcm[0 + 10];
- x[152] = x[24] = pcm[0 + 11];
- x[153] = x[25] = pcm[0 + 3];
- x[154] = x[26] = pcm[0 + 6];
- x[155] = x[27] = pcm[0 + 0];
- x[156] = x[28] = pcm[0 + 5];
- x[157] = x[29] = pcm[0 + 1];
- x[158] = x[30] = pcm[0 + 4];
- x[159] = x[31] = pcm[0 + 2];
-
/* Analyze blocks */
sbc_analyze_eight_simd(x + 24, out, analysis_consts_fixed8_simd_odd);
out += out_stride;
@@ -259,6 +206,201 @@ static inline void sbc_analyze_4b_8s_simd(int16_t *pcm, int16_t *x,
sbc_analyze_eight_simd(x + 0, out, analysis_consts_fixed8_simd_even);
}
+static inline int16_t unaligned16_be(const uint8_t *ptr)
+{
+ return (int16_t) ((ptr[0] << 8) | ptr[1]);
+}
+
+static inline int16_t unaligned16_le(const uint8_t *ptr)
+{
+ return (int16_t) (ptr[0] | (ptr[1] << 8));
+}
+
+/*
+ * Internal helper functions for input data processing. In order to get
+ * optimal performance, it is important to have "nsamples", "nchannels"
+ * and "big_endian" arguments used with this inline function as compile
+ * time constants.
+ */
+
+static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s4_internal(
+ int position,
+ const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+ int nsamples, int nchannels, int big_endian)
+{
+ /* handle X buffer wraparound */
+ if (position < nsamples) {
+ if (nchannels > 0)
+ memcpy(&X[0][SBC_X_BUFFER_SIZE - 36], &X[0][position],
+ 36 * sizeof(int16_t));
+ if (nchannels > 1)
+ memcpy(&X[1][SBC_X_BUFFER_SIZE - 36], &X[1][position],
+ 36 * sizeof(int16_t));
+ position = SBC_X_BUFFER_SIZE - 36;
+ }
+
+ #define PCM(i) (big_endian ? \
+ unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2))
+
+ /* copy/permutate audio samples */
+ while ((nsamples -= 8) >= 0) {
+ position -= 8;
+ if (nchannels > 0) {
+ int16_t *x = &X[0][position];
+ x[0] = PCM(0 + 7 * nchannels);
+ x[1] = PCM(0 + 3 * nchannels);
+ x[2] = PCM(0 + 6 * nchannels);
+ x[3] = PCM(0 + 4 * nchannels);
+ x[4] = PCM(0 + 0 * nchannels);
+ x[5] = PCM(0 + 2 * nchannels);
+ x[6] = PCM(0 + 1 * nchannels);
+ x[7] = PCM(0 + 5 * nchannels);
+ }
+ if (nchannels > 1) {
+ int16_t *x = &X[1][position];
+ x[0] = PCM(1 + 7 * nchannels);
+ x[1] = PCM(1 + 3 * nchannels);
+ x[2] = PCM(1 + 6 * nchannels);
+ x[3] = PCM(1 + 4 * nchannels);
+ x[4] = PCM(1 + 0 * nchannels);
+ x[5] = PCM(1 + 2 * nchannels);
+ x[6] = PCM(1 + 1 * nchannels);
+ x[7] = PCM(1 + 5 * nchannels);
+ }
+ pcm += 16 * nchannels;
+ }
+ #undef PCM
+
+ return position;
+}
+
+static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s8_internal(
+ int position,
+ const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+ int nsamples, int nchannels, int big_endian)
+{
+ /* handle X buffer wraparound */
+ if (position < nsamples) {
+ if (nchannels > 0)
+ memcpy(&X[0][SBC_X_BUFFER_SIZE - 72], &X[0][position],
+ 72 * sizeof(int16_t));
+ if (nchannels > 1)
+ memcpy(&X[1][SBC_X_BUFFER_SIZE - 72], &X[1][position],
+ 72 * sizeof(int16_t));
+ position = SBC_X_BUFFER_SIZE - 72;
+ }
+
+ #define PCM(i) (big_endian ? \
+ unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2))
+
+ /* copy/permutate audio samples */
+ while ((nsamples -= 16) >= 0) {
+ position -= 16;
+ if (nchannels > 0) {
+ int16_t *x = &X[0][position];
+ x[0] = PCM(0 + 15 * nchannels);
+ x[1] = PCM(0 + 7 * nchannels);
+ x[2] = PCM(0 + 14 * nchannels);
+ x[3] = PCM(0 + 8 * nchannels);
+ x[4] = PCM(0 + 13 * nchannels);
+ x[5] = PCM(0 + 9 * nchannels);
+ x[6] = PCM(0 + 12 * nchannels);
+ x[7] = PCM(0 + 10 * nchannels);
+ x[8] = PCM(0 + 11 * nchannels);
+ x[9] = PCM(0 + 3 * nchannels);
+ x[10] = PCM(0 + 6 * nchannels);
+ x[11] = PCM(0 + 0 * nchannels);
+ x[12] = PCM(0 + 5 * nchannels);
+ x[13] = PCM(0 + 1 * nchannels);
+ x[14] = PCM(0 + 4 * nchannels);
+ x[15] = PCM(0 + 2 * nchannels);
+ }
+ if (nchannels > 1) {
+ int16_t *x = &X[1][position];
+ x[0] = PCM(1 + 15 * nchannels);
+ x[1] = PCM(1 + 7 * nchannels);
+ x[2] = PCM(1 + 14 * nchannels);
+ x[3] = PCM(1 + 8 * nchannels);
+ x[4] = PCM(1 + 13 * nchannels);
+ x[5] = PCM(1 + 9 * nchannels);
+ x[6] = PCM(1 + 12 * nchannels);
+ x[7] = PCM(1 + 10 * nchannels);
+ x[8] = PCM(1 + 11 * nchannels);
+ x[9] = PCM(1 + 3 * nchannels);
+ x[10] = PCM(1 + 6 * nchannels);
+ x[11] = PCM(1 + 0 * nchannels);
+ x[12] = PCM(1 + 5 * nchannels);
+ x[13] = PCM(1 + 1 * nchannels);
+ x[14] = PCM(1 + 4 * nchannels);
+ x[15] = PCM(1 + 2 * nchannels);
+ }
+ pcm += 32 * nchannels;
+ }
+ #undef PCM
+
+ return position;
+}
+
+/*
+ * Input data processing functions. The data is endian converted if needed,
+ * channels are deintrleaved and audio samples are reordered for use in
+ * SIMD-friendly analysis filter function. The results are put into "X"
+ * array, getting appended to the previous data (or it is better to say
+ * prepended, as the buffer is filled from top to bottom). Old data is
+ * discarded when neededed, but availability of (10 * nrof_subbands)
+ * contiguous samples is always guaranteed for the input to the analysis
+ * filter. This is achieved by copying a sufficient part of old data
+ * to the top of the buffer on buffer wraparound.
+ */
+
+static int sbc_enc_process_input_4s_le(int position,
+ const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+ int nsamples, int nchannels)
+{
+ if (nchannels > 1)
+ return sbc_encoder_process_input_s4_internal(
+ position, pcm, X, nsamples, 2, 0);
+ else
+ return sbc_encoder_process_input_s4_internal(
+ position, pcm, X, nsamples, 1, 0);
+}
+
+static int sbc_enc_process_input_4s_be(int position,
+ const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+ int nsamples, int nchannels)
+{
+ if (nchannels > 1)
+ return sbc_encoder_process_input_s4_internal(
+ position, pcm, X, nsamples, 2, 1);
+ else
+ return sbc_encoder_process_input_s4_internal(
+ position, pcm, X, nsamples, 1, 1);
+}
+
+static int sbc_enc_process_input_8s_le(int position,
+ const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+ int nsamples, int nchannels)
+{
+ if (nchannels > 1)
+ return sbc_encoder_process_input_s8_internal(
+ position, pcm, X, nsamples, 2, 0);
+ else
+ return sbc_encoder_process_input_s8_internal(
+ position, pcm, X, nsamples, 1, 0);
+}
+
+static int sbc_enc_process_input_8s_be(int position,
+ const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE],
+ int nsamples, int nchannels)
+{
+ if (nchannels > 1)
+ return sbc_encoder_process_input_s8_internal(
+ position, pcm, X, nsamples, 2, 1);
+ else
+ return sbc_encoder_process_input_s8_internal(
+ position, pcm, X, nsamples, 1, 1);
+}
+
/*
* Detect CPU features and setup function pointers
*/
@@ -268,6 +410,12 @@ void sbc_init_primitives(struct sbc_encoder_state *state)
state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_simd;
state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_simd;
+ /* Default implementation for input reordering / deinterleaving */
+ state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le;
+ state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be;
+ state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le;
+ state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be;
+
/* X86/AMD64 optimizations */
#ifdef SBC_BUILD_WITH_MMX_SUPPORT
sbc_init_primitives_mmx(state);