summary refs log tree commit diff stats
path: root/sbc
diff options
context:
space:
mode:
author Siarhei Siamashka <siarhei.siamashka@nokia.com> 2009-01-15 19:11:23 +0200
committer Marcel Holtmann <marcel@holtmann.org> 2009-01-16 00:28:32 +0100
commit 9e31e7dde636ca28ee551e8bcf8e4f4ca0ef553d (patch)
tree efc05cd31c23eb871dcff2ed3776473e38a3d003 /sbc
parent 5331a26b8a3c2d69d30a4334e023238db197080b (diff)
SIMD-friendly variant of SBC encoder analysis filter
Added a SIMD-friendly C implementation of the SBC analysis filter (the structure of the code had to be changed a bit, and the constants in the tables were reordered). This code can be used as a reference for developing platform-specific SIMD optimizations. These functions are put into a new file, 'sbc_primitives.c', which is going to contain all the basic primitives of the SBC codec.
Diffstat (limited to 'sbc')
-rw-r--r-- sbc/Makefile.am 3
-rw-r--r-- sbc/sbc.c 155
-rw-r--r-- sbc/sbc_math.h 2
-rw-r--r-- sbc/sbc_primitives.c 401
-rw-r--r-- sbc/sbc_primitives.h 52
-rw-r--r-- sbc/sbc_tables.h 250
6 files changed, 703 insertions, 160 deletions
diff --git a/sbc/Makefile.am b/sbc/Makefile.am
index c42f1622..cd068e76 100644
--- a/sbc/Makefile.am
+++ b/sbc/Makefile.am
@@ -8,7 +8,8 @@ endif
if SBC
noinst_LTLIBRARIES = libsbc.la
-libsbc_la_SOURCES = sbc.h sbc.c sbc_math.h sbc_tables.h
+libsbc_la_SOURCES = sbc.h sbc.c sbc_math.h sbc_tables.h \
+ sbc_primitives.c
libsbc_la_CFLAGS = -finline-functions -funswitch-loops -fgcse-after-reload
diff --git a/sbc/sbc.c b/sbc/sbc.c
index 651981fa..534c9359 100644
--- a/sbc/sbc.c
+++ b/sbc/sbc.c
@@ -46,6 +46,7 @@
#include "sbc_tables.h"
#include "sbc.h"
+#include "sbc_primitives.h"
#define SBC_SYNCWORD 0x9C
@@ -91,16 +92,6 @@ struct sbc_decoder_state {
int offset[2][16];
};
-struct sbc_encoder_state {
- int subbands;
- int position[2];
- int16_t X[2][256];
- void (*sbc_analyze_4b_4s)(int16_t *pcm, int16_t *x,
- int32_t *out, int out_stride);
- void (*sbc_analyze_4b_8s)(int16_t *pcm, int16_t *x,
- int32_t *out, int out_stride);
-};
-
/*
* Calculates the CRC-8 of the first len bits in data
*/
@@ -653,146 +644,6 @@ static int sbc_synthesize_audio(struct sbc_decoder_state *state,
}
}
-static inline void _sbc_analyze_four(const int16_t *in, int32_t *out)
-{
- FIXED_A t1[4];
- FIXED_T t2[4];
- int i = 0, hop = 0;
-
- /* rounding coefficient */
- t1[0] = t1[1] = t1[2] = t1[3] =
- (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1);
-
- /* low pass polyphase filter */
- for (hop = 0; hop < 40; hop += 8) {
- t1[0] += (FIXED_A) in[hop] * _sbc_proto_fixed4[hop];
- t1[1] += (FIXED_A) in[hop + 1] * _sbc_proto_fixed4[hop + 1];
- t1[2] += (FIXED_A) in[hop + 2] * _sbc_proto_fixed4[hop + 2];
- t1[1] += (FIXED_A) in[hop + 3] * _sbc_proto_fixed4[hop + 3];
- t1[0] += (FIXED_A) in[hop + 4] * _sbc_proto_fixed4[hop + 4];
- t1[3] += (FIXED_A) in[hop + 5] * _sbc_proto_fixed4[hop + 5];
- t1[3] += (FIXED_A) in[hop + 7] * _sbc_proto_fixed4[hop + 7];
- }
-
- /* scaling */
- t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE;
- t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE;
- t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE;
- t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE;
-
- /* do the cos transform */
- for (i = 0, hop = 0; i < 4; hop += 8, i++) {
- out[i] = ((FIXED_A) t2[0] * cos_table_fixed_4[0 + hop] +
- (FIXED_A) t2[1] * cos_table_fixed_4[1 + hop] +
- (FIXED_A) t2[2] * cos_table_fixed_4[2 + hop] +
- (FIXED_A) t2[3] * cos_table_fixed_4[5 + hop]) >>
- (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
- }
-}
-
-static void sbc_analyze_4b_4s(int16_t *pcm, int16_t *x,
- int32_t *out, int out_stride)
-{
- int i;
-
- /* Input 4 x 4 Audio Samples */
- for (i = 0; i < 16; i += 4) {
- x[64 + i] = x[0 + i] = pcm[15 - i];
- x[65 + i] = x[1 + i] = pcm[14 - i];
- x[66 + i] = x[2 + i] = pcm[13 - i];
- x[67 + i] = x[3 + i] = pcm[12 - i];
- }
-
- /* Analyze four blocks */
- _sbc_analyze_four(x + 12, out);
- out += out_stride;
- _sbc_analyze_four(x + 8, out);
- out += out_stride;
- _sbc_analyze_four(x + 4, out);
- out += out_stride;
- _sbc_analyze_four(x, out);
-}
-
-static inline void _sbc_analyze_eight(const int16_t *in, int32_t *out)
-{
- FIXED_A t1[8];
- FIXED_T t2[8];
- int i, hop;
-
- /* rounding coefficient */
- t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] =
- (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1);
-
- /* low pass polyphase filter */
- for (hop = 0; hop < 80; hop += 16) {
- t1[0] += (FIXED_A) in[hop] * _sbc_proto_fixed8[hop];
- t1[1] += (FIXED_A) in[hop + 1] * _sbc_proto_fixed8[hop + 1];
- t1[2] += (FIXED_A) in[hop + 2] * _sbc_proto_fixed8[hop + 2];
- t1[3] += (FIXED_A) in[hop + 3] * _sbc_proto_fixed8[hop + 3];
- t1[4] += (FIXED_A) in[hop + 4] * _sbc_proto_fixed8[hop + 4];
- t1[3] += (FIXED_A) in[hop + 5] * _sbc_proto_fixed8[hop + 5];
- t1[2] += (FIXED_A) in[hop + 6] * _sbc_proto_fixed8[hop + 6];
- t1[1] += (FIXED_A) in[hop + 7] * _sbc_proto_fixed8[hop + 7];
- t1[0] += (FIXED_A) in[hop + 8] * _sbc_proto_fixed8[hop + 8];
- t1[5] += (FIXED_A) in[hop + 9] * _sbc_proto_fixed8[hop + 9];
- t1[6] += (FIXED_A) in[hop + 10] * _sbc_proto_fixed8[hop + 10];
- t1[7] += (FIXED_A) in[hop + 11] * _sbc_proto_fixed8[hop + 11];
- t1[7] += (FIXED_A) in[hop + 13] * _sbc_proto_fixed8[hop + 13];
- t1[6] += (FIXED_A) in[hop + 14] * _sbc_proto_fixed8[hop + 14];
- t1[5] += (FIXED_A) in[hop + 15] * _sbc_proto_fixed8[hop + 15];
- }
-
- /* scaling */
- t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE;
- t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE;
- t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE;
- t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE;
- t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE;
- t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE;
- t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE;
- t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE;
-
- /* do the cos transform */
- for (i = 0, hop = 0; i < 8; hop += 16, i++) {
- out[i] = ((FIXED_A) t2[0] * cos_table_fixed_8[0 + hop] +
- (FIXED_A) t2[1] * cos_table_fixed_8[1 + hop] +
- (FIXED_A) t2[2] * cos_table_fixed_8[2 + hop] +
- (FIXED_A) t2[3] * cos_table_fixed_8[3 + hop] +
- (FIXED_A) t2[4] * cos_table_fixed_8[4 + hop] +
- (FIXED_A) t2[5] * cos_table_fixed_8[9 + hop] +
- (FIXED_A) t2[6] * cos_table_fixed_8[10 + hop] +
- (FIXED_A) t2[7] * cos_table_fixed_8[11 + hop]) >>
- (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS);
- }
-}
-
-static void sbc_analyze_4b_8s(int16_t *pcm, int16_t *x,
- int32_t *out, int out_stride)
-{
- int i;
-
- /* Input 4 x 8 Audio Samples */
- for (i = 0; i < 32; i += 8) {
- x[128 + i] = x[0 + i] = pcm[31 - i];
- x[129 + i] = x[1 + i] = pcm[30 - i];
- x[130 + i] = x[2 + i] = pcm[29 - i];
- x[131 + i] = x[3 + i] = pcm[28 - i];
- x[132 + i] = x[4 + i] = pcm[27 - i];
- x[133 + i] = x[5 + i] = pcm[26 - i];
- x[134 + i] = x[6 + i] = pcm[25 - i];
- x[135 + i] = x[7 + i] = pcm[24 - i];
- }
-
- /* Analyze four blocks */
- _sbc_analyze_eight(x + 24, out);
- out += out_stride;
- _sbc_analyze_eight(x + 16, out);
- out += out_stride;
- _sbc_analyze_eight(x + 8, out);
- out += out_stride;
- _sbc_analyze_eight(x, out);
-}
-
static int sbc_analyze_audio(struct sbc_encoder_state *state,
struct sbc_frame *frame)
{
@@ -1056,9 +907,7 @@ static void sbc_encoder_init(struct sbc_encoder_state *state,
state->subbands = frame->subbands;
state->position[0] = state->position[1] = 12 * frame->subbands;
- /* Default implementation for analyze function */
- state->sbc_analyze_4b_4s = sbc_analyze_4b_4s;
- state->sbc_analyze_4b_8s = sbc_analyze_4b_8s;
+ sbc_init_primitives(state);
}
struct sbc_priv {
diff --git a/sbc/sbc_math.h b/sbc/sbc_math.h
index 6ca4f526..b87bc81c 100644
--- a/sbc/sbc_math.h
+++ b/sbc/sbc_math.h
@@ -29,8 +29,6 @@
#define ASR(val, bits) ((-2 >> 1 == -1) ? \
((int32_t)(val)) >> (bits) : ((int32_t) (val)) / (1 << (bits)))
-#define SCALE_OUT_BITS 15
-
#define SCALE_SPROTO4_TBL 12
#define SCALE_SPROTO8_TBL 14
#define SCALE_NPROTO4_TBL 11
diff --git a/sbc/sbc_primitives.c b/sbc/sbc_primitives.c
new file mode 100644
index 00000000..f2e75b4c
--- /dev/null
+++ b/sbc/sbc_primitives.c
@@ -0,0 +1,401 @@
+/*
+ *
+ * Bluetooth low-complexity, subband codec (SBC) library
+ *
+ * Copyright (C) 2004-2009 Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
+ *
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include <stdint.h>
+#include <limits.h>
+#include "sbc.h"
+#include "sbc_math.h"
+#include "sbc_tables.h"
+
+#include "sbc_primitives.h"
+
+/*
+ * Plain C implementation of the analysis filter.
+ */
+static inline void sbc_analyze_four(const int16_t *in, int32_t *out)
+{
+ FIXED_A t1[4];
+ FIXED_T t2[4];
+ int i = 0, hop = 0;
+
+ /* rounding coefficient */
+ t1[0] = t1[1] = t1[2] = t1[3] =
+ (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1);
+
+ /* low pass polyphase filter */
+ for (hop = 0; hop < 40; hop += 8) {
+ t1[0] += (FIXED_A) in[hop] * _sbc_proto_fixed4[hop];
+ t1[1] += (FIXED_A) in[hop + 1] * _sbc_proto_fixed4[hop + 1];
+ t1[2] += (FIXED_A) in[hop + 2] * _sbc_proto_fixed4[hop + 2];
+ t1[1] += (FIXED_A) in[hop + 3] * _sbc_proto_fixed4[hop + 3];
+ t1[0] += (FIXED_A) in[hop + 4] * _sbc_proto_fixed4[hop + 4];
+ t1[3] += (FIXED_A) in[hop + 5] * _sbc_proto_fixed4[hop + 5];
+ t1[3] += (FIXED_A) in[hop + 7] * _sbc_proto_fixed4[hop + 7];
+ }
+
+ /* scaling */
+ t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE;
+ t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE;
+ t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE;
+ t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE;
+
+ /* do the cos transform */
+ for (i = 0, hop = 0; i < 4; hop += 8, i++) {
+ out[i] = ((FIXED_A) t2[0] * cos_table_fixed_4[0 + hop] +
+ (FIXED_A) t2[1] * cos_table_fixed_4[1 + hop] +
+ (FIXED_A) t2[2] * cos_table_fixed_4[2 + hop] +
+ (FIXED_A) t2[3] * cos_table_fixed_4[5 + hop]) >>
+ (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
+ }
+}
+
+static void sbc_analyze_4b_4s(int16_t *pcm, int16_t *x,
+ int32_t *out, int out_stride)
+{
+ int i;
+
+ /* Input 4 x 4 Audio Samples */
+ for (i = 0; i < 16; i += 4) {
+ x[64 + i] = x[0 + i] = pcm[15 - i];
+ x[65 + i] = x[1 + i] = pcm[14 - i];
+ x[66 + i] = x[2 + i] = pcm[13 - i];
+ x[67 + i] = x[3 + i] = pcm[12 - i];
+ }
+
+ /* Analyze four blocks */
+ sbc_analyze_four(x + 12, out);
+ out += out_stride;
+ sbc_analyze_four(x + 8, out);
+ out += out_stride;
+ sbc_analyze_four(x + 4, out);
+ out += out_stride;
+ sbc_analyze_four(x, out);
+}
+
+static inline void sbc_analyze_eight(const int16_t *in, int32_t *out)
+{
+ FIXED_A t1[8];
+ FIXED_T t2[8];
+ int i, hop;
+
+ /* rounding coefficient */
+ t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] =
+ (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1);
+
+ /* low pass polyphase filter */
+ for (hop = 0; hop < 80; hop += 16) {
+ t1[0] += (FIXED_A) in[hop] * _sbc_proto_fixed8[hop];
+ t1[1] += (FIXED_A) in[hop + 1] * _sbc_proto_fixed8[hop + 1];
+ t1[2] += (FIXED_A) in[hop + 2] * _sbc_proto_fixed8[hop + 2];
+ t1[3] += (FIXED_A) in[hop + 3] * _sbc_proto_fixed8[hop + 3];
+ t1[4] += (FIXED_A) in[hop + 4] * _sbc_proto_fixed8[hop + 4];
+ t1[3] += (FIXED_A) in[hop + 5] * _sbc_proto_fixed8[hop + 5];
+ t1[2] += (FIXED_A) in[hop + 6] * _sbc_proto_fixed8[hop + 6];
+ t1[1] += (FIXED_A) in[hop + 7] * _sbc_proto_fixed8[hop + 7];
+ t1[0] += (FIXED_A) in[hop + 8] * _sbc_proto_fixed8[hop + 8];
+ t1[5] += (FIXED_A) in[hop + 9] * _sbc_proto_fixed8[hop + 9];
+ t1[6] += (FIXED_A) in[hop + 10] * _sbc_proto_fixed8[hop + 10];
+ t1[7] += (FIXED_A) in[hop + 11] * _sbc_proto_fixed8[hop + 11];
+ t1[7] += (FIXED_A) in[hop + 13] * _sbc_proto_fixed8[hop + 13];
+ t1[6] += (FIXED_A) in[hop + 14] * _sbc_proto_fixed8[hop + 14];
+ t1[5] += (FIXED_A) in[hop + 15] * _sbc_proto_fixed8[hop + 15];
+ }
+
+ /* scaling */
+ t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE;
+ t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE;
+ t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE;
+ t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE;
+ t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE;
+ t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE;
+ t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE;
+ t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE;
+
+ /* do the cos transform */
+ for (i = 0, hop = 0; i < 8; hop += 16, i++) {
+ out[i] = ((FIXED_A) t2[0] * cos_table_fixed_8[0 + hop] +
+ (FIXED_A) t2[1] * cos_table_fixed_8[1 + hop] +
+ (FIXED_A) t2[2] * cos_table_fixed_8[2 + hop] +
+ (FIXED_A) t2[3] * cos_table_fixed_8[3 + hop] +
+ (FIXED_A) t2[4] * cos_table_fixed_8[4 + hop] +
+ (FIXED_A) t2[5] * cos_table_fixed_8[9 + hop] +
+ (FIXED_A) t2[6] * cos_table_fixed_8[10 + hop] +
+ (FIXED_A) t2[7] * cos_table_fixed_8[11 + hop]) >>
+ (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS);
+ }
+}
+
+static void sbc_analyze_4b_8s(int16_t *pcm, int16_t *x,
+ int32_t *out, int out_stride)
+{
+ int i;
+
+ /* Input 4 x 8 Audio Samples */
+ for (i = 0; i < 32; i += 8) {
+ x[128 + i] = x[0 + i] = pcm[31 - i];
+ x[129 + i] = x[1 + i] = pcm[30 - i];
+ x[130 + i] = x[2 + i] = pcm[29 - i];
+ x[131 + i] = x[3 + i] = pcm[28 - i];
+ x[132 + i] = x[4 + i] = pcm[27 - i];
+ x[133 + i] = x[5 + i] = pcm[26 - i];
+ x[134 + i] = x[6 + i] = pcm[25 - i];
+ x[135 + i] = x[7 + i] = pcm[24 - i];
+ }
+
+ /* Analyze four blocks */
+ sbc_analyze_eight(x + 24, out);
+ out += out_stride;
+ sbc_analyze_eight(x + 16, out);
+ out += out_stride;
+ sbc_analyze_eight(x + 8, out);
+ out += out_stride;
+ sbc_analyze_eight(x, out);
+}
+
+/*
+ * Reference C implementation of the analysis filter with SIMD-friendly
+ * table reordering and code layout. This code can be used as a basis for
+ * platform-specific SIMD optimizations. It may also serve as a test of
+ * compiler autovectorization capabilities (if the compiler is good enough
+ * at this, hand-optimized assembly may not be strictly needed on some
+ * platforms).
+ */
+
+static inline void sbc_analyze_four_simd(const int16_t *in, int32_t *out,
+ const FIXED_T *consts)
+{
+ FIXED_A t1[4];
+ FIXED_T t2[4];
+ int hop = 0;
+
+ /* rounding coefficient */
+ t1[0] = t1[1] = t1[2] = t1[3] =
+ (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1);
+
+ /* low pass polyphase filter */
+ for (hop = 0; hop < 40; hop += 8) {
+ t1[0] += (FIXED_A) in[hop] * consts[hop];
+ t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1];
+ t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2];
+ t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3];
+ t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4];
+ t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5];
+ t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6];
+ t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7];
+ }
+
+ /* scaling */
+ t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE;
+ t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE;
+ t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE;
+ t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE;
+
+ /* do the cos transform */
+ t1[0] = (FIXED_A) t2[0] * consts[40 + 0];
+ t1[0] += (FIXED_A) t2[1] * consts[40 + 1];
+ t1[1] = (FIXED_A) t2[0] * consts[40 + 2];
+ t1[1] += (FIXED_A) t2[1] * consts[40 + 3];
+ t1[2] = (FIXED_A) t2[0] * consts[40 + 4];
+ t1[2] += (FIXED_A) t2[1] * consts[40 + 5];
+ t1[3] = (FIXED_A) t2[0] * consts[40 + 6];
+ t1[3] += (FIXED_A) t2[1] * consts[40 + 7];
+
+ t1[0] += (FIXED_A) t2[2] * consts[40 + 8];
+ t1[0] += (FIXED_A) t2[3] * consts[40 + 9];
+ t1[1] += (FIXED_A) t2[2] * consts[40 + 10];
+ t1[1] += (FIXED_A) t2[3] * consts[40 + 11];
+ t1[2] += (FIXED_A) t2[2] * consts[40 + 12];
+ t1[2] += (FIXED_A) t2[3] * consts[40 + 13];
+ t1[3] += (FIXED_A) t2[2] * consts[40 + 14];
+ t1[3] += (FIXED_A) t2[3] * consts[40 + 15];
+
+ out[0] = t1[0] >>
+ (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
+ out[1] = t1[1] >>
+ (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
+ out[2] = t1[2] >>
+ (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
+ out[3] = t1[3] >>
+ (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS);
+}
+
+static inline void sbc_analyze_eight_simd(const int16_t *in, int32_t *out,
+ const FIXED_T *consts)
+{
+ FIXED_A t1[8];
+ FIXED_T t2[8];
+ int i, hop;
+
+ /* rounding coefficient */
+ t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] =
+ (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1);
+
+ /* low pass polyphase filter */
+ for (hop = 0; hop < 80; hop += 16) {
+ t1[0] += (FIXED_A) in[hop] * consts[hop];
+ t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1];
+ t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2];
+ t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3];
+ t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4];
+ t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5];
+ t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6];
+ t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7];
+ t1[4] += (FIXED_A) in[hop + 8] * consts[hop + 8];
+ t1[4] += (FIXED_A) in[hop + 9] * consts[hop + 9];
+ t1[5] += (FIXED_A) in[hop + 10] * consts[hop + 10];
+ t1[5] += (FIXED_A) in[hop + 11] * consts[hop + 11];
+ t1[6] += (FIXED_A) in[hop + 12] * consts[hop + 12];
+ t1[6] += (FIXED_A) in[hop + 13] * consts[hop + 13];
+ t1[7] += (FIXED_A) in[hop + 14] * consts[hop + 14];
+ t1[7] += (FIXED_A) in[hop + 15] * consts[hop + 15];
+ }
+
+ /* scaling */
+ t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE;
+ t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE;
+ t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE;
+ t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE;
+ t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE;
+ t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE;
+ t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE;
+ t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE;
+
+
+ /* do the cos transform */
+ t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0;
+
+ for (i = 0; i < 4; i++) {
+ t1[0] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 0];
+ t1[0] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 1];
+ t1[1] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 2];
+ t1[1] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 3];
+ t1[2] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 4];
+ t1[2] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 5];
+ t1[3] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 6];
+ t1[3] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 7];
+ t1[4] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 8];
+ t1[4] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 9];
+ t1[5] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 10];
+ t1[5] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 11];
+ t1[6] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 12];
+ t1[6] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 13];
+ t1[7] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 14];
+ t1[7] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 15];
+ }
+
+ for (i = 0; i < 8; i++)
+ out[i] = t1[i] >>
+ (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS);
+}
+
+static inline void sbc_analyze_4b_4s_simd(int16_t *pcm, int16_t *x,
+ int32_t *out, int out_stride)
+{
+ /* Fetch audio samples and do input data reordering for SIMD */
+ x[64] = x[0] = pcm[8 + 7];
+ x[65] = x[1] = pcm[8 + 3];
+ x[66] = x[2] = pcm[8 + 6];
+ x[67] = x[3] = pcm[8 + 4];
+ x[68] = x[4] = pcm[8 + 0];
+ x[69] = x[5] = pcm[8 + 2];
+ x[70] = x[6] = pcm[8 + 1];
+ x[71] = x[7] = pcm[8 + 5];
+
+ x[72] = x[8] = pcm[0 + 7];
+ x[73] = x[9] = pcm[0 + 3];
+ x[74] = x[10] = pcm[0 + 6];
+ x[75] = x[11] = pcm[0 + 4];
+ x[76] = x[12] = pcm[0 + 0];
+ x[77] = x[13] = pcm[0 + 2];
+ x[78] = x[14] = pcm[0 + 1];
+ x[79] = x[15] = pcm[0 + 5];
+
+ /* Analyze blocks */
+ sbc_analyze_four_simd(x + 12, out, analysis_consts_fixed4_simd_odd);
+ out += out_stride;
+ sbc_analyze_four_simd(x + 8, out, analysis_consts_fixed4_simd_even);
+ out += out_stride;
+ sbc_analyze_four_simd(x + 4, out, analysis_consts_fixed4_simd_odd);
+ out += out_stride;
+ sbc_analyze_four_simd(x + 0, out, analysis_consts_fixed4_simd_even);
+}
+
+static inline void sbc_analyze_4b_8s_simd(int16_t *pcm, int16_t *x,
+ int32_t *out, int out_stride)
+{
+ /* Fetch audio samples and do input data reordering for SIMD */
+ x[128] = x[0] = pcm[16 + 15];
+ x[129] = x[1] = pcm[16 + 7];
+ x[130] = x[2] = pcm[16 + 14];
+ x[131] = x[3] = pcm[16 + 8];
+ x[132] = x[4] = pcm[16 + 13];
+ x[133] = x[5] = pcm[16 + 9];
+ x[134] = x[6] = pcm[16 + 12];
+ x[135] = x[7] = pcm[16 + 10];
+ x[136] = x[8] = pcm[16 + 11];
+ x[137] = x[9] = pcm[16 + 3];
+ x[138] = x[10] = pcm[16 + 6];
+ x[139] = x[11] = pcm[16 + 0];
+ x[140] = x[12] = pcm[16 + 5];
+ x[141] = x[13] = pcm[16 + 1];
+ x[142] = x[14] = pcm[16 + 4];
+ x[143] = x[15] = pcm[16 + 2];
+
+ x[144] = x[16] = pcm[0 + 15];
+ x[145] = x[17] = pcm[0 + 7];
+ x[146] = x[18] = pcm[0 + 14];
+ x[147] = x[19] = pcm[0 + 8];
+ x[148] = x[20] = pcm[0 + 13];
+ x[149] = x[21] = pcm[0 + 9];
+ x[150] = x[22] = pcm[0 + 12];
+ x[151] = x[23] = pcm[0 + 10];
+ x[152] = x[24] = pcm[0 + 11];
+ x[153] = x[25] = pcm[0 + 3];
+ x[154] = x[26] = pcm[0 + 6];
+ x[155] = x[27] = pcm[0 + 0];
+ x[156] = x[28] = pcm[0 + 5];
+ x[157] = x[29] = pcm[0 + 1];
+ x[158] = x[30] = pcm[0 + 4];
+ x[159] = x[31] = pcm[0 + 2];
+
+ /* Analyze blocks */
+ sbc_analyze_eight_simd(x + 24, out, analysis_consts_fixed8_simd_odd);
+ out += out_stride;
+ sbc_analyze_eight_simd(x + 16, out, analysis_consts_fixed8_simd_even);
+ out += out_stride;
+ sbc_analyze_eight_simd(x + 8, out, analysis_consts_fixed8_simd_odd);
+ out += out_stride;
+ sbc_analyze_eight_simd(x + 0, out, analysis_consts_fixed8_simd_even);
+}
+
+/*
+ * Detect CPU features and set up function pointers
+ */
+void sbc_init_primitives(struct sbc_encoder_state *state)
+{
+ /* Default implementation for analyze functions */
+ state->sbc_analyze_4b_4s = sbc_analyze_4b_4s;
+ state->sbc_analyze_4b_8s = sbc_analyze_4b_8s;
+}
diff --git a/sbc/sbc_primitives.h b/sbc/sbc_primitives.h
new file mode 100644
index 00000000..ca1ec277
--- /dev/null
+++ b/sbc/sbc_primitives.h
@@ -0,0 +1,52 @@
+/*
+ *
+ * Bluetooth low-complexity, subband codec (SBC) library
+ *
+ * Copyright (C) 2004-2009 Marcel Holtmann <marcel@holtmann.org>
+ * Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
+ * Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
+ *
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#ifndef __SBC_PRIMITIVES_H
+#define __SBC_PRIMITIVES_H
+
+#define SCALE_OUT_BITS 15
+
+struct sbc_encoder_state {
+ int subbands;
+ int position[2];
+ int16_t X[2][256];
+ /* Polyphase analysis filter for the 4-subband configuration;
+ it handles 4 blocks at once */
+ void (*sbc_analyze_4b_4s)(int16_t *pcm, int16_t *x,
+ int32_t *out, int out_stride);
+ /* Polyphase analysis filter for the 8-subband configuration;
+ it handles 4 blocks at once */
+ void (*sbc_analyze_4b_8s)(int16_t *pcm, int16_t *x,
+ int32_t *out, int out_stride);
+};
+
+/*
+ * Initialize pointers to the functions which are the basic "building blocks"
+ * of the SBC codec. The best implementation is selected based on target CPU
+ * capabilities.
+ */
+void sbc_init_primitives(struct sbc_encoder_state *encoder_state);
+
+#endif
diff --git a/sbc/sbc_tables.h b/sbc/sbc_tables.h
index f1dfe6c0..a9a995fa 100644
--- a/sbc/sbc_tables.h
+++ b/sbc/sbc_tables.h
@@ -157,8 +157,9 @@ static const int32_t synmatrix8[16][8] = {
*/
#define SBC_PROTO_FIXED4_SCALE \
((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1)
-#define F(x) (FIXED_A) ((x * 2) * \
+#define F_PROTO4(x) (FIXED_A) ((x * 2) * \
((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5)
+#define F(x) F_PROTO4(x)
static const FIXED_T _sbc_proto_fixed4[40] = {
F(0.00000000E+00), F(5.36548976E-04),
-F(1.49188357E-03), F(2.73370904E-03),
@@ -206,8 +207,9 @@ static const FIXED_T _sbc_proto_fixed4[40] = {
*/
#define SBC_COS_TABLE_FIXED4_SCALE \
((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS)
-#define F(x) (FIXED_A) ((x) * \
+#define F_COS4(x) (FIXED_A) ((x) * \
((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5)
+#define F(x) F_COS4(x)
static const FIXED_T cos_table_fixed_4[32] = {
F(0.7071067812), F(0.9238795325), -F(1.0000000000), F(0.9238795325),
F(0.7071067812), F(0.3826834324), F(0.0000000000), F(0.3826834324),
@@ -233,8 +235,9 @@ static const FIXED_T cos_table_fixed_4[32] = {
*/
#define SBC_PROTO_FIXED8_SCALE \
((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 2)
-#define F(x) (FIXED_A) ((x * 4) * \
+#define F_PROTO8(x) (FIXED_A) ((x * 4) * \
((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5)
+#define F(x) F_PROTO8(x)
static const FIXED_T _sbc_proto_fixed8[80] = {
F(0.00000000E+00), F(1.56575398E-04),
F(3.43256425E-04), F(5.54620202E-04),
@@ -301,8 +304,9 @@ static const FIXED_T _sbc_proto_fixed8[80] = {
*/
#define SBC_COS_TABLE_FIXED8_SCALE \
((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS)
-#define F(x) (FIXED_A) ((x) * \
+#define F_COS8(x) (FIXED_A) ((x) * \
((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5)
+#define F(x) F_COS8(x)
static const FIXED_T cos_table_fixed_8[128] = {
F(0.7071067812), F(0.8314696123), F(0.9238795325), F(0.9807852804),
-F(1.0000000000), F(0.9807852804), F(0.9238795325), F(0.8314696123),
@@ -345,3 +349,241 @@ static const FIXED_T cos_table_fixed_8[128] = {
-F(0.0000000000), -F(0.1950903220), F(0.3826834324), -F(0.5555702330),
};
#undef F
+
+/*
+ * Constant tables for use in SIMD-optimized analysis filters
+ * Each table consists of two parts:
+ * 1. reordered "proto" table
+ * 2. reordered "cos" table
+ *
+ * Due to non-symmetrical reordering, separate tables for "even"
+ * and "odd" cases are needed
+ */
+
+static const FIXED_T analysis_consts_fixed4_simd_even[40 + 16] = {
+#define F(x) F_PROTO4(x)
+ F(0.00000000E+00), F(3.83720193E-03),
+ F(5.36548976E-04), F(2.73370904E-03),
+ F(3.06012286E-03), F(3.89205149E-03),
+ F(0.00000000E+00), -F(1.49188357E-03),
+ F(1.09137620E-02), F(2.58767811E-02),
+ F(2.04385087E-02), F(3.21939290E-02),
+ F(7.76463494E-02), F(6.13245186E-03),
+ F(0.00000000E+00), -F(2.88757392E-02),
+ F(1.35593274E-01), F(2.94315332E-01),
+ F(1.94987841E-01), F(2.81828203E-01),
+ -F(1.94987841E-01), F(2.81828203E-01),
+ F(0.00000000E+00), -F(2.46636662E-01),
+ -F(1.35593274E-01), F(2.58767811E-02),
+ -F(7.76463494E-02), F(6.13245186E-03),
+ -F(2.04385087E-02), F(3.21939290E-02),
+ F(0.00000000E+00), F(2.88217274E-02),
+ -F(1.09137620E-02), F(3.83720193E-03),
+ -F(3.06012286E-03), F(3.89205149E-03),
+ -F(5.36548976E-04), F(2.73370904E-03),
+ F(0.00000000E+00), -F(1.86581691E-03),
+#undef F
+#define F(x) F_COS4(x)
+ F(0.7071067812), F(0.9238795325),
+ -F(0.7071067812), F(0.3826834324),
+ -F(0.7071067812), -F(0.3826834324),
+ F(0.7071067812), -F(0.9238795325),
+ F(0.3826834324), -F(1.0000000000),
+ -F(0.9238795325), -F(1.0000000000),
+ F(0.9238795325), -F(1.0000000000),
+ -F(0.3826834324), -F(1.0000000000),
+#undef F
+};
+
+static const FIXED_T analysis_consts_fixed4_simd_odd[40 + 16] = {
+#define F(x) F_PROTO4(x)
+ F(2.73370904E-03), F(5.36548976E-04),
+ -F(1.49188357E-03), F(0.00000000E+00),
+ F(3.83720193E-03), F(1.09137620E-02),
+ F(3.89205149E-03), F(3.06012286E-03),
+ F(3.21939290E-02), F(2.04385087E-02),
+ -F(2.88757392E-02), F(0.00000000E+00),
+ F(2.58767811E-02), F(1.35593274E-01),
+ F(6.13245186E-03), F(7.76463494E-02),
+ F(2.81828203E-01), F(1.94987841E-01),
+ -F(2.46636662E-01), F(0.00000000E+00),
+ F(2.94315332E-01), -F(1.35593274E-01),
+ F(2.81828203E-01), -F(1.94987841E-01),
+ F(6.13245186E-03), -F(7.76463494E-02),
+ F(2.88217274E-02), F(0.00000000E+00),
+ F(2.58767811E-02), -F(1.09137620E-02),
+ F(3.21939290E-02), -F(2.04385087E-02),
+ F(3.89205149E-03), -F(3.06012286E-03),
+ -F(1.86581691E-03), F(0.00000000E+00),
+ F(3.83720193E-03), F(0.00000000E+00),
+ F(2.73370904E-03), -F(5.36548976E-04),
+#undef F
+#define F(x) F_COS4(x)
+ F(0.9238795325), -F(1.0000000000),
+ F(0.3826834324), -F(1.0000000000),
+ -F(0.3826834324), -F(1.0000000000),
+ -F(0.9238795325), -F(1.0000000000),
+ F(0.7071067812), F(0.3826834324),
+ -F(0.7071067812), -F(0.9238795325),
+ -F(0.7071067812), F(0.9238795325),
+ F(0.7071067812), -F(0.3826834324),
+#undef F
+};
+
+static const FIXED_T analysis_consts_fixed8_simd_even[80 + 64] = {
+#define F(x) F_PROTO8(x)
+ F(0.00000000E+00), F(2.01182542E-03),
+ F(1.56575398E-04), F(1.78371725E-03),
+ F(3.43256425E-04), F(1.47640169E-03),
+ F(5.54620202E-04), F(1.13992507E-03),
+ -F(8.23919506E-04), F(0.00000000E+00),
+ F(2.10371989E-03), F(3.49717454E-03),
+ F(1.99454554E-03), F(1.64973098E-03),
+ F(1.61656283E-03), F(1.78805361E-04),
+ F(5.65949473E-03), F(1.29371806E-02),
+ F(8.02941163E-03), F(1.53184106E-02),
+ F(1.04584443E-02), F(1.62208471E-02),
+ F(1.27472335E-02), F(1.59045603E-02),
+ -F(1.46525263E-02), F(0.00000000E+00),
+ F(8.85757540E-03), F(5.31873032E-02),
+ F(2.92408442E-03), F(3.90751381E-02),
+ -F(4.91578024E-03), F(2.61098752E-02),
+ F(6.79989431E-02), F(1.46955068E-01),
+ F(8.29847578E-02), F(1.45389847E-01),
+ F(9.75753918E-02), F(1.40753505E-01),
+ F(1.11196689E-01), F(1.33264415E-01),
+ -F(1.23264548E-01), F(0.00000000E+00),
+ F(1.45389847E-01), -F(8.29847578E-02),
+ F(1.40753505E-01), -F(9.75753918E-02),
+ F(1.33264415E-01), -F(1.11196689E-01),
+ -F(6.79989431E-02), F(1.29371806E-02),
+ -F(5.31873032E-02), F(8.85757540E-03),
+ -F(3.90751381E-02), F(2.92408442E-03),
+ -F(2.61098752E-02), -F(4.91578024E-03),
+ F(1.46404076E-02), F(0.00000000E+00),
+ F(1.53184106E-02), -F(8.02941163E-03),
+ F(1.62208471E-02), -F(1.04584443E-02),
+ F(1.59045603E-02), -F(1.27472335E-02),
+ -F(5.65949473E-03), F(2.01182542E-03),
+ -F(3.49717454E-03), F(2.10371989E-03),
+ -F(1.64973098E-03), F(1.99454554E-03),
+ -F(1.78805361E-04), F(1.61656283E-03),
+ -F(9.02154502E-04), F(0.00000000E+00),
+ F(1.78371725E-03), -F(1.56575398E-04),
+ F(1.47640169E-03), -F(3.43256425E-04),
+ F(1.13992507E-03), -F(5.54620202E-04),
+#undef F
+#define F(x) F_COS8(x)
+ F(0.7071067812), F(0.8314696123),
+ -F(0.7071067812), -F(0.1950903220),
+ -F(0.7071067812), -F(0.9807852804),
+ F(0.7071067812), -F(0.5555702330),
+ F(0.7071067812), F(0.5555702330),
+ -F(0.7071067812), F(0.9807852804),
+ -F(0.7071067812), F(0.1950903220),
+ F(0.7071067812), -F(0.8314696123),
+ F(0.9238795325), F(0.9807852804),
+ F(0.3826834324), F(0.8314696123),
+ -F(0.3826834324), F(0.5555702330),
+ -F(0.9238795325), F(0.1950903220),
+ -F(0.9238795325), -F(0.1950903220),
+ -F(0.3826834324), -F(0.5555702330),
+ F(0.3826834324), -F(0.8314696123),
+ F(0.9238795325), -F(0.9807852804),
+ -F(1.0000000000), F(0.5555702330),
+ -F(1.0000000000), -F(0.9807852804),
+ -F(1.0000000000), F(0.1950903220),
+ -F(1.0000000000), F(0.8314696123),
+ -F(1.0000000000), -F(0.8314696123),
+ -F(1.0000000000), -F(0.1950903220),
+ -F(1.0000000000), F(0.9807852804),
+ -F(1.0000000000), -F(0.5555702330),
+ F(0.3826834324), F(0.1950903220),
+ -F(0.9238795325), -F(0.5555702330),
+ F(0.9238795325), F(0.8314696123),
+ -F(0.3826834324), -F(0.9807852804),
+ -F(0.3826834324), F(0.9807852804),
+ F(0.9238795325), -F(0.8314696123),
+ -F(0.9238795325), F(0.5555702330),
+ F(0.3826834324), -F(0.1950903220),
+#undef F
+};
+
+static const FIXED_T analysis_consts_fixed8_simd_odd[80 + 64] = {
+#define F(x) F_PROTO8(x)
+ F(0.00000000E+00), -F(8.23919506E-04),
+ F(1.56575398E-04), F(1.78371725E-03),
+ F(3.43256425E-04), F(1.47640169E-03),
+ F(5.54620202E-04), F(1.13992507E-03),
+ F(2.01182542E-03), F(5.65949473E-03),
+ F(2.10371989E-03), F(3.49717454E-03),
+ F(1.99454554E-03), F(1.64973098E-03),
+ F(1.61656283E-03), F(1.78805361E-04),
+ F(0.00000000E+00), -F(1.46525263E-02),
+ F(8.02941163E-03), F(1.53184106E-02),
+ F(1.04584443E-02), F(1.62208471E-02),
+ F(1.27472335E-02), F(1.59045603E-02),
+ F(1.29371806E-02), F(6.79989431E-02),
+ F(8.85757540E-03), F(5.31873032E-02),
+ F(2.92408442E-03), F(3.90751381E-02),
+ -F(4.91578024E-03), F(2.61098752E-02),
+ F(0.00000000E+00), -F(1.23264548E-01),
+ F(8.29847578E-02), F(1.45389847E-01),
+ F(9.75753918E-02), F(1.40753505E-01),
+ F(1.11196689E-01), F(1.33264415E-01),
+ F(1.46955068E-01), -F(6.79989431E-02),
+ F(1.45389847E-01), -F(8.29847578E-02),
+ F(1.40753505E-01), -F(9.75753918E-02),
+ F(1.33264415E-01), -F(1.11196689E-01),
+ F(0.00000000E+00), F(1.46404076E-02),
+ -F(5.31873032E-02), F(8.85757540E-03),
+ -F(3.90751381E-02), F(2.92408442E-03),
+ -F(2.61098752E-02), -F(4.91578024E-03),
+ F(1.29371806E-02), -F(5.65949473E-03),
+ F(1.53184106E-02), -F(8.02941163E-03),
+ F(1.62208471E-02), -F(1.04584443E-02),
+ F(1.59045603E-02), -F(1.27472335E-02),
+ F(0.00000000E+00), -F(9.02154502E-04),
+ -F(3.49717454E-03), F(2.10371989E-03),
+ -F(1.64973098E-03), F(1.99454554E-03),
+ -F(1.78805361E-04), F(1.61656283E-03),
+ F(2.01182542E-03), F(0.00000000E+00),
+ F(1.78371725E-03), -F(1.56575398E-04),
+ F(1.47640169E-03), -F(3.43256425E-04),
+ F(1.13992507E-03), -F(5.54620202E-04),
+#undef F
+#define F(x) F_COS8(x)
+ -F(1.0000000000), F(0.8314696123),
+ -F(1.0000000000), -F(0.1950903220),
+ -F(1.0000000000), -F(0.9807852804),
+ -F(1.0000000000), -F(0.5555702330),
+ -F(1.0000000000), F(0.5555702330),
+ -F(1.0000000000), F(0.9807852804),
+ -F(1.0000000000), F(0.1950903220),
+ -F(1.0000000000), -F(0.8314696123),
+ F(0.9238795325), F(0.9807852804),
+ F(0.3826834324), F(0.8314696123),
+ -F(0.3826834324), F(0.5555702330),
+ -F(0.9238795325), F(0.1950903220),
+ -F(0.9238795325), -F(0.1950903220),
+ -F(0.3826834324), -F(0.5555702330),
+ F(0.3826834324), -F(0.8314696123),
+ F(0.9238795325), -F(0.9807852804),
+ F(0.7071067812), F(0.5555702330),
+ -F(0.7071067812), -F(0.9807852804),
+ -F(0.7071067812), F(0.1950903220),
+ F(0.7071067812), F(0.8314696123),
+ F(0.7071067812), -F(0.8314696123),
+ -F(0.7071067812), -F(0.1950903220),
+ -F(0.7071067812), F(0.9807852804),
+ F(0.7071067812), -F(0.5555702330),
+ F(0.3826834324), F(0.1950903220),
+ -F(0.9238795325), -F(0.5555702330),
+ F(0.9238795325), F(0.8314696123),
+ -F(0.3826834324), -F(0.9807852804),
+ -F(0.3826834324), F(0.9807852804),
+ F(0.9238795325), -F(0.8314696123),
+ -F(0.9238795325), F(0.5555702330),
+ F(0.3826834324), -F(0.1950903220),
+#undef F
+};