diff options
| -rw-r--r-- | sbc/Makefile.am | 3 | ||||
| -rw-r--r-- | sbc/sbc.c | 155 | ||||
| -rw-r--r-- | sbc/sbc_math.h | 2 | ||||
| -rw-r--r-- | sbc/sbc_primitives.c | 401 | ||||
| -rw-r--r-- | sbc/sbc_primitives.h | 52 | ||||
| -rw-r--r-- | sbc/sbc_tables.h | 250 | 
6 files changed, 703 insertions, 160 deletions
| diff --git a/sbc/Makefile.am b/sbc/Makefile.am index c42f1622..cd068e76 100644 --- a/sbc/Makefile.am +++ b/sbc/Makefile.am @@ -8,7 +8,8 @@ endif  if SBC  noinst_LTLIBRARIES = libsbc.la -libsbc_la_SOURCES = sbc.h sbc.c sbc_math.h sbc_tables.h +libsbc_la_SOURCES = sbc.h sbc.c sbc_math.h sbc_tables.h \ +	sbc_primitives.c  libsbc_la_CFLAGS = -finline-functions -funswitch-loops -fgcse-after-reload @@ -46,6 +46,7 @@  #include "sbc_tables.h"  #include "sbc.h" +#include "sbc_primitives.h"  #define SBC_SYNCWORD	0x9C @@ -91,16 +92,6 @@ struct sbc_decoder_state {  	int offset[2][16];  }; -struct sbc_encoder_state { -	int subbands; -	int position[2]; -	int16_t X[2][256]; -	void (*sbc_analyze_4b_4s)(int16_t *pcm, int16_t *x, -				  int32_t *out, int out_stride); -	void (*sbc_analyze_4b_8s)(int16_t *pcm, int16_t *x, -				  int32_t *out, int out_stride); -}; -  /*   * Calculates the CRC-8 of the first len bits in data   */ @@ -653,146 +644,6 @@ static int sbc_synthesize_audio(struct sbc_decoder_state *state,  	}  } -static inline void _sbc_analyze_four(const int16_t *in, int32_t *out) -{ -	FIXED_A t1[4]; -	FIXED_T t2[4]; -	int i = 0, hop = 0; - -	/* rounding coefficient */ -	t1[0] = t1[1] = t1[2] = t1[3] = -		(FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); - -	/* low pass polyphase filter */ -	for (hop = 0; hop < 40; hop += 8) { -		t1[0] += (FIXED_A) in[hop] * _sbc_proto_fixed4[hop]; -		t1[1] += (FIXED_A) in[hop + 1] * _sbc_proto_fixed4[hop + 1]; -		t1[2] += (FIXED_A) in[hop + 2] * _sbc_proto_fixed4[hop + 2]; -		t1[1] += (FIXED_A) in[hop + 3] * _sbc_proto_fixed4[hop + 3]; -		t1[0] += (FIXED_A) in[hop + 4] * _sbc_proto_fixed4[hop + 4]; -		t1[3] += (FIXED_A) in[hop + 5] * _sbc_proto_fixed4[hop + 5]; -		t1[3] += (FIXED_A) in[hop + 7] * _sbc_proto_fixed4[hop + 7]; -	} - -	/* scaling */ -	t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; -	t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; -	t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; -	t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; - -	/* do the cos transform */ -	for (i = 0, hop = 0; i < 4; hop += 8, i++) { -		out[i] = ((FIXED_A) t2[0] * cos_table_fixed_4[0 + hop] + -			  (FIXED_A) t2[1] * cos_table_fixed_4[1 + hop] + -			  (FIXED_A) t2[2] * cos_table_fixed_4[2 + hop] + -			  (FIXED_A) t2[3] * cos_table_fixed_4[5 + hop]) >> -			(SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); -	} -} - -static void sbc_analyze_4b_4s(int16_t *pcm, int16_t *x, -			      int32_t *out, int out_stride) -{ -	int i; - -	/* Input 4 x 4 Audio Samples */ -	for (i = 0; i < 16; i += 4) { -		x[64 + i] = x[0 + i] = pcm[15 - i]; -		x[65 + i] = x[1 + i] = pcm[14 - i]; -		x[66 + i] = x[2 + i] = pcm[13 - i]; -		x[67 + i] = x[3 + i] = pcm[12 - i]; -	} - -	/* Analyze four blocks */ -	_sbc_analyze_four(x + 12, out); -	out += out_stride; -	_sbc_analyze_four(x + 8, out); -	out += out_stride; -	_sbc_analyze_four(x + 4, out); -	out += out_stride; -	_sbc_analyze_four(x, out); -} - -static inline void _sbc_analyze_eight(const int16_t *in, int32_t *out) -{ -	FIXED_A t1[8]; -	FIXED_T t2[8]; -	int i, hop; - -	/* rounding coefficient */ -	t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = -		(FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); - -	/* low pass polyphase filter */ -	for (hop = 0; hop < 80; hop += 16) { -		t1[0] += (FIXED_A) in[hop] * _sbc_proto_fixed8[hop]; -		t1[1] += (FIXED_A) in[hop + 1] * _sbc_proto_fixed8[hop + 1]; -		t1[2] += (FIXED_A) in[hop + 2] * _sbc_proto_fixed8[hop + 2]; -		t1[3] += (FIXED_A) in[hop + 3] * _sbc_proto_fixed8[hop + 3]; -		t1[4] += (FIXED_A) in[hop + 4] * _sbc_proto_fixed8[hop + 4]; -		t1[3] += (FIXED_A) in[hop + 5] * _sbc_proto_fixed8[hop + 5]; -		t1[2] += (FIXED_A) in[hop + 6] * _sbc_proto_fixed8[hop + 6]; -		t1[1] += (FIXED_A) in[hop + 7] * _sbc_proto_fixed8[hop + 7]; -		t1[0] += (FIXED_A) in[hop + 8] * _sbc_proto_fixed8[hop + 8]; -		t1[5] += (FIXED_A) in[hop + 9] * _sbc_proto_fixed8[hop + 9]; -		t1[6] += (FIXED_A) in[hop + 10] * _sbc_proto_fixed8[hop + 10]; -		t1[7] += (FIXED_A) in[hop + 11] * _sbc_proto_fixed8[hop + 11]; -		t1[7] += (FIXED_A) in[hop + 13] * _sbc_proto_fixed8[hop + 13]; -		t1[6] += (FIXED_A) in[hop + 14] * _sbc_proto_fixed8[hop + 14]; -		t1[5] += (FIXED_A) in[hop + 15] * _sbc_proto_fixed8[hop + 15]; -	} - -	/* scaling */ -	t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; -	t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; -	t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; -	t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; -	t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; -	t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; -	t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; -	t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; - -	/* do the cos transform */ -	for (i = 0, hop = 0; i < 8; hop += 16, i++) { -		out[i] = ((FIXED_A) t2[0] * cos_table_fixed_8[0 + hop] + -			  (FIXED_A) t2[1] * cos_table_fixed_8[1 + hop] + -			  (FIXED_A) t2[2] * cos_table_fixed_8[2 + hop] + -			  (FIXED_A) t2[3] * cos_table_fixed_8[3 + hop] + -			  (FIXED_A) t2[4] * cos_table_fixed_8[4 + hop] + -			  (FIXED_A) t2[5] * cos_table_fixed_8[9 + hop] + -			  (FIXED_A) t2[6] * cos_table_fixed_8[10 + hop] + -			  (FIXED_A) t2[7] * cos_table_fixed_8[11 + hop]) >> -			(SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); -	} -} - -static void sbc_analyze_4b_8s(int16_t *pcm, int16_t *x, -			      int32_t *out, int out_stride) -{ -	int i; - -	/* Input 4 x 8 Audio Samples */ -	for (i = 0; i < 32; i += 8) { -		x[128 + i] = x[0 + i] = pcm[31 - i]; -		x[129 + i] = x[1 + i] = pcm[30 - i]; -		x[130 + i] = x[2 + i] = pcm[29 - i]; -		x[131 + i] = x[3 + i] = pcm[28 - i]; -		x[132 + i] = x[4 + i] = pcm[27 - i]; -		x[133 + i] = x[5 + i] = pcm[26 - i]; -		x[134 + i] = x[6 + i] = pcm[25 - i]; -		x[135 + i] = x[7 + i] = pcm[24 - i]; -	} - -	/* Analyze four blocks */ -	_sbc_analyze_eight(x + 24, out); -	out += out_stride; -	_sbc_analyze_eight(x + 16, out); -	out += out_stride; -	_sbc_analyze_eight(x + 8, out); -	out += out_stride; -	_sbc_analyze_eight(x, out); -} -  static int sbc_analyze_audio(struct sbc_encoder_state *state,  				struct sbc_frame *frame)  { @@ -1056,9 +907,7 @@ static void sbc_encoder_init(struct sbc_encoder_state *state,  	state->subbands = frame->subbands;  	state->position[0] = state->position[1] = 12 * frame->subbands; -	/* Default implementation for analyze function */ -	state->sbc_analyze_4b_4s = sbc_analyze_4b_4s; -	state->sbc_analyze_4b_8s = sbc_analyze_4b_8s; +	sbc_init_primitives(state);  }  struct sbc_priv { diff --git a/sbc/sbc_math.h b/sbc/sbc_math.h index 6ca4f526..b87bc81c 100644 --- a/sbc/sbc_math.h +++ b/sbc/sbc_math.h @@ -29,8 +29,6 @@  #define ASR(val, bits) ((-2 >> 1 == -1) ? \  		 ((int32_t)(val)) >> (bits) : ((int32_t) (val)) / (1 << (bits))) -#define SCALE_OUT_BITS 15 -  #define SCALE_SPROTO4_TBL	12  #define SCALE_SPROTO8_TBL	14  #define SCALE_NPROTO4_TBL	11 diff --git a/sbc/sbc_primitives.c b/sbc/sbc_primitives.c new file mode 100644 index 00000000..f2e75b4c --- /dev/null +++ b/sbc/sbc_primitives.c @@ -0,0 +1,401 @@ +/* + * + *  Bluetooth low-complexity, subband codec (SBC) library + * + *  Copyright (C) 2004-2009  Marcel Holtmann <marcel@holtmann.org> + *  Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch> + *  Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com> + * + * + *  This library is free software; you can redistribute it and/or + *  modify it under the terms of the GNU Lesser General Public + *  License as published by the Free Software Foundation; either + *  version 2.1 of the License, or (at your option) any later version. + * + *  This library is distributed in the hope that it will be useful, + *  but WITHOUT ANY WARRANTY; without even the implied warranty of + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + *  Lesser General Public License for more details. + * + *  You should have received a copy of the GNU Lesser General Public + *  License along with this library; if not, write to the Free Software + *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +#include <stdint.h> +#include <limits.h> +#include "sbc.h" +#include "sbc_math.h" +#include "sbc_tables.h" + +#include "sbc_primitives.h" + +/* + * A standard C code of analysis filter. + */ +static inline void sbc_analyze_four(const int16_t *in, int32_t *out) +{ +	FIXED_A t1[4]; +	FIXED_T t2[4]; +	int i = 0, hop = 0; + +	/* rounding coefficient */ +	t1[0] = t1[1] = t1[2] = t1[3] = +		(FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); + +	/* low pass polyphase filter */ +	for (hop = 0; hop < 40; hop += 8) { +		t1[0] += (FIXED_A) in[hop] * _sbc_proto_fixed4[hop]; +		t1[1] += (FIXED_A) in[hop + 1] * _sbc_proto_fixed4[hop + 1]; +		t1[2] += (FIXED_A) in[hop + 2] * _sbc_proto_fixed4[hop + 2]; +		t1[1] += (FIXED_A) in[hop + 3] * _sbc_proto_fixed4[hop + 3]; +		t1[0] += (FIXED_A) in[hop + 4] * _sbc_proto_fixed4[hop + 4]; +		t1[3] += (FIXED_A) in[hop + 5] * _sbc_proto_fixed4[hop + 5]; +		t1[3] += (FIXED_A) in[hop + 7] * _sbc_proto_fixed4[hop + 7]; +	} + +	/* scaling */ +	t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; +	t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; +	t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; +	t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; + +	/* do the cos transform */ +	for (i = 0, hop = 0; i < 4; hop += 8, i++) { +		out[i] = ((FIXED_A) t2[0] * cos_table_fixed_4[0 + hop] + +			  (FIXED_A) t2[1] * cos_table_fixed_4[1 + hop] + +			  (FIXED_A) t2[2] * cos_table_fixed_4[2 + hop] + +			  (FIXED_A) t2[3] * cos_table_fixed_4[5 + hop]) >> +			(SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); +	} +} + +static void sbc_analyze_4b_4s(int16_t *pcm, int16_t *x, +			      int32_t *out, int out_stride) +{ +	int i; + +	/* Input 4 x 4 Audio Samples */ +	for (i = 0; i < 16; i += 4) { +		x[64 + i] = x[0 + i] = pcm[15 - i]; +		x[65 + i] = x[1 + i] = pcm[14 - i]; +		x[66 + i] = x[2 + i] = pcm[13 - i]; +		x[67 + i] = x[3 + i] = pcm[12 - i]; +	} + +	/* Analyze four blocks */ +	sbc_analyze_four(x + 12, out); +	out += out_stride; +	sbc_analyze_four(x + 8, out); +	out += out_stride; +	sbc_analyze_four(x + 4, out); +	out += out_stride; +	sbc_analyze_four(x, out); +} + +static inline void sbc_analyze_eight(const int16_t *in, int32_t *out) +{ +	FIXED_A t1[8]; +	FIXED_T t2[8]; +	int i, hop; + +	/* rounding coefficient */ +	t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = +		(FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); + +	/* low pass polyphase filter */ +	for (hop = 0; hop < 80; hop += 16) { +		t1[0] += (FIXED_A) in[hop] * _sbc_proto_fixed8[hop]; +		t1[1] += (FIXED_A) in[hop + 1] * _sbc_proto_fixed8[hop + 1]; +		t1[2] += (FIXED_A) in[hop + 2] * _sbc_proto_fixed8[hop + 2]; +		t1[3] += (FIXED_A) in[hop + 3] * _sbc_proto_fixed8[hop + 3]; +		t1[4] += (FIXED_A) in[hop + 4] * _sbc_proto_fixed8[hop + 4]; +		t1[3] += (FIXED_A) in[hop + 5] * _sbc_proto_fixed8[hop + 5]; +		t1[2] += (FIXED_A) in[hop + 6] * _sbc_proto_fixed8[hop + 6]; +		t1[1] += (FIXED_A) in[hop + 7] * _sbc_proto_fixed8[hop + 7]; +		t1[0] += (FIXED_A) in[hop + 8] * _sbc_proto_fixed8[hop + 8]; +		t1[5] += (FIXED_A) in[hop + 9] * _sbc_proto_fixed8[hop + 9]; +		t1[6] += (FIXED_A) in[hop + 10] * _sbc_proto_fixed8[hop + 10]; +		t1[7] += (FIXED_A) in[hop + 11] * _sbc_proto_fixed8[hop + 11]; +		t1[7] += (FIXED_A) in[hop + 13] * _sbc_proto_fixed8[hop + 13]; +		t1[6] += (FIXED_A) in[hop + 14] * _sbc_proto_fixed8[hop + 14]; +		t1[5] += (FIXED_A) in[hop + 15] * _sbc_proto_fixed8[hop + 15]; +	} + +	/* scaling */ +	t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; +	t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; +	t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; +	t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; +	t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; +	t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; +	t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; +	t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; + +	/* do the cos transform */ +	for (i = 0, hop = 0; i < 8; hop += 16, i++) { +		out[i] = ((FIXED_A) t2[0] * cos_table_fixed_8[0 + hop] + +			  (FIXED_A) t2[1] * cos_table_fixed_8[1 + hop] + +			  (FIXED_A) t2[2] * cos_table_fixed_8[2 + hop] + +			  (FIXED_A) t2[3] * cos_table_fixed_8[3 + hop] + +			  (FIXED_A) t2[4] * cos_table_fixed_8[4 + hop] + +			  (FIXED_A) t2[5] * cos_table_fixed_8[9 + hop] + +			  (FIXED_A) t2[6] * cos_table_fixed_8[10 + hop] + +			  (FIXED_A) t2[7] * cos_table_fixed_8[11 + hop]) >> +			(SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); +	} +} + +static void sbc_analyze_4b_8s(int16_t *pcm, int16_t *x, +			      int32_t *out, int out_stride) +{ +	int i; + +	/* Input 4 x 8 Audio Samples */ +	for (i = 0; i < 32; i += 8) { +		x[128 + i] = x[0 + i] = pcm[31 - i]; +		x[129 + i] = x[1 + i] = pcm[30 - i]; +		x[130 + i] = x[2 + i] = pcm[29 - i]; +		x[131 + i] = x[3 + i] = pcm[28 - i]; +		x[132 + i] = x[4 + i] = pcm[27 - i]; +		x[133 + i] = x[5 + i] = pcm[26 - i]; +		x[134 + i] = x[6 + i] = pcm[25 - i]; +		x[135 + i] = x[7 + i] = pcm[24 - i]; +	} + +	/* Analyze four blocks */ +	sbc_analyze_eight(x + 24, out); +	out += out_stride; +	sbc_analyze_eight(x + 16, out); +	out += out_stride; +	sbc_analyze_eight(x + 8, out); +	out += out_stride; +	sbc_analyze_eight(x, out); +} + +/* + * A reference C code of analysis filter with SIMD-friendly tables + * reordering and code layout. This code can be used to develop platform + * specific SIMD optimizations. Also it may be used as some kind of test + * for compiler autovectorization capabilities (who knows, if the compiler + * is very good at this stuff, hand optimized assembly may be not strictly + * needed for some platform). + */ + +static inline void sbc_analyze_four_simd(const int16_t *in, int32_t *out, +					 const FIXED_T *consts) +{ +	FIXED_A t1[4]; +	FIXED_T t2[4]; +	int hop = 0; + +	/* rounding coefficient */ +	t1[0] = t1[1] = t1[2] = t1[3] = +		(FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); + +	/* low pass polyphase filter */ +	for (hop = 0; hop < 40; hop += 8) { +		t1[0] += (FIXED_A) in[hop] * consts[hop]; +		t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; +		t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; +		t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; +		t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; +		t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; +		t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; +		t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; +	} + +	/* scaling */ +	t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; +	t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; +	t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; +	t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; + +	/* do the cos transform */ +	t1[0]  = (FIXED_A) t2[0] * consts[40 + 0]; +	t1[0] += (FIXED_A) t2[1] * consts[40 + 1]; +	t1[1]  = (FIXED_A) t2[0] * consts[40 + 2]; +	t1[1] += (FIXED_A) t2[1] * consts[40 + 3]; +	t1[2]  = (FIXED_A) t2[0] * consts[40 + 4]; +	t1[2] += (FIXED_A) t2[1] * consts[40 + 5]; +	t1[3]  = (FIXED_A) t2[0] * consts[40 + 6]; +	t1[3] += (FIXED_A) t2[1] * consts[40 + 7]; + +	t1[0] += (FIXED_A) t2[2] * consts[40 + 8]; +	t1[0] += (FIXED_A) t2[3] * consts[40 + 9]; +	t1[1] += (FIXED_A) t2[2] * consts[40 + 10]; +	t1[1] += (FIXED_A) t2[3] * consts[40 + 11]; +	t1[2] += (FIXED_A) t2[2] * consts[40 + 12]; +	t1[2] += (FIXED_A) t2[3] * consts[40 + 13]; +	t1[3] += (FIXED_A) t2[2] * consts[40 + 14]; +	t1[3] += (FIXED_A) t2[3] * consts[40 + 15]; + +	out[0] = t1[0] >> +		(SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); +	out[1] = t1[1] >> +		(SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); +	out[2] = t1[2] >> +		(SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); +	out[3] = t1[3] >> +		(SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); +} + +static inline void sbc_analyze_eight_simd(const int16_t *in, int32_t *out, +					  const FIXED_T *consts) +{ +	FIXED_A t1[8]; +	FIXED_T t2[8]; +	int i, hop; + +	/* rounding coefficient */ +	t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = +		(FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); + +	/* low pass polyphase filter */ +	for (hop = 0; hop < 80; hop += 16) { +		t1[0] += (FIXED_A) in[hop] * consts[hop]; +		t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; +		t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; +		t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; +		t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; +		t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; +		t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; +		t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; +		t1[4] += (FIXED_A) in[hop + 8] * consts[hop + 8]; +		t1[4] += (FIXED_A) in[hop + 9] * consts[hop + 9]; +		t1[5] += (FIXED_A) in[hop + 10] * consts[hop + 10]; +		t1[5] += (FIXED_A) in[hop + 11] * consts[hop + 11]; +		t1[6] += (FIXED_A) in[hop + 12] * consts[hop + 12]; +		t1[6] += (FIXED_A) in[hop + 13] * consts[hop + 13]; +		t1[7] += (FIXED_A) in[hop + 14] * consts[hop + 14]; +		t1[7] += (FIXED_A) in[hop + 15] * consts[hop + 15]; +	} + +	/* scaling */ +	t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; +	t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; +	t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; +	t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; +	t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; +	t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; +	t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; +	t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; + + +	/* do the cos transform */ +	t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0; + +	for (i = 0; i < 4; i++) { +		t1[0] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 0]; +		t1[0] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 1]; +		t1[1] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 2]; +		t1[1] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 3]; +		t1[2] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 4]; +		t1[2] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 5]; +		t1[3] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 6]; +		t1[3] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 7]; +		t1[4] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 8]; +		t1[4] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 9]; +		t1[5] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 10]; +		t1[5] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 11]; +		t1[6] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 12]; +		t1[6] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 13]; +		t1[7] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 14]; +		t1[7] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 15]; +	} + +	for (i = 0; i < 8; i++) +		out[i] = t1[i] >> +			(SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); +} + +static inline void sbc_analyze_4b_4s_simd(int16_t *pcm, int16_t *x, +					  int32_t *out, int out_stride) +{ +	/* Fetch audio samples and do input data reordering for SIMD */ +	x[64] = x[0]  = pcm[8 + 7]; +	x[65] = x[1]  = pcm[8 + 3]; +	x[66] = x[2]  = pcm[8 + 6]; +	x[67] = x[3]  = pcm[8 + 4]; +	x[68] = x[4]  = pcm[8 + 0]; +	x[69] = x[5]  = pcm[8 + 2]; +	x[70] = x[6]  = pcm[8 + 1]; +	x[71] = x[7]  = pcm[8 + 5]; + +	x[72] = x[8]  = pcm[0 + 7]; +	x[73] = x[9]  = pcm[0 + 3]; +	x[74] = x[10] = pcm[0 + 6]; +	x[75] = x[11] = pcm[0 + 4]; +	x[76] = x[12] = pcm[0 + 0]; +	x[77] = x[13] = pcm[0 + 2]; +	x[78] = x[14] = pcm[0 + 1]; +	x[79] = x[15] = pcm[0 + 5]; + +	/* Analyze blocks */ +	sbc_analyze_four_simd(x + 12, out, analysis_consts_fixed4_simd_odd); +	out += out_stride; +	sbc_analyze_four_simd(x + 8, out, analysis_consts_fixed4_simd_even); +	out += out_stride; +	sbc_analyze_four_simd(x + 4, out, analysis_consts_fixed4_simd_odd); +	out += out_stride; +	sbc_analyze_four_simd(x + 0, out, analysis_consts_fixed4_simd_even); +} + +static inline void sbc_analyze_4b_8s_simd(int16_t *pcm, int16_t *x, +					  int32_t *out, int out_stride) +{ +	/* Fetch audio samples and do input data reordering for SIMD */ +	x[128] = x[0]  = pcm[16 + 15]; +	x[129] = x[1]  = pcm[16 + 7]; +	x[130] = x[2]  = pcm[16 + 14]; +	x[131] = x[3]  = pcm[16 + 8]; +	x[132] = x[4]  = pcm[16 + 13]; +	x[133] = x[5]  = pcm[16 + 9]; +	x[134] = x[6]  = pcm[16 + 12]; +	x[135] = x[7]  = pcm[16 + 10]; +	x[136] = x[8]  = pcm[16 + 11]; +	x[137] = x[9]  = pcm[16 + 3]; +	x[138] = x[10] = pcm[16 + 6]; +	x[139] = x[11] = pcm[16 + 0]; +	x[140] = x[12] = pcm[16 + 5]; +	x[141] = x[13] = pcm[16 + 1]; +	x[142] = x[14] = pcm[16 + 4]; +	x[143] = x[15] = pcm[16 + 2]; + +	x[144] = x[16] = pcm[0 + 15]; +	x[145] = x[17] = pcm[0 + 7]; +	x[146] = x[18] = pcm[0 + 14]; +	x[147] = x[19] = pcm[0 + 8]; +	x[148] = x[20] = pcm[0 + 13]; +	x[149] = x[21] = pcm[0 + 9]; +	x[150] = x[22] = pcm[0 + 12]; +	x[151] = x[23] = pcm[0 + 10]; +	x[152] = x[24] = pcm[0 + 11]; +	x[153] = x[25] = pcm[0 + 3]; +	x[154] = x[26] = pcm[0 + 6]; +	x[155] = x[27] = pcm[0 + 0]; +	x[156] = x[28] = pcm[0 + 5]; +	x[157] = x[29] = pcm[0 + 1]; +	x[158] = x[30] = pcm[0 + 4]; +	x[159] = x[31] = pcm[0 + 2]; + +	/* Analyze blocks */ +	sbc_analyze_eight_simd(x + 24, out, analysis_consts_fixed8_simd_odd); +	out += out_stride; +	sbc_analyze_eight_simd(x + 16, out, analysis_consts_fixed8_simd_even); +	out += out_stride; +	sbc_analyze_eight_simd(x + 8, out, analysis_consts_fixed8_simd_odd); +	out += out_stride; +	sbc_analyze_eight_simd(x + 0, out, analysis_consts_fixed8_simd_even); +} + +/* + * Detect CPU features and setup function pointers + */ +void sbc_init_primitives(struct sbc_encoder_state *state) +{ +	/* Default implementation for analyze functions */ +	state->sbc_analyze_4b_4s = sbc_analyze_4b_4s; +	state->sbc_analyze_4b_8s = sbc_analyze_4b_8s; +} diff --git a/sbc/sbc_primitives.h b/sbc/sbc_primitives.h new file mode 100644 index 00000000..ca1ec277 --- /dev/null +++ b/sbc/sbc_primitives.h @@ -0,0 +1,52 @@ +/* + * + *  Bluetooth low-complexity, subband codec (SBC) library + * + *  Copyright (C) 2004-2009  Marcel Holtmann <marcel@holtmann.org> + *  Copyright (C) 2004-2005  Henryk Ploetz <henryk@ploetzli.ch> + *  Copyright (C) 2005-2006  Brad Midgley <bmidgley@xmission.com> + * + * + *  This library is free software; you can redistribute it and/or + *  modify it under the terms of the GNU Lesser General Public + *  License as published by the Free Software Foundation; either + *  version 2.1 of the License, or (at your option) any later version. + * + *  This library is distributed in the hope that it will be useful, + *  but WITHOUT ANY WARRANTY; without even the implied warranty of + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU + *  Lesser General Public License for more details. + * + *  You should have received a copy of the GNU Lesser General Public + *  License along with this library; if not, write to the Free Software + *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + * + */ + +#ifndef __SBC_PRIMITIVES_H +#define __SBC_PRIMITIVES_H + +#define SCALE_OUT_BITS 15 + +struct sbc_encoder_state { +	int subbands; +	int position[2]; +	int16_t X[2][256]; +	/* Polyphase analysis filter for 4 subbands configuration, +	   it handles 4 blocks at once */ +	void (*sbc_analyze_4b_4s)(int16_t *pcm, int16_t *x, +				  int32_t *out, int out_stride); +	/* Polyphase analysis filter for 8 subbands configuration, +	   it handles 4 blocks at once */ +	void (*sbc_analyze_4b_8s)(int16_t *pcm, int16_t *x, +				  int32_t *out, int out_stride); +}; + +/* + * Initialize pointers to the functions which are the basic "building bricks" + * of SBC codec. Best implementation is selected based on target CPU + * capabilities. + */ +void sbc_init_primitives(struct sbc_encoder_state *encoder_state); + +#endif diff --git a/sbc/sbc_tables.h b/sbc/sbc_tables.h index f1dfe6c0..a9a995fa 100644 --- a/sbc/sbc_tables.h +++ b/sbc/sbc_tables.h @@ -157,8 +157,9 @@ static const int32_t synmatrix8[16][8] = {   */  #define SBC_PROTO_FIXED4_SCALE \  	((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 1) -#define F(x) (FIXED_A) ((x * 2) * \ +#define F_PROTO4(x) (FIXED_A) ((x * 2) * \  	((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_PROTO4(x)  static const FIXED_T _sbc_proto_fixed4[40] = {  	 F(0.00000000E+00),  F(5.36548976E-04),  	-F(1.49188357E-03),  F(2.73370904E-03), @@ -206,8 +207,9 @@ static const FIXED_T _sbc_proto_fixed4[40] = {   */  #define SBC_COS_TABLE_FIXED4_SCALE \  	((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) -#define F(x) (FIXED_A) ((x) * \ +#define F_COS4(x) (FIXED_A) ((x) * \  	((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_COS4(x)  static const FIXED_T cos_table_fixed_4[32] = {  	 F(0.7071067812),  F(0.9238795325), -F(1.0000000000),  F(0.9238795325),  	 F(0.7071067812),  F(0.3826834324),  F(0.0000000000),  F(0.3826834324), @@ -233,8 +235,9 @@ static const FIXED_T cos_table_fixed_4[32] = {   */  #define SBC_PROTO_FIXED8_SCALE \  	((sizeof(FIXED_T) * CHAR_BIT - 1) - SBC_FIXED_EXTRA_BITS + 2) -#define F(x) (FIXED_A) ((x * 4) * \ +#define F_PROTO8(x) (FIXED_A) ((x * 4) * \  	((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_PROTO8(x)  static const FIXED_T _sbc_proto_fixed8[80] = {  	 F(0.00000000E+00),  F(1.56575398E-04),  	 F(3.43256425E-04),  F(5.54620202E-04), @@ -301,8 +304,9 @@ static const FIXED_T _sbc_proto_fixed8[80] = {   */  #define SBC_COS_TABLE_FIXED8_SCALE \  	((sizeof(FIXED_T) * CHAR_BIT - 1) + SBC_FIXED_EXTRA_BITS) -#define F(x) (FIXED_A) ((x) * \ +#define F_COS8(x) (FIXED_A) ((x) * \  	((FIXED_A) 1 << (sizeof(FIXED_T) * CHAR_BIT - 1)) + 0.5) +#define F(x) F_COS8(x)  static const FIXED_T cos_table_fixed_8[128] = {  	 F(0.7071067812),  F(0.8314696123),  F(0.9238795325),  F(0.9807852804),  	-F(1.0000000000),  F(0.9807852804),  F(0.9238795325),  F(0.8314696123), @@ -345,3 +349,241 @@ static const FIXED_T cos_table_fixed_8[128] = {  	-F(0.0000000000), -F(0.1950903220),  F(0.3826834324), -F(0.5555702330),  };  #undef F + +/* + * Constant tables for the use in SIMD optimized analysis filters + * Each table consists of two parts: + * 1. reordered "proto" table + * 2. reordered "cos" table + * + * Due to non-symmetrical reordering, separate tables for "even" + * and "odd" cases are needed + */ + +static const FIXED_T analysis_consts_fixed4_simd_even[40 + 16] = { +#define F(x) F_PROTO4(x) +	 F(0.00000000E+00),  F(3.83720193E-03), +	 F(5.36548976E-04),  F(2.73370904E-03), +	 F(3.06012286E-03),  F(3.89205149E-03), +	 F(0.00000000E+00), -F(1.49188357E-03), +	 F(1.09137620E-02),  F(2.58767811E-02), +	 F(2.04385087E-02),  F(3.21939290E-02), +	 F(7.76463494E-02),  F(6.13245186E-03), +	 F(0.00000000E+00), -F(2.88757392E-02), +	 F(1.35593274E-01),  F(2.94315332E-01), +	 F(1.94987841E-01),  F(2.81828203E-01), +	-F(1.94987841E-01),  F(2.81828203E-01), +	 F(0.00000000E+00), -F(2.46636662E-01), +	-F(1.35593274E-01),  F(2.58767811E-02), +	-F(7.76463494E-02),  F(6.13245186E-03), +	-F(2.04385087E-02),  F(3.21939290E-02), +	 F(0.00000000E+00),  F(2.88217274E-02), +	-F(1.09137620E-02),  F(3.83720193E-03), +	-F(3.06012286E-03),  F(3.89205149E-03), +	-F(5.36548976E-04),  F(2.73370904E-03), +	 F(0.00000000E+00), -F(1.86581691E-03), +#undef F +#define F(x) F_COS4(x) +	 F(0.7071067812),  F(0.9238795325), +	-F(0.7071067812),  F(0.3826834324), +	-F(0.7071067812), -F(0.3826834324), +	 F(0.7071067812), -F(0.9238795325), +	 F(0.3826834324), -F(1.0000000000), +	-F(0.9238795325), -F(1.0000000000), +	 F(0.9238795325), -F(1.0000000000), +	-F(0.3826834324), -F(1.0000000000), +#undef F +}; + +static const FIXED_T analysis_consts_fixed4_simd_odd[40 + 16] = { +#define F(x) F_PROTO4(x) +	 F(2.73370904E-03),  F(5.36548976E-04), +	-F(1.49188357E-03),  F(0.00000000E+00), +	 F(3.83720193E-03),  F(1.09137620E-02), +	 F(3.89205149E-03),  F(3.06012286E-03), +	 F(3.21939290E-02),  F(2.04385087E-02), +	-F(2.88757392E-02),  F(0.00000000E+00), +	 F(2.58767811E-02),  F(1.35593274E-01), +	 F(6.13245186E-03),  F(7.76463494E-02), +	 F(2.81828203E-01),  F(1.94987841E-01), +	-F(2.46636662E-01),  F(0.00000000E+00), +	 F(2.94315332E-01), -F(1.35593274E-01), +	 F(2.81828203E-01), -F(1.94987841E-01), +	 F(6.13245186E-03), -F(7.76463494E-02), +	 F(2.88217274E-02),  F(0.00000000E+00), +	 F(2.58767811E-02), -F(1.09137620E-02), +	 F(3.21939290E-02), -F(2.04385087E-02), +	 F(3.89205149E-03), -F(3.06012286E-03), +	-F(1.86581691E-03),  F(0.00000000E+00), +	 F(3.83720193E-03),  F(0.00000000E+00), +	 F(2.73370904E-03), -F(5.36548976E-04), +#undef F +#define F(x) F_COS4(x) +	 F(0.9238795325), -F(1.0000000000), +	 F(0.3826834324), -F(1.0000000000), +	-F(0.3826834324), -F(1.0000000000), +	-F(0.9238795325), -F(1.0000000000), +	 F(0.7071067812),  F(0.3826834324), +	-F(0.7071067812), -F(0.9238795325), +	-F(0.7071067812),  F(0.9238795325), +	 F(0.7071067812), -F(0.3826834324), +#undef F +}; + +static const FIXED_T analysis_consts_fixed8_simd_even[80 + 64] = { +#define F(x) F_PROTO8(x) +	 F(0.00000000E+00),  F(2.01182542E-03), +	 F(1.56575398E-04),  F(1.78371725E-03), +	 F(3.43256425E-04),  F(1.47640169E-03), +	 F(5.54620202E-04),  F(1.13992507E-03), +	-F(8.23919506E-04),  F(0.00000000E+00), +	 F(2.10371989E-03),  F(3.49717454E-03), +	 F(1.99454554E-03),  F(1.64973098E-03), +	 F(1.61656283E-03),  F(1.78805361E-04), +	 F(5.65949473E-03),  F(1.29371806E-02), +	 F(8.02941163E-03),  F(1.53184106E-02), +	 F(1.04584443E-02),  F(1.62208471E-02), +	 F(1.27472335E-02),  F(1.59045603E-02), +	-F(1.46525263E-02),  F(0.00000000E+00), +	 F(8.85757540E-03),  F(5.31873032E-02), +	 F(2.92408442E-03),  F(3.90751381E-02), +	-F(4.91578024E-03),  F(2.61098752E-02), +	 F(6.79989431E-02),  F(1.46955068E-01), +	 F(8.29847578E-02),  F(1.45389847E-01), +	 F(9.75753918E-02),  F(1.40753505E-01), +	 F(1.11196689E-01),  F(1.33264415E-01), +	-F(1.23264548E-01),  F(0.00000000E+00), +	 F(1.45389847E-01), -F(8.29847578E-02), +	 F(1.40753505E-01), -F(9.75753918E-02), +	 F(1.33264415E-01), -F(1.11196689E-01), +	-F(6.79989431E-02),  F(1.29371806E-02), +	-F(5.31873032E-02),  F(8.85757540E-03), +	-F(3.90751381E-02),  F(2.92408442E-03), +	-F(2.61098752E-02), -F(4.91578024E-03), +	 F(1.46404076E-02),  F(0.00000000E+00), +	 F(1.53184106E-02), -F(8.02941163E-03), +	 F(1.62208471E-02), -F(1.04584443E-02), +	 F(1.59045603E-02), -F(1.27472335E-02), +	-F(5.65949473E-03),  F(2.01182542E-03), +	-F(3.49717454E-03),  F(2.10371989E-03), +	-F(1.64973098E-03),  F(1.99454554E-03), +	-F(1.78805361E-04),  F(1.61656283E-03), +	-F(9.02154502E-04),  F(0.00000000E+00), +	 F(1.78371725E-03), -F(1.56575398E-04), +	 F(1.47640169E-03), -F(3.43256425E-04), +	 F(1.13992507E-03), -F(5.54620202E-04), +#undef F +#define F(x) F_COS8(x) +	 F(0.7071067812),  F(0.8314696123), +	-F(0.7071067812), -F(0.1950903220), +	-F(0.7071067812), -F(0.9807852804), +	 F(0.7071067812), -F(0.5555702330), +	 F(0.7071067812),  F(0.5555702330), +	-F(0.7071067812),  F(0.9807852804), +	-F(0.7071067812),  F(0.1950903220), +	 F(0.7071067812), -F(0.8314696123), +	 F(0.9238795325),  F(0.9807852804), +	 F(0.3826834324),  F(0.8314696123), +	-F(0.3826834324),  F(0.5555702330), +	-F(0.9238795325),  F(0.1950903220), +	-F(0.9238795325), -F(0.1950903220), +	-F(0.3826834324), -F(0.5555702330), +	 F(0.3826834324), -F(0.8314696123), +	 F(0.9238795325), -F(0.9807852804), +	-F(1.0000000000),  F(0.5555702330), +	-F(1.0000000000), -F(0.9807852804), +	-F(1.0000000000),  F(0.1950903220), +	-F(1.0000000000),  F(0.8314696123), +	-F(1.0000000000), -F(0.8314696123), +	-F(1.0000000000), -F(0.1950903220), +	-F(1.0000000000),  F(0.9807852804), +	-F(1.0000000000), -F(0.5555702330), +	 F(0.3826834324),  F(0.1950903220), +	-F(0.9238795325), -F(0.5555702330), +	 F(0.9238795325),  F(0.8314696123), +	-F(0.3826834324), -F(0.9807852804), +	-F(0.3826834324),  F(0.9807852804), +	 F(0.9238795325), -F(0.8314696123), +	-F(0.9238795325),  F(0.5555702330), +	 F(0.3826834324), -F(0.1950903220), +#undef F +}; + +static const FIXED_T analysis_consts_fixed8_simd_odd[80 + 64] = { +#define F(x) F_PROTO8(x) +	 F(0.00000000E+00), -F(8.23919506E-04), +	 F(1.56575398E-04),  F(1.78371725E-03), +	 F(3.43256425E-04),  F(1.47640169E-03), +	 F(5.54620202E-04),  F(1.13992507E-03), +	 F(2.01182542E-03),  F(5.65949473E-03), +	 F(2.10371989E-03),  F(3.49717454E-03), +	 F(1.99454554E-03),  F(1.64973098E-03), +	 F(1.61656283E-03),  F(1.78805361E-04), +	 F(0.00000000E+00), -F(1.46525263E-02), +	 F(8.02941163E-03),  F(1.53184106E-02), +	 F(1.04584443E-02),  F(1.62208471E-02), +	 F(1.27472335E-02),  F(1.59045603E-02), +	 F(1.29371806E-02),  F(6.79989431E-02), +	 F(8.85757540E-03),  F(5.31873032E-02), +	 F(2.92408442E-03),  F(3.90751381E-02), +	-F(4.91578024E-03),  F(2.61098752E-02), +	 F(0.00000000E+00), -F(1.23264548E-01), +	 F(8.29847578E-02),  F(1.45389847E-01), +	 F(9.75753918E-02),  F(1.40753505E-01), +	 F(1.11196689E-01),  F(1.33264415E-01), +	 F(1.46955068E-01), -F(6.79989431E-02), +	 F(1.45389847E-01), -F(8.29847578E-02), +	 F(1.40753505E-01), -F(9.75753918E-02), +	 F(1.33264415E-01), -F(1.11196689E-01), +	 F(0.00000000E+00),  F(1.46404076E-02), +	-F(5.31873032E-02),  F(8.85757540E-03), +	-F(3.90751381E-02),  F(2.92408442E-03), +	-F(2.61098752E-02), -F(4.91578024E-03), +	 F(1.29371806E-02), -F(5.65949473E-03), +	 F(1.53184106E-02), -F(8.02941163E-03), +	 F(1.62208471E-02), -F(1.04584443E-02), +	 F(1.59045603E-02), -F(1.27472335E-02), +	 F(0.00000000E+00), -F(9.02154502E-04), +	-F(3.49717454E-03),  F(2.10371989E-03), +	-F(1.64973098E-03),  F(1.99454554E-03), +	-F(1.78805361E-04),  F(1.61656283E-03), +	 F(2.01182542E-03),  F(0.00000000E+00), +	 F(1.78371725E-03), -F(1.56575398E-04), +	 F(1.47640169E-03), -F(3.43256425E-04), +	 F(1.13992507E-03), -F(5.54620202E-04), +#undef F +#define F(x) F_COS8(x) +	-F(1.0000000000),  F(0.8314696123), +	-F(1.0000000000), -F(0.1950903220), +	-F(1.0000000000), -F(0.9807852804), +	-F(1.0000000000), -F(0.5555702330), +	-F(1.0000000000),  F(0.5555702330), +	-F(1.0000000000),  F(0.9807852804), +	-F(1.0000000000),  F(0.1950903220), +	-F(1.0000000000), -F(0.8314696123), +	 F(0.9238795325),  F(0.9807852804), +	 F(0.3826834324),  F(0.8314696123), +	-F(0.3826834324),  F(0.5555702330), +	-F(0.9238795325),  F(0.1950903220), +	-F(0.9238795325), -F(0.1950903220), +	-F(0.3826834324), -F(0.5555702330), +	 F(0.3826834324), -F(0.8314696123), +	 F(0.9238795325), -F(0.9807852804), +	 F(0.7071067812),  F(0.5555702330), +	-F(0.7071067812), -F(0.9807852804), +	-F(0.7071067812),  F(0.1950903220), +	 F(0.7071067812),  F(0.8314696123), +	 F(0.7071067812), -F(0.8314696123), +	-F(0.7071067812), -F(0.1950903220), +	-F(0.7071067812),  F(0.9807852804), +	 F(0.7071067812), -F(0.5555702330), +	 F(0.3826834324),  F(0.1950903220), +	-F(0.9238795325), -F(0.5555702330), +	 F(0.9238795325),  F(0.8314696123), +	-F(0.3826834324), -F(0.9807852804), +	-F(0.3826834324),  F(0.9807852804), +	 F(0.9238795325), -F(0.8314696123), +	-F(0.9238795325),  F(0.5555702330), +	 F(0.3826834324), -F(0.1950903220), +#undef F +}; | 
