summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Siarhei Siamashka <siarhei.siamashka@nokia.com> 2009-01-15 19:45:36 +0200
committer: Marcel Holtmann <marcel@holtmann.org> 2009-01-16 08:23:19 +0100
commit: 82d00972c91a44a428bd08412ca3039e101c0e40 (patch)
tree: 8cc953b4ec1148d73e14fb93d71951457792abbd
parent: 9e31e7dde636ca28ee551e8bcf8e4f4ca0ef553d (diff)
SBC arrays and constant tables aligned at 16 byte boundary for SIMD
Most SIMD instruction sets benefit from data being naturally aligned. Even where alignment is not strictly required, performance is usually better with aligned data. ARM NEON and SSE2 have different instruction variants for aligned and unaligned memory accesses.
-rw-r--r-- sbc/sbc.c 26
-rw-r--r-- sbc/sbc.h 1
-rw-r--r-- sbc/sbc_primitives.h 2
-rw-r--r-- sbc/sbc_tables.h 22
4 files changed, 36 insertions, 15 deletions
diff --git a/sbc/sbc.c b/sbc/sbc.c
index 534c9359..0699ae00 100644
--- a/sbc/sbc.c
+++ b/sbc/sbc.c
@@ -80,10 +80,13 @@ struct sbc_frame {
uint8_t scale_factor[2][8];
/* raw integer subband samples in the frame */
+ int32_t SBC_ALIGNED sb_sample_f[16][2][8];
- int32_t sb_sample_f[16][2][8];
- int32_t sb_sample[16][2][8]; /* modified subband samples */
- int16_t pcm_sample[2][16*8]; /* original pcm audio samples */
+ /* modified subband samples */
+ int32_t SBC_ALIGNED sb_sample[16][2][8];
+
+ /* original pcm audio samples */
+ int16_t SBC_ALIGNED pcm_sample[2][16*8];
};
struct sbc_decoder_state {
@@ -912,9 +915,9 @@ static void sbc_encoder_init(struct sbc_encoder_state *state,
struct sbc_priv {
int init;
- struct sbc_frame frame;
- struct sbc_decoder_state dec_state;
- struct sbc_encoder_state enc_state;
+ struct SBC_ALIGNED sbc_frame frame;
+ struct SBC_ALIGNED sbc_decoder_state dec_state;
+ struct SBC_ALIGNED sbc_encoder_state enc_state;
};
static void sbc_set_defaults(sbc_t *sbc, unsigned long flags)
@@ -940,10 +943,13 @@ int sbc_init(sbc_t *sbc, unsigned long flags)
memset(sbc, 0, sizeof(sbc_t));
- sbc->priv = malloc(sizeof(struct sbc_priv));
- if (!sbc->priv)
+ sbc->priv_alloc_base = malloc(sizeof(struct sbc_priv) + SBC_ALIGN_MASK);
+ if (!sbc->priv_alloc_base)
return -ENOMEM;
+ sbc->priv = (void *) (((uintptr_t) sbc->priv_alloc_base +
+ SBC_ALIGN_MASK) & ~((uintptr_t) SBC_ALIGN_MASK));
+
memset(sbc->priv, 0, sizeof(struct sbc_priv));
sbc_set_defaults(sbc, flags);
@@ -1091,8 +1097,8 @@ void sbc_finish(sbc_t *sbc)
if (!sbc)
return;
- if (sbc->priv)
- free(sbc->priv);
+ if (sbc->priv_alloc_base)
+ free(sbc->priv_alloc_base);
memset(sbc, 0, sizeof(sbc_t));
}
diff --git a/sbc/sbc.h b/sbc/sbc.h
index 8ac59309..b0a14888 100644
--- a/sbc/sbc.h
+++ b/sbc/sbc.h
@@ -74,6 +74,7 @@ struct sbc_struct {
uint8_t endian;
void *priv;
+ void *priv_alloc_base;
};
typedef struct sbc_struct sbc_t;
diff --git a/sbc/sbc_primitives.h b/sbc/sbc_primitives.h
index ca1ec277..a8b3df6e 100644
--- a/sbc/sbc_primitives.h
+++ b/sbc/sbc_primitives.h
@@ -31,7 +31,7 @@
struct sbc_encoder_state {
int subbands;
int position[2];
- int16_t X[2][256];
+ int16_t SBC_ALIGNED X[2][256];
/* Polyphase analysis filter for 4 subbands configuration,
it handles 4 blocks at once */
void (*sbc_analyze_4b_4s)(int16_t *pcm, int16_t *x,
diff --git a/sbc/sbc_tables.h b/sbc/sbc_tables.h
index a9a995fa..7c2af076 100644
--- a/sbc/sbc_tables.h
+++ b/sbc/sbc_tables.h
@@ -351,6 +351,20 @@ static const FIXED_T cos_table_fixed_8[128] = {
#undef F
/*
+ * Enforce 16 byte alignment for the data, which is supposed to be used
+ * with SIMD optimized code.
+ */
+
+#define SBC_ALIGN_BITS 4
+#define SBC_ALIGN_MASK ((1 << (SBC_ALIGN_BITS)) - 1)
+
+#ifdef __GNUC__
+#define SBC_ALIGNED __attribute__((aligned(1 << (SBC_ALIGN_BITS))))
+#else
+#define SBC_ALIGNED
+#endif
+
+/*
* Constant tables for the use in SIMD optimized analysis filters
* Each table consists of two parts:
* 1. reordered "proto" table
@@ -360,7 +374,7 @@ static const FIXED_T cos_table_fixed_8[128] = {
* and "odd" cases are needed
*/
-static const FIXED_T analysis_consts_fixed4_simd_even[40 + 16] = {
+static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_even[40 + 16] = {
#define F(x) F_PROTO4(x)
F(0.00000000E+00), F(3.83720193E-03),
F(5.36548976E-04), F(2.73370904E-03),
@@ -395,7 +409,7 @@ static const FIXED_T analysis_consts_fixed4_simd_even[40 + 16] = {
#undef F
};
-static const FIXED_T analysis_consts_fixed4_simd_odd[40 + 16] = {
+static const FIXED_T SBC_ALIGNED analysis_consts_fixed4_simd_odd[40 + 16] = {
#define F(x) F_PROTO4(x)
F(2.73370904E-03), F(5.36548976E-04),
-F(1.49188357E-03), F(0.00000000E+00),
@@ -430,7 +444,7 @@ static const FIXED_T analysis_consts_fixed4_simd_odd[40 + 16] = {
#undef F
};
-static const FIXED_T analysis_consts_fixed8_simd_even[80 + 64] = {
+static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_even[80 + 64] = {
#define F(x) F_PROTO8(x)
F(0.00000000E+00), F(2.01182542E-03),
F(1.56575398E-04), F(1.78371725E-03),
@@ -509,7 +523,7 @@ static const FIXED_T analysis_consts_fixed8_simd_even[80 + 64] = {
#undef F
};
-static const FIXED_T analysis_consts_fixed8_simd_odd[80 + 64] = {
+static const FIXED_T SBC_ALIGNED analysis_consts_fixed8_simd_odd[80 + 64] = {
#define F(x) F_PROTO8(x)
F(0.00000000E+00), -F(8.23919506E-04),
F(1.56575398E-04), F(1.78371725E-03),