summaryrefslogtreecommitdiffstats
path: root/src/modules/echo-cancel
diff options
context:
space:
mode:
author: Arun Raghavan <arun.raghavan@collabora.co.uk> 2010-09-21 20:42:32 +0530
committer: Arun Raghavan <arun.raghavan@collabora.co.uk> 2010-09-23 17:17:03 +0530
commit: 963250abb99ab43b209281c2aa5398205492e555 (patch)
tree: 9f745ac8c630dd294c1a9a3c8255098526122117 /src/modules/echo-cancel
parent: ab4223e9cffbc21399c0468dd89a2e57122fbfee (diff)
echo-cancel: Add SSE optimisation to the adrian module
Optimises the core inner-product function, which takes the most CPU. The SSE-optimised bits of the adrian echo canceller are used only if the CPU that PA is running on actually supports SSE.
Diffstat (limited to 'src/modules/echo-cancel')
-rw-r--r-- src/modules/echo-cancel/adrian-aec.c 37
-rw-r--r-- src/modules/echo-cancel/adrian-aec.h 12
-rw-r--r-- src/modules/echo-cancel/adrian.c 10
-rw-r--r-- src/modules/echo-cancel/adrian.h 2
-rw-r--r-- src/modules/echo-cancel/echo-cancel.h 8
-rw-r--r-- src/modules/echo-cancel/module-echo-cancel.c 2
-rw-r--r-- src/modules/echo-cancel/speex.c 2
7 files changed, 61 insertions, 12 deletions
diff --git a/src/modules/echo-cancel/adrian-aec.c b/src/modules/echo-cancel/adrian-aec.c
index 69107c75..39c2d638 100644
--- a/src/modules/echo-cancel/adrian-aec.c
+++ b/src/modules/echo-cancel/adrian-aec.c
@@ -17,6 +17,10 @@
#include "adrian-aec.h"
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
/* Vector Dot Product */
static REAL dotp(REAL a[], REAL b[])
{
@@ -31,8 +35,32 @@ static REAL dotp(REAL a[], REAL b[])
return sum0 + sum1;
}
+static REAL dotp_sse(REAL a[], REAL b[]) __attribute__((noinline));
+static REAL dotp_sse(REAL a[], REAL b[])
+{
+#ifdef __SSE__
+ /* This is taken from speex's inner product implementation */
+ int j;
+ REAL sum;
+ __m128 acc = _mm_setzero_ps();
+
+ for (j=0;j<NLMS_LEN;j+=8)
+ {
+ acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j)));
+ acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4)));
+ }
+ acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
+ acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
+ _mm_store_ss(&sum, acc);
-AEC* AEC_init(int RATE)
+ return sum;
+#else
+ return dotp(a, b);
+#endif
+}
+
+
+AEC* AEC_init(int RATE, int have_vector)
{
AEC *a = pa_xnew(AEC, 1);
a->hangover = 0;
@@ -57,6 +85,11 @@ AEC* AEC_init(int RATE)
a->dumpcnt = 0;
memset(a->ws, 0, sizeof(a->ws));
+ if (have_vector)
+ a->dotp = dotp_sse;
+ else
+ a->dotp = dotp;
+
return a;
}
@@ -146,7 +179,7 @@ static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
// (mic signal - estimated mic signal from spk signal)
e = d;
if (a->hangover > 0) {
- e -= dotp(a->w, a->x + a->j);
+ e -= a->dotp(a->w, a->x + a->j);
}
ef = IIR1_highpass(a->Fe, e); // pre-whitening of e
diff --git a/src/modules/echo-cancel/adrian-aec.h b/src/modules/echo-cancel/adrian-aec.h
index 1f5b090a..df9f3e64 100644
--- a/src/modules/echo-cancel/adrian-aec.h
+++ b/src/modules/echo-cancel/adrian-aec.h
@@ -13,6 +13,13 @@
#ifndef _AEC_H /* include only once */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <pulsecore/macro.h>
+#include <pulse/xmalloc.h>
+
#define WIDEB 2
// use double if your CPU does software-emulation of float
@@ -315,6 +322,9 @@ struct AEC {
// variables are public for visualization
int hangover;
float stepsize;
+
+ // vfuncs that are picked based on processor features available
+ REAL (*dotp) (REAL[], REAL[]);
};
/* Double-Talk Detector
@@ -338,7 +348,7 @@ static void AEC_leaky(AEC *a);
*/
static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize);
- AEC* AEC_init(int RATE);
+ AEC* AEC_init(int RATE, int have_vector);
/* Acoustic Echo Cancellation and Suppression of one sample
* in d: microphone signal with echo
diff --git a/src/modules/echo-cancel/adrian.c b/src/modules/echo-cancel/adrian.c
index 86db1e2c..c8acaa8b 100644
--- a/src/modules/echo-cancel/adrian.c
+++ b/src/modules/echo-cancel/adrian.c
@@ -51,12 +51,12 @@ static void pa_adrian_ec_fixate_spec(pa_sample_spec *source_ss, pa_channel_map *
*sink_map = *source_map;
}
-pa_bool_t pa_adrian_ec_init(pa_echo_canceller *ec,
+pa_bool_t pa_adrian_ec_init(pa_core *c, pa_echo_canceller *ec,
pa_sample_spec *source_ss, pa_channel_map *source_map,
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
uint32_t *blocksize, const char *args)
{
- int framelen, rate;
+ int framelen, rate, have_vector = 0;
uint32_t frame_size_ms;
pa_modargs *ma;
@@ -80,7 +80,11 @@ pa_bool_t pa_adrian_ec_init(pa_echo_canceller *ec,
pa_log_debug ("Using framelen %d, blocksize %u, channels %d, rate %d", framelen, ec->params.priv.adrian.blocksize, source_ss->channels, source_ss->rate);
- ec->params.priv.adrian.aec = AEC_init(rate);
+ /* For now we only support SSE */
+ if (c->cpu_info.cpu_type == PA_CPU_X86 && (c->cpu_info.flags.x86 & PA_CPU_X86_SSE))
+ have_vector = 1;
+
+ ec->params.priv.adrian.aec = AEC_init(rate, have_vector);
if (!ec->params.priv.adrian.aec)
goto fail;
diff --git a/src/modules/echo-cancel/adrian.h b/src/modules/echo-cancel/adrian.h
index d02e934d..639fa9ec 100644
--- a/src/modules/echo-cancel/adrian.h
+++ b/src/modules/echo-cancel/adrian.h
@@ -27,5 +27,5 @@
typedef struct AEC AEC;
-AEC* AEC_init(int RATE);
+AEC* AEC_init(int RATE, int have_vector);
int AEC_doAEC(AEC *a, int d_, int x_);
diff --git a/src/modules/echo-cancel/echo-cancel.h b/src/modules/echo-cancel/echo-cancel.h
index 448ad994..5f6adbc1 100644
--- a/src/modules/echo-cancel/echo-cancel.h
+++ b/src/modules/echo-cancel/echo-cancel.h
@@ -25,6 +25,7 @@
#include <pulse/sample.h>
#include <pulse/channelmap.h>
+#include <pulsecore/core.h>
#include <pulsecore/macro.h>
#include <speex/speex_echo.h>
@@ -50,7 +51,8 @@ struct pa_echo_canceller_params {
typedef struct pa_echo_canceller pa_echo_canceller;
struct pa_echo_canceller {
- pa_bool_t (*init) (pa_echo_canceller *ec,
+ pa_bool_t (*init) (pa_core *c,
+ pa_echo_canceller *ec,
pa_sample_spec *source_ss,
pa_channel_map *source_map,
pa_sample_spec *sink_ss,
@@ -64,7 +66,7 @@ struct pa_echo_canceller {
};
/* Speex canceller functions */
-pa_bool_t pa_speex_ec_init(pa_echo_canceller *ec,
+pa_bool_t pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec,
pa_sample_spec *source_ss, pa_channel_map *source_map,
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
uint32_t *blocksize, const char *args);
@@ -72,7 +74,7 @@ void pa_speex_ec_run(pa_echo_canceller *ec, const uint8_t *rec, const uint8_t *p
void pa_speex_ec_done(pa_echo_canceller *ec);
/* Adrian Andre's echo canceller */
-pa_bool_t pa_adrian_ec_init(pa_echo_canceller *ec,
+pa_bool_t pa_adrian_ec_init(pa_core *c, pa_echo_canceller *ec,
pa_sample_spec *source_ss, pa_channel_map *source_map,
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
uint32_t *blocksize, const char *args);
diff --git a/src/modules/echo-cancel/module-echo-cancel.c b/src/modules/echo-cancel/module-echo-cancel.c
index 8ae45a5c..b6c82a5b 100644
--- a/src/modules/echo-cancel/module-echo-cancel.c
+++ b/src/modules/echo-cancel/module-echo-cancel.c
@@ -1398,7 +1398,7 @@ int pa__init(pa_module*m) {
u->asyncmsgq = pa_asyncmsgq_new(0);
u->need_realign = TRUE;
if (u->ec->init) {
- if (!u->ec->init(u->ec, &source_ss, &source_map, &sink_ss, &sink_map, &u->blocksize, pa_modargs_get_value(ma, "aec_args", NULL))) {
+ if (!u->ec->init(u->core, u->ec, &source_ss, &source_map, &sink_ss, &sink_map, &u->blocksize, pa_modargs_get_value(ma, "aec_args", NULL))) {
pa_log("Failed to init AEC engine");
goto fail;
}
diff --git a/src/modules/echo-cancel/speex.c b/src/modules/echo-cancel/speex.c
index 17a89d23..4351d238 100644
--- a/src/modules/echo-cancel/speex.c
+++ b/src/modules/echo-cancel/speex.c
@@ -48,7 +48,7 @@ static void pa_speex_ec_fixate_spec(pa_sample_spec *source_ss, pa_channel_map *s
*sink_map = *source_map;
}
-pa_bool_t pa_speex_ec_init(pa_echo_canceller *ec,
+pa_bool_t pa_speex_ec_init(pa_core *c, pa_echo_canceller *ec,
pa_sample_spec *source_ss, pa_channel_map *source_map,
pa_sample_spec *sink_ss, pa_channel_map *sink_map,
uint32_t *blocksize, const char *args)