From 963250abb99ab43b209281c2aa5398205492e555 Mon Sep 17 00:00:00 2001 From: Arun Raghavan Date: Tue, 21 Sep 2010 20:42:32 +0530 Subject: echo-cancel: Add SSE optimisation to the adrian module Optimises the core inner-product function, which takes the most CPU. The SSE-optimised bits of the adrian echo canceller are used only if the CPU that PA is running on actually supports SSE. --- src/modules/echo-cancel/adrian-aec.c | 37 ++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) (limited to 'src/modules/echo-cancel/adrian-aec.c') diff --git a/src/modules/echo-cancel/adrian-aec.c b/src/modules/echo-cancel/adrian-aec.c index 69107c75..39c2d638 100644 --- a/src/modules/echo-cancel/adrian-aec.c +++ b/src/modules/echo-cancel/adrian-aec.c @@ -17,6 +17,10 @@ #include "adrian-aec.h" +#ifdef __SSE__ +#include <xmmintrin.h> +#endif + /* Vector Dot Product */ static REAL dotp(REAL a[], REAL b[]) { @@ -31,8 +35,32 @@ static REAL dotp(REAL a[], REAL b[]) return sum0 + sum1; } +static REAL dotp_sse(REAL a[], REAL b[]) __attribute__((noinline)); +static REAL dotp_sse(REAL a[], REAL b[]) +{ +#ifdef __SSE__ + /* This is taken from speex's inner product implementation */ + int j; + REAL sum; + __m128 acc = _mm_setzero_ps(); + + for (j=0;jhangover = 0; @@ -57,6 +85,11 @@ AEC* AEC_init(int RATE) a->dumpcnt = 0; memset(a->ws, 0, sizeof(a->ws)); + if (have_vector) + a->dotp = dotp_sse; + else + a->dotp = dotp; + return a; } @@ -146,7 +179,7 @@ static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize) // (mic signal - estimated mic signal from spk signal) e = d; if (a->hangover > 0) { - e -= dotp(a->w, a->x + a->j); + e -= a->dotp(a->w, a->x + a->j); } ef = IIR1_highpass(a->Fe, e); // pre-whitening of e -- cgit