summaryrefslogtreecommitdiffstats
path: root/src/modules/echo-cancel/adrian-aec.c
diff options
context:
space:
mode:
authorArun Raghavan <arun.raghavan@collabora.co.uk>2010-09-21 20:42:32 +0530
committerArun Raghavan <arun.raghavan@collabora.co.uk>2010-09-23 17:17:03 +0530
commit963250abb99ab43b209281c2aa5398205492e555 (patch)
tree9f745ac8c630dd294c1a9a3c8255098526122117 /src/modules/echo-cancel/adrian-aec.c
parentab4223e9cffbc21399c0468dd89a2e57122fbfee (diff)
echo-cancel: Add SSE optimisation to the adrian module
Optimises the core inner-product function, which takes the most CPU. The SSE-optimised bits of the adrian echo canceller are used only if the CPU that PA is running on actually supports SSE.
Diffstat (limited to 'src/modules/echo-cancel/adrian-aec.c')
-rw-r--r--src/modules/echo-cancel/adrian-aec.c37
1 files changed, 35 insertions, 2 deletions
diff --git a/src/modules/echo-cancel/adrian-aec.c b/src/modules/echo-cancel/adrian-aec.c
index 69107c75..39c2d638 100644
--- a/src/modules/echo-cancel/adrian-aec.c
+++ b/src/modules/echo-cancel/adrian-aec.c
@@ -17,6 +17,10 @@
#include "adrian-aec.h"
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+
/* Vector Dot Product */
static REAL dotp(REAL a[], REAL b[])
{
@@ -31,8 +35,32 @@ static REAL dotp(REAL a[], REAL b[])
return sum0 + sum1;
}
+static REAL dotp_sse(REAL a[], REAL b[]) __attribute__((noinline));
+static REAL dotp_sse(REAL a[], REAL b[])
+{
+#ifdef __SSE__
+ /* This is taken from speex's inner product implementation */
+ int j;
+ REAL sum;
+ __m128 acc = _mm_setzero_ps();
+
+ for (j=0;j<NLMS_LEN;j+=8)
+ {
+ acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j), _mm_loadu_ps(b+j)));
+ acc = _mm_add_ps(acc, _mm_mul_ps(_mm_load_ps(a+j+4), _mm_loadu_ps(b+j+4)));
+ }
+ acc = _mm_add_ps(acc, _mm_movehl_ps(acc, acc));
+ acc = _mm_add_ss(acc, _mm_shuffle_ps(acc, acc, 0x55));
+ _mm_store_ss(&sum, acc);
-AEC* AEC_init(int RATE)
+ return sum;
+#else
+ return dotp(a, b);
+#endif
+}
+
+
+AEC* AEC_init(int RATE, int have_vector)
{
AEC *a = pa_xnew(AEC, 1);
a->hangover = 0;
@@ -57,6 +85,11 @@ AEC* AEC_init(int RATE)
a->dumpcnt = 0;
memset(a->ws, 0, sizeof(a->ws));
+ if (have_vector)
+ a->dotp = dotp_sse;
+ else
+ a->dotp = dotp;
+
return a;
}
@@ -146,7 +179,7 @@ static REAL AEC_nlms_pw(AEC *a, REAL d, REAL x_, float stepsize)
// (mic signal - estimated mic signal from spk signal)
e = d;
if (a->hangover > 0) {
- e -= dotp(a->w, a->x + a->j);
+ e -= a->dotp(a->w, a->x + a->j);
}
ef = IIR1_highpass(a->Fe, e); // pre-whitening of e