summaryrefslogtreecommitdiffstats
path: root/src/pulsecore
diff options
context:
space:
mode:
Diffstat (limited to 'src/pulsecore')
-rw-r--r--src/pulsecore/sample-util.c102
-rw-r--r--src/pulsecore/vector.h97
2 files changed, 168 insertions, 31 deletions
diff --git a/src/pulsecore/sample-util.c b/src/pulsecore/sample-util.c
index cf7b4d58..905ba5df 100644
--- a/src/pulsecore/sample-util.c
+++ b/src/pulsecore/sample-util.c
@@ -213,13 +213,22 @@ size_t pa_mix(
for (i = 0; i < nstreams; i++) {
pa_mix_info *m = streams + i;
- int32_t v, cv = m->linear[channel].i;
+ int32_t v, lo, hi, cv = m->linear[channel].i;
if (PA_UNLIKELY(cv <= 0))
continue;
+ /* Multiplying the 32bit volume factor with the
+ * 16bit sample might result in a 48bit value. We
+ * want to do without 64 bit integers and hence do
+ * the multiplication independently for the HI and
+ * LO part of the volume. */
+
+ hi = cv >> 16;
+ lo = cv & 0xFFFF;
+
v = *((int16_t*) m->ptr);
- v = (v * cv) / 0x10000;
+ v = ((v * lo) >> 16) + (v * hi);
sum += v;
m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
@@ -248,13 +257,16 @@ size_t pa_mix(
for (i = 0; i < nstreams; i++) {
pa_mix_info *m = streams + i;
- int32_t v, cv = m->linear[channel].i;
+ int32_t v, lo, hi, cv = m->linear[channel].i;
if (PA_UNLIKELY(cv <= 0))
continue;
+ hi = cv >> 16;
+ lo = cv & 0xFFFF;
+
v = PA_INT16_SWAP(*((int16_t*) m->ptr));
- v = (v * cv) / 0x10000;
+ v = ((v * lo) >> 16) + (v * hi);
sum += v;
m->ptr = (uint8_t*) m->ptr + sizeof(int16_t);
@@ -290,7 +302,7 @@ size_t pa_mix(
continue;
v = *((int32_t*) m->ptr);
- v = (v * cv) / 0x10000;
+ v = (v * cv) >> 16;
sum += v;
m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
@@ -326,7 +338,7 @@ size_t pa_mix(
continue;
v = PA_INT32_SWAP(*((int32_t*) m->ptr));
- v = (v * cv) / 0x10000;
+ v = (v * cv) >> 16;
sum += v;
m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
@@ -362,7 +374,7 @@ size_t pa_mix(
continue;
v = (int32_t) (PA_READ24NE(m->ptr) << 8);
- v = (v * cv) / 0x10000;
+ v = (v * cv) >> 16;
sum += v;
m->ptr = (uint8_t*) m->ptr + 3;
@@ -398,7 +410,7 @@ size_t pa_mix(
continue;
v = (int32_t) (PA_READ24RE(m->ptr) << 8);
- v = (v * cv) / 0x10000;
+ v = (v * cv) >> 16;
sum += v;
m->ptr = (uint8_t*) m->ptr + 3;
@@ -434,7 +446,7 @@ size_t pa_mix(
continue;
v = (int32_t) (*((uint32_t*)m->ptr) << 8);
- v = (v * cv) / 0x10000;
+ v = (v * cv) >> 16;
sum += v;
m->ptr = (uint8_t*) m->ptr + sizeof(int32_t);
@@ -470,7 +482,7 @@ size_t pa_mix(
continue;
v = (int32_t) (PA_UINT32_SWAP(*((uint32_t*) m->ptr)) << 8);
- v = (v * cv) / 0x10000;
+ v = (v * cv) >> 16;
sum += v;
m->ptr = (uint8_t*) m->ptr + 3;
@@ -505,7 +517,7 @@ size_t pa_mix(
continue;
v = (int32_t) *((uint8_t*) m->ptr) - 0x80;
- v = (v * cv) / 0x10000;
+ v = (v * cv) >> 16;
sum += v;
m->ptr = (uint8_t*) m->ptr + 1;
@@ -534,13 +546,16 @@ size_t pa_mix(
for (i = 0; i < nstreams; i++) {
pa_mix_info *m = streams + i;
- int32_t v, cv = m->linear[channel].i;
+ int32_t v, hi, lo, cv = m->linear[channel].i;
if (PA_UNLIKELY(cv <= 0))
continue;
+ hi = cv >> 16;
+ lo = cv & 0xFFFF;
+
v = (int32_t) st_ulaw2linear16(*((uint8_t*) m->ptr));
- v = (v * cv) / 0x10000;
+ v = ((v * lo) >> 16) + (v * hi);
sum += v;
m->ptr = (uint8_t*) m->ptr + 1;
@@ -569,13 +584,16 @@ size_t pa_mix(
for (i = 0; i < nstreams; i++) {
pa_mix_info *m = streams + i;
- int32_t v, cv = m->linear[channel].i;
+ int32_t v, hi, lo, cv = m->linear[channel].i;
if (PA_UNLIKELY(cv <= 0))
continue;
+ hi = cv >> 16;
+ lo = cv & 0xFFFF;
+
v = (int32_t) st_alaw2linear16(*((uint8_t*) m->ptr));
- v = (v * cv) / 0x10000;
+ v = ((v * lo) >> 16) + (v * hi);
sum += v;
m->ptr = (uint8_t*) m->ptr + 1;
@@ -710,16 +728,26 @@ void pa_volume_memchunk(
e = (int16_t*) ptr + c->length/sizeof(int16_t);
for (channel = 0, d = ptr; d < e; d++) {
- int32_t t;
+ int32_t t, hi, lo;
+
+ /* Multiplying the 32bit volume factor with the 16bit
+ * sample might result in a 48bit value. We want to
+ * do without 64 bit integers and hence do the
+ * multiplication independently for the HI and LO part
+ * of the volume. */
+
+ hi = linear[channel] >> 16;
+ lo = linear[channel] & 0xFFFF;
t = (int32_t)(*d);
- t = (t * linear[channel]) / 0x10000;
+ t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
*d = (int16_t) t;
if (PA_UNLIKELY(++channel >= spec->channels))
channel = 0;
}
+
break;
}
@@ -733,10 +761,13 @@ void pa_volume_memchunk(
e = (int16_t*) ptr + c->length/sizeof(int16_t);
for (channel = 0, d = ptr; d < e; d++) {
- int32_t t;
+ int32_t t, hi, lo;
+
+ hi = linear[channel] >> 16;
+ lo = linear[channel] & 0xFFFF;
t = (int32_t) PA_INT16_SWAP(*d);
- t = (t * linear[channel]) / 0x10000;
+ t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
*d = PA_INT16_SWAP((int16_t) t);
@@ -760,7 +791,7 @@ void pa_volume_memchunk(
int64_t t;
t = (int64_t)(*d);
- t = (t * linear[channel]) / 0x10000;
+ t = (t * linear[channel]) >> 16;
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
*d = (int32_t) t;
@@ -783,7 +814,7 @@ void pa_volume_memchunk(
int64_t t;
t = (int64_t) PA_INT32_SWAP(*d);
- t = (t * linear[channel]) / 0x10000;
+ t = (t * linear[channel]) >> 16;
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
*d = PA_INT32_SWAP((int32_t) t);
@@ -806,7 +837,7 @@ void pa_volume_memchunk(
int64_t t;
t = (int64_t)((int32_t) (PA_READ24NE(d) << 8));
- t = (t * linear[channel]) / 0x10000;
+ t = (t * linear[channel]) >> 16;
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
PA_WRITE24NE(d, ((uint32_t) (int32_t) t) >> 8);
@@ -829,7 +860,7 @@ void pa_volume_memchunk(
int64_t t;
t = (int64_t)((int32_t) (PA_READ24RE(d) << 8));
- t = (t * linear[channel]) / 0x10000;
+ t = (t * linear[channel]) >> 16;
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
PA_WRITE24RE(d, ((uint32_t) (int32_t) t) >> 8);
@@ -852,7 +883,7 @@ void pa_volume_memchunk(
int64_t t;
t = (int64_t) ((int32_t) (*d << 8));
- t = (t * linear[channel]) / 0x10000;
+ t = (t * linear[channel]) >> 16;
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
*d = ((uint32_t) ((int32_t) t)) >> 8;
@@ -875,7 +906,7 @@ void pa_volume_memchunk(
int64_t t;
t = (int64_t) ((int32_t) (PA_UINT32_SWAP(*d) << 8));
- t = (t * linear[channel]) / 0x10000;
+ t = (t * linear[channel]) >> 16;
t = PA_CLAMP_UNLIKELY(t, -0x80000000LL, 0x7FFFFFFFLL);
*d = PA_UINT32_SWAP(((uint32_t) ((int32_t) t)) >> 8);
@@ -895,10 +926,13 @@ void pa_volume_memchunk(
e = (uint8_t*) ptr + c->length;
for (channel = 0, d = ptr; d < e; d++) {
- int32_t t;
+ int32_t t, hi, lo;
+
+ hi = linear[channel] >> 16;
+ lo = linear[channel] & 0xFFFF;
t = (int32_t) *d - 0x80;
- t = (t * linear[channel]) / 0x10000;
+ t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x80, 0x7F);
*d = (uint8_t) (t + 0x80);
@@ -918,10 +952,13 @@ void pa_volume_memchunk(
e = (uint8_t*) ptr + c->length;
for (channel = 0, d = ptr; d < e; d++) {
- int32_t t;
+ int32_t t, hi, lo;
+
+ hi = linear[channel] >> 16;
+ lo = linear[channel] & 0xFFFF;
t = (int32_t) st_ulaw2linear16(*d);
- t = (t * linear[channel]) / 0x10000;
+ t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
*d = (uint8_t) st_14linear2ulaw((int16_t) t >> 2);
@@ -941,10 +978,13 @@ void pa_volume_memchunk(
e = (uint8_t*) ptr + c->length;
for (channel = 0, d = ptr; d < e; d++) {
- int32_t t;
+ int32_t t, hi, lo;
+
+ hi = linear[channel] >> 16;
+ lo = linear[channel] & 0xFFFF;
t = (int32_t) st_alaw2linear16(*d);
- t = (t * linear[channel]) / 0x10000;
+ t = ((t * lo) >> 16) + (t * hi);
t = PA_CLAMP_UNLIKELY(t, -0x8000, 0x7FFF);
*d = (uint8_t) st_13linear2alaw((int16_t) t >> 3);
diff --git a/src/pulsecore/vector.h b/src/pulsecore/vector.h
new file mode 100644
index 00000000..076bd6c0
--- /dev/null
+++ b/src/pulsecore/vector.h
@@ -0,0 +1,97 @@
+/***
+ This file is part of PulseAudio.
+
+ Copyright 2004-2006 Lennart Poettering
+ Copyright 2006 Pierre Ossman <ossman@cendio.se> for Cendio AB
+
+ PulseAudio is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2 of the License,
+ or (at your option) any later version.
+
+ PulseAudio is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with PulseAudio; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ USA.
+***/
+
+#include <inttypes.h>
+
+/* First, define HAVE_VECTOR if we have the gcc vector extensions at all */
+#if defined(__SSE2__) || defined(__ALTIVEC__)
+#define HAVE_VECTOR
+
+
+/* This is supposed to be portable to different SIMD instruction
+ * sets. We define vector types for different base types: uint8_t,
+ * int16_t, int32_t, float. The vector type is a union. The fields .i,
+ * .u, .f are arrays for accessing the separate elements of a
+ * vector. .v is a gcc vector type of the right format. .m is the
+ * vector in the type the SIMD extension specific intrinsics API
+ * expects. PA_xxx_VECTOR_SIZE is the number of elements in a
+ * vector. PA_xxx_VECTOR_MAKE constructs a gcc vector variable with
+ * the same value in all elements. */
+
+#ifdef __SSE2__
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#define PA_UINT8_VECTOR_SIZE 16 /* uint8_t elements per vector */
+#define PA_INT16_VECTOR_SIZE 8 /* int16_t elements per vector */
+#define PA_INT32_VECTOR_SIZE 4 /* int32_t elements per vector */
+#define PA_FLOAT_VECTOR_SIZE 4 /* float elements per vector */
+
+#define PA_UINT8_VECTOR_MAKE(x) (pa_v16qi) { x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x }
+#define PA_INT16_VECTOR_MAKE(x) (pa_v8hi) { x, x, x, x, x, x, x, x }
+#define PA_INT32_VECTOR_MAKE(x) (pa_v4si) { x, x, x, x }
+#define PA_FLOAT_VECTOR_MAKE(x) (pa_v4sf) { x, x, x, x }
+
+#endif
+
+/* uint8_t vector: one union giving three views of the same PA_UINT8_VECTOR_SIZE unsigned bytes */
+typedef uint8_t pa_v16qi __attribute__ ((vector_size (PA_UINT8_VECTOR_SIZE * sizeof(uint8_t))));
+typedef union pa_uint8_vector {
+ uint8_t u[PA_UINT8_VECTOR_SIZE]; /* element-wise access */
+ pa_v16qi v; /* gcc vector type */
+#ifdef __SSE2__
+ __m128i m; /* type used by the SSE2 intrinsics */
+#endif
+} pa_uint8_vector_t;
+
+/* int16_t vector: one union giving three views of the same PA_INT16_VECTOR_SIZE signed 16 bit values */
+typedef int16_t pa_v8hi __attribute__ ((vector_size (PA_INT16_VECTOR_SIZE * sizeof(int16_t))));
+typedef union pa_int16_vector {
+ int16_t i[PA_INT16_VECTOR_SIZE]; /* element-wise access */
+ pa_v8hi v; /* gcc vector type */
+#ifdef __SSE2__
+ __m128i m; /* type used by the SSE2 intrinsics */
+#endif
+} pa_int16_vector_t;
+
+/* int32_t vector: one union giving three views of the same PA_INT32_VECTOR_SIZE signed 32 bit values */
+typedef int32_t pa_v4si __attribute__ ((vector_size (PA_INT32_VECTOR_SIZE * sizeof(int32_t))));
+typedef union pa_int32_vector {
+ int32_t i[PA_INT32_VECTOR_SIZE]; /* element-wise access */
+ pa_v4si v; /* gcc vector type */
+#ifdef __SSE2__
+ __m128i m; /* type used by the SSE2 intrinsics */
+#endif
+} pa_int32_vector_t;
+
+/* float vector: one union giving three views of the same PA_FLOAT_VECTOR_SIZE floats */
+typedef float pa_v4sf __attribute__ ((vector_size (PA_FLOAT_VECTOR_SIZE * sizeof(float))));
+typedef union pa_float_vector {
+ float f[PA_FLOAT_VECTOR_SIZE]; /* element-wise access */
+ pa_v4sf v; /* gcc vector type */
+#ifdef __SSE2__
+ __m128 m; /* type used by the SSE intrinsics for packed floats */
+#endif
+} pa_float_vector_t;
+
+#endif