summary | refs | log | tree | commit | diff | stats
path: root/src/pulsecore/svolume_sse.c
diff options
context:
space:
mode:
author: Wim Taymans <wim.taymans@collabora.co.uk> 2009-08-14 15:41:32 +0200
committer: Wim Taymans <wim.taymans@collabora.co.uk> 2009-08-20 11:31:03 +0200
commit: f24c24c14b6614cf19ee916886c8b02384bac435 (patch)
tree: 978169fdcb41b8361ed1aab6156b15e52b53cc68 /src/pulsecore/svolume_sse.c
parent: a1235446a733164f00a96688784913172456a34e (diff)
volume: improved comments
Diffstat (limited to 'src/pulsecore/svolume_sse.c')
-rw-r--r-- src/pulsecore/svolume_sse.c 45
1 files changed, 24 insertions, 21 deletions
diff --git a/src/pulsecore/svolume_sse.c b/src/pulsecore/svolume_sse.c
index 8138c6c1..d95fa9d9 100644
--- a/src/pulsecore/svolume_sse.c
+++ b/src/pulsecore/svolume_sse.c
@@ -231,12 +231,12 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" test $1, %2 \n\t" /* check for odd samples */
" je 2f \n\t"
- " movd (%1, %3, 4), %%xmm0 \n\t" /* do odd sample */
- " movw (%0), %4 \n\t"
+ " movd (%1, %3, 4), %%xmm0 \n\t" /* | v0h | v0l | */
+ " movw (%0), %4 \n\t" /* .. | p0 | */
" rorw $8, %4 \n\t"
" movd %4, %%xmm1 \n\t"
VOLUME_32x16 (%%xmm1, %%xmm0)
- " movd %%xmm0, %4 \n\t"
+ " movd %%xmm0, %4 \n\t" /* .. | p0*v0 | */
" rorw $8, %4 \n\t"
" movw %4, (%0) \n\t"
" add $2, %0 \n\t"
@@ -244,31 +244,34 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
"2: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 2 samples at a time */
- " test $1, %2 \n\t" /* check for odd samples */
+ " test $1, %2 \n\t"
" je 4f \n\t"
- "3: \n\t" /* do samples in pairs of 2 */
- " movq (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
- " movd (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
+ "3: \n\t" /* do samples in groups of 2 */
+ " movq (%1, %3, 4), %%xmm0 \n\t" /* | v1h | v1l | v0h | v0l | */
+ " movd (%0), %%xmm1 \n\t" /* .. | p1 | p0 | */
SWAP_16 (%%xmm1)
VOLUME_32x16 (%%xmm1, %%xmm0)
SWAP_16 (%%xmm0)
- " movd %%xmm0, (%0) \n\t"
+ " movd %%xmm0, (%0) \n\t" /* .. | p1*v1 | p0*v0 | */
" add $4, %0 \n\t"
MOD_ADD ($2, %5)
"4: \n\t"
" sar $1, %2 \n\t" /* prepare for processing 4 samples at a time */
- " test $1, %2 \n\t" /* check for odd samples */
+ " test $1, %2 \n\t"
" je 6f \n\t"
- "5: \n\t" /* do samples in pairs of 4 */
- " movdqu (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
- " movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
+ /* FIXME, we can do aligned access of the volume values if we can guarantee
+ * that the array is 16 bytes aligned, we probably have to do the odd values
+ * after this then. */
+ "5: \n\t" /* do samples in groups of 4 */
+ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
+ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
SWAP_16 (%%xmm1)
VOLUME_32x16 (%%xmm1, %%xmm0)
SWAP_16 (%%xmm0)
- " movq %%xmm0, (%0) \n\t"
+ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
" add $8, %0 \n\t"
MOD_ADD ($4, %5)
@@ -277,17 +280,17 @@ pa_volume_s16re_sse (int16_t *samples, int32_t *volumes, unsigned channels, unsi
" cmp $0, %2 \n\t"
" je 8f \n\t"
- "7: \n\t" /* do samples in pairs of 8 */
- " movdqu (%1, %3, 4), %%xmm0 \n\t" /* v1_h | v1_l | v0_h | v0_l */
- " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* v3_h | v3_l | v2_h | v2_l */
- " movq (%0), %%xmm1 \n\t" /* X | X | p1 | p0 */
- " movq 8(%0), %%xmm3 \n\t" /* X | X | p3 | p2 */
+ "7: \n\t" /* do samples in groups of 8 */
+ " movdqu (%1, %3, 4), %%xmm0 \n\t" /* | v3h | v3l .. v0h | v0l | */
+ " movdqu 16(%1, %3, 4), %%xmm2 \n\t" /* | v7h | v7l .. v4h | v4l | */
+ " movq (%0), %%xmm1 \n\t" /* .. | p3 .. p0 | */
+ " movq 8(%0), %%xmm3 \n\t" /* .. | p7 .. p4 | */
SWAP_16_2 (%%xmm1, %%xmm3)
VOLUME_32x16 (%%xmm1, %%xmm0)
VOLUME_32x16 (%%xmm3, %%xmm2)
SWAP_16_2 (%%xmm0, %%xmm2)
- " movq %%xmm0, (%0) \n\t"
- " movq %%xmm2, 8(%0) \n\t"
+ " movq %%xmm0, (%0) \n\t" /* .. | p3*v3 .. p0*v0 | */
+ " movq %%xmm2, 8(%0) \n\t" /* .. | p7*v7 .. p4*v4 | */
" add $16, %0 \n\t"
MOD_ADD ($8, %5)
" dec %2 \n\t"
@@ -458,7 +461,7 @@ pa_volume_s24_32re_sse (uint32_t *samples, int32_t *volumes, unsigned channels,
}
#endif
-#define RUN_TEST
+#undef RUN_TEST
#ifdef RUN_TEST
#define CHANNELS 2