summaryrefslogtreecommitdiffstats
path: root/gst/goom/filters_mmx.s
diff options
context:
space:
mode:
authorWim Taymans <wim.taymans@gmail.com>2002-02-01 19:28:30 +0000
committerWim Taymans <wim.taymans@gmail.com>2002-02-01 19:28:30 +0000
commit4d3b1472e78ac7c12460b26914c9a294a3de573a (patch)
tree4d9b1f0a6698385b086e37381b75b2d93bf85e39 /gst/goom/filters_mmx.s
parente5faa112b67c7681f653949413799180c7382451 (diff)
Added a goom plugin (goom.sourceforge.net) to test: ./gst-launch filesrc location=/opt/data/south.mp3 ! mad ! tee sil...
Original commit message from CVS: Added a goom plugin (goom.sourceforge.net) to test: ./gst-launch filesrc location=/opt/data/south.mp3 ! mad ! tee silent=true src%d! goom ! colorspace ! xvideosink tee0.src%d! osssink
Diffstat (limited to 'gst/goom/filters_mmx.s')
-rw-r--r--gst/goom/filters_mmx.s130
1 files changed, 130 insertions, 0 deletions
diff --git a/gst/goom/filters_mmx.s b/gst/goom/filters_mmx.s
new file mode 100644
index 00000000..337de56c
--- /dev/null
+++ b/gst/goom/filters_mmx.s
@@ -0,0 +1,130 @@
+;// file : mmx_zoom.s
+;// author : JC Hoelt <jeko@free.fr>
+;//
+;// history
+;// 07/01/2001 : Changing FEMMS to EMMS : slower... but run on intel machines
+;// 03/01/2001 : WIDTH and HEIGHT are now variable
+;// 28/12/2000 : adding comments to the code, suppress some useless lines
+;// 27/12/2000 : reducing memory access... improving performance by 20%
+;// coefficients are now on 1 byte
+;// 22/12/2000 : Changing data structure
+;// 16/12/2000 : AT&T version
+;// 14/12/2000 : unrolling loop
+;// 12/12/2000 : 64 bits memory access
+
+
+.data
+
+thezero:
+ .long 0x00000000
+ .long 0x00000000
+
+
+.text
+
+.globl mmx_zoom ;// name of the function to call by C program
+.extern coeffs ;// the transformation buffer
+.extern expix1,expix2 ;// the source and destination buffer
+.extern mmx_zoom_size, zoom_width ;// size of the buffers
+
+.align 16
+mmx_zoom:
+
+push %ebp
+push %esp
+
+;// initialisation du mm7 à zero
+movq (thezero), %mm7
+
+movl zoom_width, %eax
+movl $4, %ebx
+mull %ebx
+movl %eax, %ebp
+
+movl (coeffs), %eax
+movl (expix1), %edx
+movl (expix2), %ebx
+movl $10, %edi
+movl mmx_zoom_size, %ecx
+
+.while:
+ ;// esi <- nouvelle position
+ movl (%eax), %esi
+ leal (%edx, %esi), %esi
+
+ ;// recuperation des deux premiers pixels dans mm0 et mm1
+ movq (%esi), %mm0 /* b1-v1-r1-a1-b2-v2-r2-a2 */
+ movq %mm0, %mm1 /* b1-v1-r1-a1-b2-v2-r2-a2 */
+
+ ;// recuperation des 4 coefficients
+ movd 4(%eax), %mm6 /* ??-??-??-??-c4-c3-c2-c1 */
+ ;// depackage du premier pixel
+ punpcklbw %mm7, %mm0 /* 00-b2-00-v2-00-r2-00-a2 */
+
+ movq %mm6, %mm5 /* ??-??-??-??-c4-c3-c2-c1 */
+ ;// depackage du 2ieme pixel
+ punpckhbw %mm7, %mm1 /* 00-b1-00-v1-00-r1-00-a1 */
+
+ ;// extraction des coefficients...
+ punpcklbw %mm5, %mm6 /* c4-c4-c3-c3-c2-c2-c1-c1 */
+ movq %mm6, %mm4 /* c4-c4-c3-c3-c2-c2-c1-c1 */
+ movq %mm6, %mm5 /* c4-c4-c3-c3-c2-c2-c1-c1 */
+
+ punpcklbw %mm5, %mm6 /* c2-c2-c2-c2-c1-c1-c1-c1 */
+ punpckhbw %mm5, %mm4 /* c4-c4-c4-c4-c3-c3-c3-c3 */
+
+ movq %mm6, %mm3 /* c2-c2-c2-c2-c1-c1-c1-c1 */
+ punpcklbw %mm7, %mm6 /* 00-c1-00-c1-00-c1-00-c1 */
+ punpckhbw %mm7, %mm3 /* 00-c2-00-c2-00-c2-00-c2 */
+
+ ;// multiplication des pixels par les coefficients
+ pmullw %mm6, %mm0 /* c1*b2-c1*v2-c1*r2-c1*a2 */
+ pmullw %mm3, %mm1 /* c2*b1-c2*v1-c2*r1-c2*a1 */
+ paddw %mm1, %mm0
+
+ ;// ...extraction des 2 derniers coefficients
+ movq %mm4, %mm5 /* c4-c4-c4-c4-c3-c3-c3-c3 */
+ punpcklbw %mm7, %mm4 /* 00-c3-00-c3-00-c3-00-c3 */
+ punpckhbw %mm7, %mm5 /* 00-c4-00-c4-00-c4-00-c4 */
+
+ ;// recuperation des 2 derniers pixels
+ movq (%esi,%ebp), %mm1
+ movq %mm1, %mm2
+
+ ;// depackage des pixels
+ punpcklbw %mm7, %mm1
+ punpckhbw %mm7, %mm2
+
+ ;// multiplication pas les coeffs
+ pmullw %mm4, %mm1
+ pmullw %mm5, %mm2
+
+ ;// ajout des valeurs obtenues à la valeur finale
+ paddw %mm1, %mm0
+ paddw %mm2, %mm0
+
+ ;// division par 256 = 16+16+16+16, puis repackage du pixel final
+ psrlw $8, %mm0
+ packuswb %mm7, %mm0
+
+ ;// passage au suivant
+ leal 8(%eax), %eax
+
+ decl %ecx
+ ;// enregistrement du resultat
+ movd %mm0, (%ebx)
+ leal 4(%ebx), %ebx
+
+ ;// test de fin du tantque
+ cmpl $0, %ecx ;// 400x300
+
+jz .fin_while
+jmp .while
+
+.fin_while:
+emms
+
+pop %esp
+pop %ebp
+
+ret ;//The End