diff options
author | Bastien Nocera <hadess@hadess.net> | 2008-02-23 01:51:37 +0000 |
---|---|---|
committer | Bastien Nocera <hadess@hadess.net> | 2008-02-23 01:51:37 +0000 |
commit | a7bc7485b1a4d7e1b1a12ff593ca4ccb1d59e466 (patch) | |
tree | ffba99ad38c7616d089c5e728c75a6bd5f736c6c /gst/goom2k1/filters_mmx.s | |
parent | 7f0745bb7f26c69766bb0c64458c6543588cc4dc (diff) |
configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin
Original commit message from CVS:
2008-02-23 Bastien Nocera <hadess@hadess.net>
* configure.ac: Add checks for Flex/Yacc/Bison and other
furry animals, for the new goom 2k4 based plugin
* gst/goom/*: Update to use goom 2k4, uses liboil to detect
CPU optimisations (not working yet), move the old plugin to...
* gst/goom2k1/*: ... here, in case somebody is sick enough
Fixes #515073
Diffstat (limited to 'gst/goom2k1/filters_mmx.s')
-rw-r--r-- | gst/goom2k1/filters_mmx.s | 130 |
1 files changed, 130 insertions, 0 deletions
diff --git a/gst/goom2k1/filters_mmx.s b/gst/goom2k1/filters_mmx.s new file mode 100644 index 00000000..337de56c --- /dev/null +++ b/gst/goom2k1/filters_mmx.s @@ -0,0 +1,130 @@ +;// file : mmx_zoom.s +;// author : JC Hoelt <jeko@free.fr> +;// +;// history +;// 07/01/2001 : Changing FEMMS to EMMS : slower... but run on intel machines +;// 03/01/2001 : WIDTH and HEIGHT are now variable +;// 28/12/2000 : adding comments to the code, suppress some useless lines +;// 27/12/2000 : reducing memory access... improving performance by 20% +;// coefficients are now on 1 byte +;// 22/12/2000 : Changing data structure +;// 16/12/2000 : AT&T version +;// 14/12/2000 : unrolling loop +;// 12/12/2000 : 64 bits memory access + + +.data + +thezero: + .long 0x00000000 + .long 0x00000000 + + +.text + +.globl mmx_zoom ;// name of the function to call by C program +.extern coeffs ;// the transformation buffer +.extern expix1,expix2 ;// the source and destination buffer +.extern mmx_zoom_size, zoom_width ;// size of the buffers + +.align 16 +mmx_zoom: + +push %ebp +push %esp + +;// initialisation du mm7 à zero +movq (thezero), %mm7 + +movl zoom_width, %eax +movl $4, %ebx +mull %ebx +movl %eax, %ebp + +movl (coeffs), %eax +movl (expix1), %edx +movl (expix2), %ebx +movl $10, %edi +movl mmx_zoom_size, %ecx + +.while: + ;// esi <- nouvelle position + movl (%eax), %esi + leal (%edx, %esi), %esi + + ;// recuperation des deux premiers pixels dans mm0 et mm1 + movq (%esi), %mm0 /* b1-v1-r1-a1-b2-v2-r2-a2 */ + movq %mm0, %mm1 /* b1-v1-r1-a1-b2-v2-r2-a2 */ + + ;// recuperation des 4 coefficients + movd 4(%eax), %mm6 /* ??-??-??-??-c4-c3-c2-c1 */ + ;// depackage du premier pixel + punpcklbw %mm7, %mm0 /* 00-b2-00-v2-00-r2-00-a2 */ + + movq %mm6, %mm5 /* ??-??-??-??-c4-c3-c2-c1 */ + ;// depackage du 2ieme pixel + punpckhbw %mm7, %mm1 /* 00-b1-00-v1-00-r1-00-a1 */ + + ;// extraction des coefficients... + punpcklbw %mm5, %mm6 /* c4-c4-c3-c3-c2-c2-c1-c1 */ + movq %mm6, %mm4 /* c4-c4-c3-c3-c2-c2-c1-c1 */ + movq %mm6, %mm5 /* c4-c4-c3-c3-c2-c2-c1-c1 */ + + punpcklbw %mm5, %mm6 /* c2-c2-c2-c2-c1-c1-c1-c1 */ + punpckhbw %mm5, %mm4 /* c4-c4-c4-c4-c3-c3-c3-c3 */ + + movq %mm6, %mm3 /* c2-c2-c2-c2-c1-c1-c1-c1 */ + punpcklbw %mm7, %mm6 /* 00-c1-00-c1-00-c1-00-c1 */ + punpckhbw %mm7, %mm3 /* 00-c2-00-c2-00-c2-00-c2 */ + + ;// multiplication des pixels par les coefficients + pmullw %mm6, %mm0 /* c1*b2-c1*v2-c1*r2-c1*a2 */ + pmullw %mm3, %mm1 /* c2*b1-c2*v1-c2*r1-c2*a1 */ + paddw %mm1, %mm0 + + ;// ...extraction des 2 derniers coefficients + movq %mm4, %mm5 /* c4-c4-c4-c4-c3-c3-c3-c3 */ + punpcklbw %mm7, %mm4 /* 00-c3-00-c3-00-c3-00-c3 */ + punpckhbw %mm7, %mm5 /* 00-c4-00-c4-00-c4-00-c4 */ + + ;// recuperation des 2 derniers pixels + movq (%esi,%ebp), %mm1 + movq %mm1, %mm2 + + ;// depackage des pixels + punpcklbw %mm7, %mm1 + punpckhbw %mm7, %mm2 + + ;// multiplication pas les coeffs + pmullw %mm4, %mm1 + pmullw %mm5, %mm2 + + ;// ajout des valeurs obtenues à la valeur finale + paddw %mm1, %mm0 + paddw %mm2, %mm0 + + ;// division par 256 = 16+16+16+16, puis repackage du pixel final + psrlw $8, %mm0 + packuswb %mm7, %mm0 + + ;// passage au suivant + leal 8(%eax), %eax + + decl %ecx + ;// enregistrement du resultat + movd %mm0, (%ebx) + leal 4(%ebx), %ebx + + ;// test de fin du tantque + cmpl $0, %ecx ;// 400x300 + +jz .fin_while +jmp .while + +.fin_while: +emms + +pop %esp +pop %ebp + +ret ;//The End |