; Goom Project ; Copyright (C) <2001> Jean-Christophe Hoelt ; ; This library is free software; you can redistribute it and/or ; modify it under the terms of the GNU Library General Public ; License as published by the Free Software Foundation; either ; version 2 of the License, or (at your option) any later version. ; ; This library is distributed in the hope that it will be useful, ; but WITHOUT ANY WARRANTY; without even the implied warranty of ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ; Library General Public License for more details. ; ; You should have received a copy of the GNU Library General Public ; License along with this library; if not, write to the ; Free Software Foundation, Inc., 59 Temple Place - Suite 330, ; Boston, MA 02111-1307, USA. ; ;// ;// history ;// 07/01/2001 : Changing FEMMS to EMMS : slower... but run on intel machines ;// 03/01/2001 : WIDTH and HEIGHT are now variable ;// 28/12/2000 : adding comments to the code, suppress some useless lines ;// 27/12/2000 : reducing memory access... 
;//              improving performance by 20%
;//              coefficients are now on 1 byte
;// 22/12/2000 : Changing data structure
;// 16/12/2000 : AT&T version
;// 14/12/2000 : unrolling loop
;// 12/12/2000 : 64 bits memory access

/*
 * Syntax : GNU as, AT&T operand order, 32-bit x86 + MMX.
 * The file is expected to go through the C preprocessor (it uses #define
 * and C-style comments).
 */

.data

/* Format string for the debug printf that is disabled inside mmx_zoom. */
chaine:
	.string "pos = %d\n\0"
	.long 0x0

/* 64-bit zero constant. mmx_zoom now clears mm7 with pxor and no longer
   reads it; the definition is kept so the data section is unchanged. */
thezero:
	.long 0x00000000
	.long 0x00000000


.text

.globl mmx_zoom			/* name of the function to call from the C program */

/* .extern coeffs		   the transformation buffer (declaration disabled) */
.extern expix1,expix2		/* the source and destination buffers */
.extern mmx_zoom_size, zoom_width	/* size of the buffers */

.extern brutS,brutD,buffratio,precalCoef,prevX,prevY

#define PERTEMASK 15
/* a / sqrtperte is done as a >> PERTEDEC */
#define PERTEDEC 4

/*-----------------------------------------------------------------------
 * mmx_zoom
 *
 * Takes no stack arguments; every input is read from the extern symbols
 * declared above. For each index i from mmx_zoom_size-1 down to 1 it
 *   - reads the (px,py) pair at brutS + 8*i and at brutD + 8*i,
 *   - interpolates  p = S + (((D - S) * buffratio) >> 16)  per component,
 *   - splits p into a 4-bit fraction (p & 15) and a pixel index (p >> 4),
 *   - derives a linear position  (px >> 4) + (py >> 4) * zoom_width,
 *     or 0 when px or py is at or beyond its limit,
 *   - runs the MMX four-pixel weighted sum.
 *
 * NOTE(review): in this version the two pixel loads, the coefficient
 * load and the final store are commented out, so the MMX arithmetic works
 * on whatever mm0/mm1/mm6 contained and nothing is written to expix2.
 * They are left disabled here exactly as found; confirm whether this file
 * is still meant to be the live implementation.
 *
 * NOTE(review): index 0 is never processed (the loop stops when the
 * counter reaches 0). Left as found.
 *
 * Fixes relative to the previous revision:
 *   - ebx, esi and edi are now saved and restored (they are callee-saved
 *     in the 32-bit System V calling convention and were being destroyed).
 *   - the clamp limits are (prevX-1) << 4 and (prevY-1) << 4. They were
 *     shifted right, which put them in the wrong unit: px and py carry
 *     4 fractional bits, as shown by the "& 15" / ">> 4" split below.
 *   - mmx_zoom_size <= 1 now returns immediately instead of looping with
 *     a zero or negative index.
 *
 * Stack frame:
 *   -4(%ebp)   x limit = (prevX - 1) << 4
 *   -8(%ebp)   y limit = (prevY - 1) << 4
 *   -12(%ebp)  reserved, unused
 *   below      saved ebx, esi, edi
 *
 * Clobbers: eax, ecx, edx, flags, mm0-mm7 (emms is executed before ret).
 *-----------------------------------------------------------------------*/
	.align 16
mmx_zoom:

	pushl	%ebp
	movl	%esp,%ebp
	subl	$12,%esp
	pushl	%ebx			/* callee-saved registers used below */
	pushl	%esi
	pushl	%edi

	/* x limit, in the same 1/16-pixel unit as px */
	movl	prevX,%eax
	decl	%eax
	sall	$4,%eax
	movl	%eax,-4(%ebp)

	/* y limit, in the same 1/16-pixel unit as py */
	movl	prevY,%eax
	decl	%eax
	sall	$4,%eax
	movl	%eax,-8(%ebp)

	/* mm7 = 0, used as the zero operand for unpacking and packing */
	pxor	%mm7,%mm7

	movl	mmx_zoom_size,%ecx
	decl	%ecx			/* ecx = last index */
	jle	.Lzoom_done		/* nothing to do when size <= 1 */

.Lzoom_loop:
	/* invariant: ecx = current index, always > 0 here */

	/* source pair */
	movl	brutS,%eax
	leal	(%eax,%ecx,8),%eax
	movl	(%eax),%edx		/* edx = brutS px */
	movl	4(%eax),%eax		/* eax = brutS py */

	/* destination pair, then interpolate x */
	movl	brutD,%ebx
	leal	(%ebx,%ecx,8),%ebx	/* ebx -> brutD entry */
	movl	(%ebx),%esi
	subl	%edx,%esi
	imull	buffratio,%esi
	sarl	$16,%esi
	addl	%edx,%esi		/* esi = px */

	/* interpolate y; eax still holds brutS py */
	movl	4(%ebx),%edi
	subl	%eax,%edi
	imull	buffratio,%edi
	sarl	$16,%edi
	addl	%eax,%edi		/* edi = py */

	/* byte offset into precalCoef: coefh * 64 + coefv * 4 */
	movl	%esi,%eax
	andl	$15,%eax		/* eax = coefh (PERTEMASK) */
	movl	%edi,%ebx
	andl	$15,%ebx		/* ebx = coefv (PERTEMASK) */
	leal	0(,%ebx,4),%ebx
	sall	$6,%eax
	addl	%ebx,%eax
	movl	$precalCoef,%ebx
	/* disabled in the original:  movd (%eax,%ebx),%mm6   (mm6 = coeffs) */

	/* position 0 is used when py >= y limit or px >= x limit (signed) */
	cmpl	-8(%ebp),%edi
	jge	.Lzoom_clamp
	cmpl	-4(%ebp),%esi
	jge	.Lzoom_clamp

	sarl	$4,%esi			/* pixel x (PERTEDEC) */
	sarl	$4,%edi			/* pixel y (PERTEDEC) */
	imull	zoom_width,%edi
	leal	(%esi,%edi),%esi	/* esi = x + y * zoom_width */
	jmp	.Lzoom_pos_ready

.Lzoom_clamp:
	xorl	%esi,%esi		/* flags are not read before the next decl */

.Lzoom_pos_ready:
	/* from here on: esi = pos; mm6 is meant to hold the coefficients */

	/* disabled debug trace in the original:
		pushl %esi
		pushl $chaine
		call printf
		addl $8,%esp
	*/

	movl	expix1,%eax

	/* fetch the first two pixels into mm0 and mm1 */
	/* disabled in the original:  movq (%eax,%esi,4), %mm0    b1-v1-r1-a1-b2-v2-r2-a2 */
	movq	%mm0, %mm1		/* b1-v1-r1-a1-b2-v2-r2-a2 */

	/* unpack the first pixel */
	punpcklbw %mm7, %mm0		/* 00-b2-00-v2-00-r2-00-a2 */

	movq	%mm6, %mm5		/* ??-??-??-??-c4-c3-c2-c1 */
	/* unpack the second pixel */
	punpckhbw %mm7, %mm1		/* 00-b1-00-v1-00-r1-00-a1 */

	/* spread the coefficients */
	punpcklbw %mm5, %mm6		/* c4-c4-c3-c3-c2-c2-c1-c1 */
	movq	%mm6, %mm4		/* c4-c4-c3-c3-c2-c2-c1-c1 */
	movq	%mm6, %mm5		/* c4-c4-c3-c3-c2-c2-c1-c1 */

	punpcklbw %mm5, %mm6		/* c2-c2-c2-c2-c1-c1-c1-c1 */
	punpckhbw %mm5, %mm4		/* c4-c4-c4-c4-c3-c3-c3-c3 */

	movq	%mm6, %mm3		/* c2-c2-c2-c2-c1-c1-c1-c1 */
	punpcklbw %mm7, %mm6		/* 00-c1-00-c1-00-c1-00-c1 */
	punpckhbw %mm7, %mm3		/* 00-c2-00-c2-00-c2-00-c2 */

	/* multiply the pixels by their coefficients */
	pmullw	%mm6, %mm0		/* c1*b2-c1*v2-c1*r2-c1*a2 */
	pmullw	%mm3, %mm1		/* c2*b1-c2*v1-c2*r1-c2*a1 */
	paddw	%mm1, %mm0

	/* spread the last two coefficients */
	movq	%mm4, %mm5		/* c4-c4-c4-c4-c3-c3-c3-c3 */
	punpcklbw %mm7, %mm4		/* 00-c3-00-c3-00-c3-00-c3 */
	punpckhbw %mm7, %mm5		/* 00-c4-00-c4-00-c4-00-c4 */

	/* advance pos by one line length */
	/* NOTE(review): the row offset above uses zoom_width while the line
	   length here is prevX; presumably they are equal - confirm. */
	addl	prevX,%esi

	/* fetch the last two pixels */
	/* disabled in the original:  movq (%eax,%esi,4), %mm1 */
	movq	%mm1, %mm2

	/* unpack them */
	punpcklbw %mm7, %mm1
	punpckhbw %mm7, %mm2

	/* multiply by the coefficients */
	pmullw	%mm4, %mm1
	pmullw	%mm5, %mm2

	/* add both products to the running sum */
	paddw	%mm1, %mm0
	paddw	%mm2, %mm0

	/* divide by 256 = 16+16+16+16, then repack the final pixel */
	psrlw	$8, %mm0
	packuswb %mm7, %mm0

	/* store the result */
	movl	expix2,%eax
	/* disabled in the original:  movd %mm0,(%eax,%ecx,4) */

	/* next index; leave the loop once the counter reaches 0 */
	decl	%ecx
	jnz	.Lzoom_loop

.Lzoom_done:
	emms				/* leave MMX state so x87 code can run */

	popl	%edi
	popl	%esi
	popl	%ebx
	movl	%ebp,%esp
	popl	%ebp
	ret				/* The End */