/* mmx.c MultiMedia eXtensions GCC interface library for IA32. To use this library, simply include this header file and compile with GCC. You MUST have inlining enabled in order for mmx_ok() to work; this can be done by simply using -O on the GCC command line. Compiling with -DMMX_TRACE will cause detailed trace output to be sent to stderr for each mmx operation. This adds lots of code, and obviously slows execution to a crawl, but can be very useful for debugging. THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR ANY PARTICULAR PURPOSE. 1997-99 by H. Dietz and R. Fisher Notes: It appears that the latest gas has the pand problem fixed, therefore I'll undefine BROKEN_PAND by default. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "goom_config.h" #ifdef HAVE_MMX #define BUFFPOINTNB 16 #define BUFFPOINTMASK 0xffff #define BUFFINCR 0xff #include "mmx.h" #include "goom_graphic.h" #define sqrtperte 16 // faire : a % sqrtperte <=> a & pertemask #define PERTEMASK 0xf // faire : a / sqrtperte <=> a >> PERTEDEC #define PERTEDEC 4 int mmx_supported (void) { return (mm_support () & 0x1); } void zoom_filter_mmx (int prevX, int prevY, Pixel * expix1, Pixel * expix2, int *brutS, int *brutD, int buffratio, int precalCoef[16][16]) { unsigned int ax = (prevX - 1) << PERTEDEC, ay = (prevY - 1) << PERTEDEC; int bufsize = prevX * prevY; int loop; __asm__ __volatile__ ("pxor %mm7,%mm7"); for (loop = 0; loop < bufsize; loop++) { /* int couleur; */ int px, py; int pos; int coeffs; int myPos = loop << 1, myPos2 = myPos + 1; int brutSmypos = brutS[myPos]; px = brutSmypos + (((brutD[myPos] - brutSmypos) * buffratio) >> BUFFPOINTNB); brutSmypos = brutS[myPos2]; py = brutSmypos + (((brutD[myPos2] - brutSmypos) * buffratio) >> BUFFPOINTNB); if ((py >= ay) || (px >= ax)) { pos = coeffs = 0; } else { pos = ((px >> PERTEDEC) + prevX * (py >> PERTEDEC)); // coef en modulo 15 coeffs = precalCoef[px & PERTEMASK][py & PERTEMASK]; } __asm__ __volatile__ ("movd %2, %%mm6 \n\t" /* recuperation des deux premiers pixels dans mm0 et mm1 */ "movq (%3,%1,4), %%mm0 \n\t" /* b1-v1-r1-a1-b2-v2-r2-a2 */ "movq %%mm0, %%mm1 \n\t" /* b1-v1-r1-a1-b2-v2-r2-a2 */ /* depackage du premier pixel */ "punpcklbw %%mm7, %%mm0 \n\t" /* 00-b2-00-v2-00-r2-00-a2 */ "movq %%mm6, %%mm5 \n\t" /* ??-??-??-??-c4-c3-c2-c1 */ /* depackage du 2ieme pixel */ "punpckhbw %%mm7, %%mm1 \n\t" /* 00-b1-00-v1-00-r1-00-a1 */ /* extraction des coefficients... */ "punpcklbw %%mm5, %%mm6 \n\t" /* c4-c4-c3-c3-c2-c2-c1-c1 */ "movq %%mm6, %%mm4 \n\t" /* c4-c4-c3-c3-c2-c2-c1-c1 */ "movq %%mm6, %%mm5 \n\t" /* c4-c4-c3-c3-c2-c2-c1-c1 */ "punpcklbw %%mm5, %%mm6 \n\t" /* c2-c2-c2-c2-c1-c1-c1-c1 */ "punpckhbw %%mm5, %%mm4 \n\t" /* c4-c4-c4-c4-c3-c3-c3-c3 */ "movq %%mm6, %%mm3 \n\t" /* c2-c2-c2-c2-c1-c1-c1-c1 */ "punpcklbw %%mm7, %%mm6 \n\t" /* 00-c1-00-c1-00-c1-00-c1 */ "punpckhbw %%mm7, %%mm3 \n\t" /* 00-c2-00-c2-00-c2-00-c2 */ /* multiplication des pixels par les coefficients */ "pmullw %%mm6, %%mm0 \n\t" /* c1*b2-c1*v2-c1*r2-c1*a2 */ "pmullw %%mm3, %%mm1 \n\t" /* c2*b1-c2*v1-c2*r1-c2*a1 */ "paddw %%mm1, %%mm0 \n\t" /* ...extraction des 2 derniers coefficients */ "movq %%mm4, %%mm5 \n\t" /* c4-c4-c4-c4-c3-c3-c3-c3 */ "punpcklbw %%mm7, %%mm4 \n\t" /* 00-c3-00-c3-00-c3-00-c3 */ "punpckhbw %%mm7, %%mm5 \n\t" /* 00-c4-00-c4-00-c4-00-c4 */ /* ajouter la longueur de ligne a esi */ "addl 8(%%ebp),%1 \n\t" /* recuperation des 2 derniers pixels */ "movq (%3,%1,4), %%mm1 \n\t" "movq %%mm1, %%mm2 \n\t" /* depackage des pixels */ "punpcklbw %%mm7, %%mm1 \n\t" "punpckhbw %%mm7, %%mm2 \n\t" /* multiplication pas les coeffs */ "pmullw %%mm4, %%mm1 \n\t" "pmullw %%mm5, %%mm2 \n\t" /* ajout des valeurs obtenues ? la valeur finale */ "paddw %%mm1, %%mm0 \n\t" "paddw %%mm2, %%mm0 \n\t" /* division par 256 = 16+16+16+16, puis repackage du pixel final */ "psrlw $8, %%mm0 \n\t" "packuswb %%mm7, %%mm0 \n\t" "movd %%mm0,%0 \n\t":"=g" (expix2[loop]) :"r" (pos), "r" (coeffs), "r" (expix1) ); emms (); } } #define DRAWMETHOD_PLUS_MMX(_out,_backbuf,_col) \ { \ movd_m2r(_backbuf, mm0); \ paddusb_m2r(_col, mm0); \ movd_r2m(mm0, _out); \ } #define DRAWMETHOD DRAWMETHOD_PLUS_MMX(*p,*p,col) void draw_line_mmx (Pixel * data, int x1, int y1, int x2, int y2, int col, int screenx, int screeny) { int x, y, dx, dy, yy, xx; Pixel *p; if ((y1 < 0) || (y2 < 0) || (x1 < 0) || (x2 < 0) || (y1 >= screeny) || (y2 >= screeny) || (x1 >= screenx) || (x2 >= screenx)) goto end_of_line; dx = x2 - x1; dy = y2 - y1; if (x1 >= x2) { int tmp; tmp = x1; x1 = x2; x2 = tmp; tmp = y1; y1 = y2; y2 = tmp; dx = x2 - x1; dy = y2 - y1; } /* vertical line */ if (dx == 0) { if (y1 < y2) { p = &(data[(screenx * y1) + x1]); for (y = y1; y <= y2; y++) { DRAWMETHOD; p += screenx; } } else { p = &(data[(screenx * y2) + x1]); for (y = y2; y <= y1; y++) { DRAWMETHOD; p += screenx; } } goto end_of_line; } /* horizontal line */ if (dy == 0) { if (x1 < x2) { p = &(data[(screenx * y1) + x1]); for (x = x1; x <= x2; x++) { DRAWMETHOD; p++; } goto end_of_line; } else { p = &(data[(screenx * y1) + x2]); for (x = x2; x <= x1; x++) { DRAWMETHOD; p++; } goto end_of_line; } } /* 1 */ /* \ */ /* \ */ /* 2 */ if (y2 > y1) { /* steep */ if (dy > dx) { dx = ((dx << 16) / dy); x = x1 << 16; for (y = y1; y <= y2; y++) { xx = x >> 16; p = &(data[(screenx * y) + xx]); DRAWMETHOD; if (xx < (screenx - 1)) { p++; /* DRAWMETHOD; */ } x += dx; } goto end_of_line; } /* shallow */ else { dy = ((dy << 16) / dx); y = y1 << 16; for (x = x1; x <= x2; x++) { yy = y >> 16; p = &(data[(screenx * yy) + x]); DRAWMETHOD; if (yy < (screeny - 1)) { p += screeny; /* DRAWMETHOD; */ } y += dy; } } } /* 2 */ /* / */ /* / */ /* 1 */ else { /* steep */ if (-dy > dx) { dx = ((dx << 16) / -dy); x = (x1 + 1) << 16; for (y = y1; y >= y2; y--) { xx = x >> 16; p = &(data[(screenx * y) + xx]); DRAWMETHOD; if (xx < (screenx - 1)) { p--; /* DRAWMETHOD; */ } x += dx; } goto end_of_line; } /* shallow */ else { dy = ((dy << 16) / dx); y = y1 << 16; for (x = x1; x <= x2; x++) { yy = y >> 16; p = &(data[(screenx * yy) + x]); DRAWMETHOD; if (yy < (screeny - 1)) { p += screeny; /* DRAWMETHOD; */ } y += dy; } goto end_of_line; } } end_of_line: emms (); /* __asm__ __volatile__ ("emms"); */ } #else int mmx_supported (void) { return (0); } #endif /* HAVE_MMX */