diff options
author | Bastien Nocera <hadess@hadess.net> | 2008-02-23 01:51:37 +0000 |
---|---|---|
committer | Bastien Nocera <hadess@hadess.net> | 2008-02-23 01:51:37 +0000 |
commit | a7bc7485b1a4d7e1b1a12ff593ca4ccb1d59e466 (patch) | |
tree | ffba99ad38c7616d089c5e728c75a6bd5f736c6c /gst/goom/ppc_drawings.s | |
parent | 7f0745bb7f26c69766bb0c64458c6543588cc4dc (diff) |
configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin
Original commit message from CVS:
2008-02-23 Bastien Nocera <hadess@hadess.net>
* configure.ac: Add checks for Flex/Yacc/Bison and other
furry animals, for the new goom 2k4 based plugin
* gst/goom/*: Update to use goom 2k4, uses liboil to detect
CPU optimisations (not working yet), move the old plugin to...
* gst/goom2k1/*: ... here, in case somebody is sick enough
Fixes #515073
Diffstat (limited to 'gst/goom/ppc_drawings.s')
-rw-r--r-- | gst/goom/ppc_drawings.s | 381 |
1 files changed, 381 insertions, 0 deletions
diff --git a/gst/goom/ppc_drawings.s b/gst/goom/ppc_drawings.s new file mode 100644 index 00000000..845e5ea1 --- /dev/null +++ b/gst/goom/ppc_drawings.s @@ -0,0 +1,381 @@ +; PowerPC optimized drawing methods for Goom +; © 2003 Guillaume Borios +; This Source Code is released under the terms of the General Public License + +; Change log : +; 30 May 2003 : File creation + +; Section definition : We use a read only code section for the whole file +.section __TEXT,__text,regular,pure_instructions + + +; -------------------------------------------------------------------------------------- +; Single 32b pixel drawing macros +; Usage : +; DRAWMETHOD_XXXX_MACRO *pixelIN, *pixelOUT, COLOR, WR1, WR2, WR3, WR4 +; Only the work registers (WR) can be touched by the macros +; +; Available methods : +; DRAWMETHOD_DFLT_MACRO : Default drawing method (Actually OVRW) +; DRAWMETHOD_PLUS_MACRO : RVB Saturated per channel addition (SLOWEST) +; DRAWMETHOD_HALF_MACRO : 50% Transparency color drawing +; DRAWMETHOD_OVRW_MACRO : Direct COLOR drawing (FASTEST) +; DRAWMETHOD_B_OR_MACRO : Bitwise OR +; DRAWMETHOD_BAND_MACRO : Bitwise AND +; DRAWMETHOD_BXOR_MACRO : Bitwise XOR +; DRAWMETHOD_BNOT_MACRO : Bitwise NOT +; -------------------------------------------------------------------------------------- + +.macro DRAWMETHOD_OVRW_MACRO + stw $2,0($1) ;; *$1 <- $2 +.endmacro + +.macro DRAWMETHOD_B_OR_MACRO + lwz $3,0($0) ;; $3 <- *$0 + or $3,$3,$2 ;; $3 <- $3 | $2 + stw $3,0($1) ;; *$1 <- $3 +.endmacro + +.macro DRAWMETHOD_BAND_MACRO + lwz $3,0($0) ;; $3 <- *$0 + and $3,$3,$2 ;; $3 <- $3 & $2 + stw $3,0($1) ;; *$1 <- $3 +.endmacro + +.macro DRAWMETHOD_BXOR_MACRO + lwz $3,0($0) ;; $3 <- *$0 + xor $3,$3,$2 ;; $3 <- $3 ^ $2 + stw $3,0($1) ;; *$1 <- $3 +.endmacro + +.macro DRAWMETHOD_BNOT_MACRO + lwz $3,0($0) ;; $3 <- *$0 + nand $3,$3,$3 ;; $3 <- ~$3 + stw $3,0($1) ;; *$1 <- $3 +.endmacro + +.macro DRAWMETHOD_PLUS_MACRO + lwz $4,0($0) ;; $4 <- *$0 + andi. $3,$4,0xFF00 ;; $3 <- $4 & 0x0000FF00 + andi. $5,$2,0xFF00 ;; $5 <- $2 & 0x0000FF00 + add $3,$3,$5 ;; $3 <- $3 + $5 + rlwinm $5,$3,15,0,0 ;; $5 <- 0 | ($3[15] << 15) + srawi $5,$5,23 ;; $5 <- $5 >> 23 (algebraic for sign extension) + or $3,$3,$5 ;; $3 <- $3 | $5 + lis $5,0xFF ;; $5 <- 0x00FF00FF + addi $5,$5,0xFF + and $4,$4,$5 ;; $4 <- $4 & $5 + and $6,$2,$5 ;; $6 <- $2 & $5 + add $4,$4,$6 ;; $4 <- $4 + $6 + rlwinm $6,$4,7,0,0 ;; $6 <- 0 | ($4[7] << 7) + srawi $6,$6,15 ;; $6 <- $6 >> 15 (algebraic for sign extension) + rlwinm $5,$4,23,0,0 ;; $5 <- 0 | ($4[23] << 23) + srawi $5,$5,31 ;; $5 <- $5 >> 31 (algebraic for sign extension) + rlwimi $6,$5,0,24,31 ;; $6[24..31] <- $5[24..31] + or $4,$4,$6 ;; $4 <- $4 | $6 + rlwimi $4,$3,0,16,23 ;; $4[16..23] <- $3[16..23] + stw $4,0($1) ;; *$1 <- $4 +.endmacro + +.macro DRAWMETHOD_HALF_MACRO + lwz $4,0($0) ;; $4 <- *$0 + andi. $3,$4,0xFF00 ;; $3 <- $4 & 0x0000FF00 + andi. $5,$2,0xFF00 ;; $5 <- $2 & 0x0000FF00 + add $3,$3,$5 ;; $3 <- $3 + $5 + lis $5,0xFF ;; $5 <- 0x00FF00FF + addi $5,$5,0xFF + and $4,$4,$5 ;; $4 <- $4 & $5 + and $5,$2,$5 ;; $5 <- $2 & $5 + add $4,$4,$5 ;; $4 <- $4 + $5 + srwi $4,$4,1 ;; $4 <- $4 >> 1 + rlwimi $4,$3,31,16,23 ;; $4[16..23] <- $3[15..22] + stw $4,0($1) ;; *$1 <- $4 +.endmacro + +.macro DRAWMETHOD_DFLT_MACRO + DRAWMETHOD_PLUS_MACRO +.endmacro + +; -------------------------------------------------------------------------------------- + + + +; ************************************************************************************** +; void DRAWMETHOD_PLUS_PPC(unsigned int * buf, unsigned int _col); +; void DRAWMETHOD_PLUS_2_PPC(unsigned * in, unsigned int * out, unsigned int _col); +; ************************************************************************************** +.globl _DRAWMETHOD_PLUS_2_PPC +.align 3 +_DRAWMETHOD_PLUS_2_PPC: + DRAWMETHOD_PLUS_MACRO r3,r4,r5,r6,r7,r8,r9 + blr ;; return + +.globl _DRAWMETHOD_PLUS_PPC +.align 3 +_DRAWMETHOD_PLUS_PPC: + DRAWMETHOD_PLUS_MACRO r3,r3,r4,r5,r6,r7,r9 + blr ;; return + + +; ************************************************************************************** +; void DRAWMETHOD_HALF_PPC(unsigned int * buf, unsigned int _col); +; void DRAWMETHOD_HALF_2_PPC(unsigned * in, unsigned int * out, unsigned int _col); +; ************************************************************************************** +.globl _DRAWMETHOD_HALF_2_PPC +.align 3 +_DRAWMETHOD_HALF_2_PPC: + DRAWMETHOD_HALF_MACRO r3,r4,r5,r6,r7,r8 + blr ;; return + +.globl _DRAWMETHOD_HALF_PPC +.align 3 +_DRAWMETHOD_HALF_PPC: + DRAWMETHOD_HALF_MACRO r3,r3,r4,r5,r6,r7 + blr ;; return + + +; ************************************************************************************** +; void DRAW_LINE_PPC(unsigned int *data, int x1, int y1, int x2, int y2, unsigned int col, +; unsigned int screenx, unsigned int screeny) +; ************************************************************************************** +.globl _DRAW_LINE_PPC +.align 3 +_DRAW_LINE_PPC: + ;; NOT IMPLEMENTED YET + blr ;; return + + +; ************************************************************************************** +; void _ppc_brightness(Pixel * src, Pixel * dest, unsigned int size, unsigned int coeff) +; ************************************************************************************** + + +.const +.align 4 +vectorZERO: + .long 0,0,0,0 + .long 0x10101000, 0x10101001, 0x10101002, 0x10101003 + .long 0x10101004, 0x10101005, 0x10101006, 0x10101007 + .long 0x10101008, 0x10101009, 0x1010100A, 0x1010100B + .long 0x1010100C, 0x1010100D, 0x1010100E, 0x1010100F + + +.section __TEXT,__text,regular,pure_instructions + +.globl _ppc_brightness_G4 +.align 3 +_ppc_brightness_G4: + + +;; PowerPC Altivec code + srwi r5,r5,2 + mtctr r5 + +;;vrsave + mfspr r11,256 + lis r12,0xCFFC + mtspr 256,r12 + + mflr r0 + bcl 20,31,"L00000000001$pb" +"L00000000001$pb": + mflr r10 + mtlr r0 + + addis r9,r10,ha16(vectorZERO-"L00000000001$pb") + addi r9,r9,lo16(vectorZERO-"L00000000001$pb") + + vxor v0,v0,v0 ;; V0 = NULL vector + + addi r9,r9,16 + lvx v10,0,r9 + addi r9,r9,16 + lvx v11,0,r9 + addi r9,r9,16 + lvx v12,0,r9 + addi r9,r9,16 + lvx v13,0,r9 + + addis r9,r10,ha16(vectortmpwork-"L00000000001$pb") + addi r9,r9,lo16(vectortmpwork-"L00000000001$pb") + stw r6,0(r9) + li r6,8 + stw r6,4(r9) + lvx v9,0,r9 + li r9,128 + vspltw v8,v9,0 + vspltw v9,v9,1 + +;; elt counter + li r9,0 + lis r7,0x0F01 + b L7 +.align 4 +L7: + lvx v1,r9,r3 + + vperm v4,v1,v0,v10 + ;********************* + add r10,r9,r3 + ;********************* + vperm v5,v1,v0,v11 + vperm v6,v1,v0,v12 + vperm v7,v1,v0,v13 + + vmulouh v4,v4,v8 + ;********************* + dst r10,r7,3 + ;********************* + vmulouh v5,v5,v8 + vmulouh v6,v6,v8 + vmulouh v7,v7,v8 + vsrw v4,v4,v9 + vsrw v5,v5,v9 + vsrw v6,v6,v9 + vsrw v7,v7,v9 + + vpkuwus v4,v4,v5 + vpkuwus v6,v6,v7 + vpkuhus v1,v4,v6 + + stvx v1,r9,r4 + addi r9,r9,16 + + bdnz L7 + + mtspr 256,r11 + blr + + +.globl _ppc_brightness_G5 +.align 3 +_ppc_brightness_G5: + +;; PowerPC Altivec G5 code + srwi r5,r5,2 + mtctr r5 + +;;vrsave + mfspr r11,256 + lis r12,0xCFFC + mtspr 256,r12 + + mflr r0 + bcl 20,31,"L00000000002$pb" +"L00000000002$pb": + mflr r10 + mtlr r0 + + addis r9,r10,ha16(vectorZERO-"L00000000002$pb") + addi r9,r9,lo16(vectorZERO-"L00000000002$pb") + + vxor v0,v0,v0 ;; V0 = NULL vector + + addi r9,r9,16 + lvx v10,0,r9 + addi r9,r9,16 + lvx v11,0,r9 + addi r9,r9,16 + lvx v12,0,r9 + addi r9,r9,16 + lvx v13,0,r9 + + addis r9,r10,ha16(vectortmpwork-"L00000000002$pb") + addi r9,r9,lo16(vectortmpwork-"L00000000002$pb") + stw r6,0(r9) + li r6,8 + stw r6,4(r9) + lvx v9,0,r9 + li r9,128 + vspltw v8,v9,0 + vspltw v9,v9,1 + +;; elt counter + li r9,0 + lis r7,0x0F01 + b L6 +.align 4 +L6: + lvx v1,r9,r3 + + vperm v4,v1,v0,v10 + ;********************* + add r10,r9,r3 + ;********************* + vperm v5,v1,v0,v11 + vperm v6,v1,v0,v12 + vperm v7,v1,v0,v13 + + vmulouh v4,v4,v8 + vmulouh v5,v5,v8 + vmulouh v6,v6,v8 + vmulouh v7,v7,v8 + vsrw v4,v4,v9 + vsrw v5,v5,v9 + vsrw v6,v6,v9 + vsrw v7,v7,v9 + + vpkuwus v4,v4,v5 + vpkuwus v6,v6,v7 + vpkuhus v1,v4,v6 + + stvx v1,r9,r4 + addi r9,r9,16 + + bdnz L6 + + mtspr 256,r11 + blr + + +.globl _ppc_brightness_generic +.align 3 +_ppc_brightness_generic: + lis r12,0x00FF + ori r12,r12,0x00FF + subi r3,r3,4 + subi r4,r4,4 + mtctr r5 + b L1 +.align 4 +L1: + lwzu r7,4(r3) + + rlwinm r8,r7,16,24,31 + rlwinm r9,r7,24,24,31 + mullw r8,r8,r6 + rlwinm r10,r7,0,24,31 + mullw r9,r9,r6 + srwi r8,r8,8 + mullw r10,r10,r6 + srwi r9,r9,8 + + rlwinm. r11,r8,0,0,23 + beq L2 + li r8,0xFF +L2: + srwi r10,r10,8 + rlwinm. r11,r9,0,0,23 + beq L3 + li r9,0xFF +L3: + rlwinm r7,r8,16,8,15 + rlwinm. r11,r10,0,0,23 + beq L4 + li r10,0xFF +L4: + rlwimi r7,r9,8,16,23 + rlwimi r7,r10,0,24,31 + + stwu r7,4(r4) + bdnz L1 + + blr + + + +.static_data +.align 4 +vectortmpwork: + .long 0,0,0,0 + |