summaryrefslogtreecommitdiffstats
path: root/gst/goom/ppc_drawings.s
diff options
context:
space:
mode:
authorBastien Nocera <hadess@hadess.net>2008-02-23 01:51:37 +0000
committerBastien Nocera <hadess@hadess.net>2008-02-23 01:51:37 +0000
commita7bc7485b1a4d7e1b1a12ff593ca4ccb1d59e466 (patch)
treeffba99ad38c7616d089c5e728c75a6bd5f736c6c /gst/goom/ppc_drawings.s
parent7f0745bb7f26c69766bb0c64458c6543588cc4dc (diff)
configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin
Original commit message from CVS: 2008-02-23 Bastien Nocera <hadess@hadess.net> * configure.ac: Add checks for Flex/Yacc/Bison and other furry animals, for the new goom 2k4 based plugin * gst/goom/*: Update to use goom 2k4, uses liboil to detect CPU optimisations (not working yet), move the old plugin to... * gst/goom2k1/*: ... here, in case somebody is sick enough Fixes #515073
Diffstat (limited to 'gst/goom/ppc_drawings.s')
-rw-r--r--gst/goom/ppc_drawings.s381
1 files changed, 381 insertions, 0 deletions
diff --git a/gst/goom/ppc_drawings.s b/gst/goom/ppc_drawings.s
new file mode 100644
index 00000000..845e5ea1
--- /dev/null
+++ b/gst/goom/ppc_drawings.s
@@ -0,0 +1,381 @@
+; PowerPC optimized drawing methods for Goom
+; © 2003 Guillaume Borios
+; This Source Code is released under the terms of the General Public License
+
+; Change log :
+; 30 May 2003 : File creation
+
+; Section definition : We use a read only code section for the whole file
+.section __TEXT,__text,regular,pure_instructions
+
+
+; --------------------------------------------------------------------------------------
+; Single 32b pixel drawing macros
+; Usage :
+; DRAWMETHOD_XXXX_MACRO *pixelIN, *pixelOUT, COLOR, WR1, WR2, WR3, WR4
+; Only the work registers (WR) can be touched by the macros
+;
+; Available methods :
+; DRAWMETHOD_DFLT_MACRO : Default drawing method (Actually OVRW)
+; DRAWMETHOD_PLUS_MACRO : RVB Saturated per channel addition (SLOWEST)
+; DRAWMETHOD_HALF_MACRO : 50% Transparency color drawing
+; DRAWMETHOD_OVRW_MACRO : Direct COLOR drawing (FASTEST)
+; DRAWMETHOD_B_OR_MACRO : Bitwise OR
+; DRAWMETHOD_BAND_MACRO : Bitwise AND
+; DRAWMETHOD_BXOR_MACRO : Bitwise XOR
+; DRAWMETHOD_BNOT_MACRO : Bitwise NOT
+; --------------------------------------------------------------------------------------
+
+.macro DRAWMETHOD_OVRW_MACRO
+ stw $2,0($1) ;; *$1 <- $2
+.endmacro
+
+.macro DRAWMETHOD_B_OR_MACRO
+ lwz $3,0($0) ;; $3 <- *$0
+ or $3,$3,$2 ;; $3 <- $3 | $2
+ stw $3,0($1) ;; *$1 <- $3
+.endmacro
+
+.macro DRAWMETHOD_BAND_MACRO
+ lwz $3,0($0) ;; $3 <- *$0
+ and $3,$3,$2 ;; $3 <- $3 & $2
+ stw $3,0($1) ;; *$1 <- $3
+.endmacro
+
+.macro DRAWMETHOD_BXOR_MACRO
+ lwz $3,0($0) ;; $3 <- *$0
+ xor $3,$3,$2 ;; $3 <- $3 ^ $2
+ stw $3,0($1) ;; *$1 <- $3
+.endmacro
+
+.macro DRAWMETHOD_BNOT_MACRO
+ lwz $3,0($0) ;; $3 <- *$0
+ nand $3,$3,$3 ;; $3 <- ~$3
+ stw $3,0($1) ;; *$1 <- $3
+.endmacro
+
+.macro DRAWMETHOD_PLUS_MACRO
+ lwz $4,0($0) ;; $4 <- *$0
+ andi. $3,$4,0xFF00 ;; $3 <- $4 & 0x0000FF00
+ andi. $5,$2,0xFF00 ;; $5 <- $2 & 0x0000FF00
+ add $3,$3,$5 ;; $3 <- $3 + $5
+ rlwinm $5,$3,15,0,0 ;; $5 <- 0 | ($3[15] << 15)
+ srawi $5,$5,23 ;; $5 <- $5 >> 23 (algebraic for sign extension)
+ or $3,$3,$5 ;; $3 <- $3 | $5
+ lis $5,0xFF ;; $5 <- 0x00FF00FF
+ addi $5,$5,0xFF
+ and $4,$4,$5 ;; $4 <- $4 & $5
+ and $6,$2,$5 ;; $6 <- $2 & $5
+ add $4,$4,$6 ;; $4 <- $4 + $6
+ rlwinm $6,$4,7,0,0 ;; $6 <- 0 | ($4[7] << 7)
+ srawi $6,$6,15 ;; $6 <- $6 >> 15 (algebraic for sign extension)
+ rlwinm $5,$4,23,0,0 ;; $5 <- 0 | ($4[23] << 23)
+ srawi $5,$5,31 ;; $5 <- $5 >> 31 (algebraic for sign extension)
+ rlwimi $6,$5,0,24,31 ;; $6[24..31] <- $5[24..31]
+ or $4,$4,$6 ;; $4 <- $4 | $6
+ rlwimi $4,$3,0,16,23 ;; $4[16..23] <- $3[16..23]
+ stw $4,0($1) ;; *$1 <- $4
+.endmacro
+
+.macro DRAWMETHOD_HALF_MACRO
+ lwz $4,0($0) ;; $4 <- *$0
+ andi. $3,$4,0xFF00 ;; $3 <- $4 & 0x0000FF00
+ andi. $5,$2,0xFF00 ;; $5 <- $2 & 0x0000FF00
+ add $3,$3,$5 ;; $3 <- $3 + $5
+ lis $5,0xFF ;; $5 <- 0x00FF00FF
+ addi $5,$5,0xFF
+ and $4,$4,$5 ;; $4 <- $4 & $5
+ and $5,$2,$5 ;; $5 <- $2 & $5
+ add $4,$4,$5 ;; $4 <- $4 + $5
+ srwi $4,$4,1 ;; $4 <- $4 >> 1
+ rlwimi $4,$3,31,16,23 ;; $4[16..23] <- $3[15..22]
+ stw $4,0($1) ;; *$1 <- $4
+.endmacro
+
+.macro DRAWMETHOD_DFLT_MACRO
+ DRAWMETHOD_PLUS_MACRO
+.endmacro
+
+; --------------------------------------------------------------------------------------
+
+
+
+; **************************************************************************************
+; void DRAWMETHOD_PLUS_PPC(unsigned int * buf, unsigned int _col);
+; void DRAWMETHOD_PLUS_2_PPC(unsigned * in, unsigned int * out, unsigned int _col);
+; **************************************************************************************
+.globl _DRAWMETHOD_PLUS_2_PPC
+.align 3
+_DRAWMETHOD_PLUS_2_PPC:
+ DRAWMETHOD_PLUS_MACRO r3,r4,r5,r6,r7,r8,r9
+ blr ;; return
+
+.globl _DRAWMETHOD_PLUS_PPC
+.align 3
+_DRAWMETHOD_PLUS_PPC:
+ DRAWMETHOD_PLUS_MACRO r3,r3,r4,r5,r6,r7,r9
+ blr ;; return
+
+
+; **************************************************************************************
+; void DRAWMETHOD_HALF_PPC(unsigned int * buf, unsigned int _col);
+; void DRAWMETHOD_HALF_2_PPC(unsigned * in, unsigned int * out, unsigned int _col);
+; **************************************************************************************
+.globl _DRAWMETHOD_HALF_2_PPC
+.align 3
+_DRAWMETHOD_HALF_2_PPC:
+ DRAWMETHOD_HALF_MACRO r3,r4,r5,r6,r7,r8
+ blr ;; return
+
+.globl _DRAWMETHOD_HALF_PPC
+.align 3
+_DRAWMETHOD_HALF_PPC:
+ DRAWMETHOD_HALF_MACRO r3,r3,r4,r5,r6,r7
+ blr ;; return
+
+
+; **************************************************************************************
+; void DRAW_LINE_PPC(unsigned int *data, int x1, int y1, int x2, int y2, unsigned int col,
+; unsigned int screenx, unsigned int screeny)
+; **************************************************************************************
+.globl _DRAW_LINE_PPC
+.align 3
+_DRAW_LINE_PPC:
+ ;; NOT IMPLEMENTED YET
+ blr ;; return
+
+
+; **************************************************************************************
+; void _ppc_brightness(Pixel * src, Pixel * dest, unsigned int size, unsigned int coeff)
+; **************************************************************************************
+
+
+.const
+.align 4
+vectorZERO:
+ .long 0,0,0,0
+ .long 0x10101000, 0x10101001, 0x10101002, 0x10101003
+ .long 0x10101004, 0x10101005, 0x10101006, 0x10101007
+ .long 0x10101008, 0x10101009, 0x1010100A, 0x1010100B
+ .long 0x1010100C, 0x1010100D, 0x1010100E, 0x1010100F
+
+
+.section __TEXT,__text,regular,pure_instructions
+
+.globl _ppc_brightness_G4
+.align 3
+_ppc_brightness_G4:
+
+
+;; PowerPC Altivec code
+ srwi r5,r5,2
+ mtctr r5
+
+;;vrsave
+ mfspr r11,256
+ lis r12,0xCFFC
+ mtspr 256,r12
+
+ mflr r0
+ bcl 20,31,"L00000000001$pb"
+"L00000000001$pb":
+ mflr r10
+ mtlr r0
+
+ addis r9,r10,ha16(vectorZERO-"L00000000001$pb")
+ addi r9,r9,lo16(vectorZERO-"L00000000001$pb")
+
+ vxor v0,v0,v0 ;; V0 = NULL vector
+
+ addi r9,r9,16
+ lvx v10,0,r9
+ addi r9,r9,16
+ lvx v11,0,r9
+ addi r9,r9,16
+ lvx v12,0,r9
+ addi r9,r9,16
+ lvx v13,0,r9
+
+ addis r9,r10,ha16(vectortmpwork-"L00000000001$pb")
+ addi r9,r9,lo16(vectortmpwork-"L00000000001$pb")
+ stw r6,0(r9)
+ li r6,8
+ stw r6,4(r9)
+ lvx v9,0,r9
+ li r9,128
+ vspltw v8,v9,0
+ vspltw v9,v9,1
+
+;; elt counter
+ li r9,0
+ lis r7,0x0F01
+ b L7
+.align 4
+L7:
+ lvx v1,r9,r3
+
+ vperm v4,v1,v0,v10
+ ;*********************
+ add r10,r9,r3
+ ;*********************
+ vperm v5,v1,v0,v11
+ vperm v6,v1,v0,v12
+ vperm v7,v1,v0,v13
+
+ vmulouh v4,v4,v8
+ ;*********************
+ dst r10,r7,3
+ ;*********************
+ vmulouh v5,v5,v8
+ vmulouh v6,v6,v8
+ vmulouh v7,v7,v8
+ vsrw v4,v4,v9
+ vsrw v5,v5,v9
+ vsrw v6,v6,v9
+ vsrw v7,v7,v9
+
+ vpkuwus v4,v4,v5
+ vpkuwus v6,v6,v7
+ vpkuhus v1,v4,v6
+
+ stvx v1,r9,r4
+ addi r9,r9,16
+
+ bdnz L7
+
+ mtspr 256,r11
+ blr
+
+
+.globl _ppc_brightness_G5
+.align 3
+_ppc_brightness_G5:
+
+;; PowerPC Altivec G5 code
+ srwi r5,r5,2
+ mtctr r5
+
+;;vrsave
+ mfspr r11,256
+ lis r12,0xCFFC
+ mtspr 256,r12
+
+ mflr r0
+ bcl 20,31,"L00000000002$pb"
+"L00000000002$pb":
+ mflr r10
+ mtlr r0
+
+ addis r9,r10,ha16(vectorZERO-"L00000000002$pb")
+ addi r9,r9,lo16(vectorZERO-"L00000000002$pb")
+
+ vxor v0,v0,v0 ;; V0 = NULL vector
+
+ addi r9,r9,16
+ lvx v10,0,r9
+ addi r9,r9,16
+ lvx v11,0,r9
+ addi r9,r9,16
+ lvx v12,0,r9
+ addi r9,r9,16
+ lvx v13,0,r9
+
+ addis r9,r10,ha16(vectortmpwork-"L00000000002$pb")
+ addi r9,r9,lo16(vectortmpwork-"L00000000002$pb")
+ stw r6,0(r9)
+ li r6,8
+ stw r6,4(r9)
+ lvx v9,0,r9
+ li r9,128
+ vspltw v8,v9,0
+ vspltw v9,v9,1
+
+;; elt counter
+ li r9,0
+ lis r7,0x0F01
+ b L6
+.align 4
+L6:
+ lvx v1,r9,r3
+
+ vperm v4,v1,v0,v10
+ ;*********************
+ add r10,r9,r3
+ ;*********************
+ vperm v5,v1,v0,v11
+ vperm v6,v1,v0,v12
+ vperm v7,v1,v0,v13
+
+ vmulouh v4,v4,v8
+ vmulouh v5,v5,v8
+ vmulouh v6,v6,v8
+ vmulouh v7,v7,v8
+ vsrw v4,v4,v9
+ vsrw v5,v5,v9
+ vsrw v6,v6,v9
+ vsrw v7,v7,v9
+
+ vpkuwus v4,v4,v5
+ vpkuwus v6,v6,v7
+ vpkuhus v1,v4,v6
+
+ stvx v1,r9,r4
+ addi r9,r9,16
+
+ bdnz L6
+
+ mtspr 256,r11
+ blr
+
+
+.globl _ppc_brightness_generic
+.align 3
+_ppc_brightness_generic:
+ lis r12,0x00FF
+ ori r12,r12,0x00FF
+ subi r3,r3,4
+ subi r4,r4,4
+ mtctr r5
+ b L1
+.align 4
+L1:
+ lwzu r7,4(r3)
+
+ rlwinm r8,r7,16,24,31
+ rlwinm r9,r7,24,24,31
+ mullw r8,r8,r6
+ rlwinm r10,r7,0,24,31
+ mullw r9,r9,r6
+ srwi r8,r8,8
+ mullw r10,r10,r6
+ srwi r9,r9,8
+
+ rlwinm. r11,r8,0,0,23
+ beq L2
+ li r8,0xFF
+L2:
+ srwi r10,r10,8
+ rlwinm. r11,r9,0,0,23
+ beq L3
+ li r9,0xFF
+L3:
+ rlwinm r7,r8,16,8,15
+ rlwinm. r11,r10,0,0,23
+ beq L4
+ li r10,0xFF
+L4:
+ rlwimi r7,r9,8,16,23
+ rlwimi r7,r10,0,24,31
+
+ stwu r7,4(r4)
+ bdnz L1
+
+ blr
+
+
+
+.static_data
+.align 4
+vectortmpwork:
+ .long 0,0,0,0
+