; PowerPC optimized drawing methods for Goom
; © 2003 Guillaume Borios
; This library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Library General Public
; License as published by the Free Software Foundation; either
; version 2 of the License, or (at your option) any later version.
;
; This library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
; Library General Public License for more details.
;
; You should have received a copy of the GNU Library General Public
; License along with this library; if not, write to the
; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
; Boston, MA 02111-1307, USA.

; Change log :
; 30 May 2003 : File creation

; Target / syntax : 32-bit PowerPC, Apple (Mach-O) assembler syntax.
; ';' starts a comment, macro arguments are $0..$9, and symbols starting
; with 'L' are assembler-local labels.

; Section definition : We use a read only code section for the whole file
.section __TEXT,__text,regular,pure_instructions


; --------------------------------------------------------------------------------------
; Single 32b pixel drawing macros
; Usage :
;   DRAWMETHOD_XXXX_MACRO *pixelIN, *pixelOUT, COLOR, WR1, WR2, WR3, WR4
;       $0 = register holding the address of the source pixel
;       $1 = register holding the address of the destination pixel
;       $2 = register holding the 32b color
;       $3..$6 = work registers WR1..WR4
;   Only the work registers (WR) can be touched by the macros.
;   PLUS and HALF also modify CR0 (they use the record form 'andi.').
;   $0 and $1 are used as base registers of lwz/stw and therefore must not
;   be r0 (r0 reads as the literal value 0 in that position).
;
; Available methods :
;   DRAWMETHOD_DFLT_MACRO : Default drawing method (Actually PLUS)
;   DRAWMETHOD_PLUS_MACRO : RGB Saturated per channel addition (SLOWEST)
;   DRAWMETHOD_HALF_MACRO : 50% Transparency color drawing
;   DRAWMETHOD_OVRW_MACRO : Direct COLOR drawing (FASTEST)
;   DRAWMETHOD_B_OR_MACRO : Bitwise OR
;   DRAWMETHOD_BAND_MACRO : Bitwise AND
;   DRAWMETHOD_BXOR_MACRO : Bitwise XOR
;   DRAWMETHOD_BNOT_MACRO : Bitwise NOT
; --------------------------------------------------------------------------------------

; *out = COLOR                      (no work register used, $0 is ignored)
.macro DRAWMETHOD_OVRW_MACRO
    stw     $2,0($1)                ;; *$1 <- $2
.endmacro

; *out = *in | COLOR                (uses WR1)
.macro DRAWMETHOD_B_OR_MACRO
    lwz     $3,0($0)                ;; $3 <- *$0
    or      $3,$3,$2                ;; $3 <- $3 | $2
    stw     $3,0($1)                ;; *$1 <- $3
.endmacro

; *out = *in & COLOR                (uses WR1)
.macro DRAWMETHOD_BAND_MACRO
    lwz     $3,0($0)                ;; $3 <- *$0
    and     $3,$3,$2                ;; $3 <- $3 & $2
    stw     $3,0($1)                ;; *$1 <- $3
.endmacro

; *out = *in ^ COLOR                (uses WR1)
.macro DRAWMETHOD_BXOR_MACRO
    lwz     $3,0($0)                ;; $3 <- *$0
    xor     $3,$3,$2                ;; $3 <- $3 ^ $2
    stw     $3,0($1)                ;; *$1 <- $3
.endmacro

; *out = ~(*in)                     (uses WR1, COLOR is ignored)
.macro DRAWMETHOD_BNOT_MACRO
    lwz     $3,0($0)                ;; $3 <- *$0
    nand    $3,$3,$3                ;; $3 <- ~$3
    stw     $3,0($1)                ;; *$1 <- $3
.endmacro

; *out = per channel saturated (*in + COLOR)        (uses WR1..WR4, CR0)
; The 0x0000FF00 channel is summed on its own in WR1; the 0x00FF0000 and
; 0x000000FF channels are summed together in WR2, the zero byte between them
; catching the carry of the low channel.  Each carry bit is then moved to the
; sign bit and smeared back with an algebraic shift to build the saturation
; mask of its channel.  A carry out of the 0x00FF0000 channel also sets the
; top byte of the result to 0xFF (the mask built by 'srawi ..,15' covers the
; 16 upper bits).
.macro DRAWMETHOD_PLUS_MACRO
    lwz     $4,0($0)                ;; $4 <- *$0
    andi.   $3,$4,0xFF00            ;; $3 <- $4 & 0x0000FF00
    andi.   $5,$2,0xFF00            ;; $5 <- $2 & 0x0000FF00
    add     $3,$3,$5                ;; $3 <- $3 + $5
    rlwinm  $5,$3,15,0,0            ;; $5 <- carry bit of $3 moved to the sign bit
    srawi   $5,$5,23                ;; $5 <- $5 >> 23 (algebraic for sign extension)
    or      $3,$3,$5                ;; $3 <- $3 | $5
    lis     $5,0xFF                 ;; $5 <- 0x00FF0000
    ori     $5,$5,0xFF              ;; $5 <- 0x00FF00FF (ori : no r0 special case, unlike addi)
    and     $4,$4,$5                ;; $4 <- $4 & $5
    and     $6,$2,$5                ;; $6 <- $2 & $5
    add     $4,$4,$6                ;; $4 <- $4 + $6
    rlwinm  $6,$4,7,0,0             ;; $6 <- carry of the 0x00FF0000 channel moved to the sign bit
    srawi   $6,$6,15                ;; $6 <- $6 >> 15 (algebraic for sign extension)
    rlwinm  $5,$4,23,0,0            ;; $5 <- carry of the 0x000000FF channel moved to the sign bit
    srawi   $5,$5,31                ;; $5 <- $5 >> 31 (algebraic for sign extension)
    rlwimi  $6,$5,0,24,31           ;; $6[24..31] <- $5[24..31]
    or      $4,$4,$6                ;; $4 <- $4 | $6
    rlwimi  $4,$3,0,16,23           ;; $4[16..23] <- $3[16..23]
    stw     $4,0($1)                ;; *$1 <- $4
.endmacro

; *out = per channel (*in + COLOR) / 2              (uses WR1..WR3, CR0)
; Same channel split as PLUS; the 9 bit sums are simply shifted right by one.
; The top byte of the result is always 0.
.macro DRAWMETHOD_HALF_MACRO
    lwz     $4,0($0)                ;; $4 <- *$0
    andi.   $3,$4,0xFF00            ;; $3 <- $4 & 0x0000FF00
    andi.   $5,$2,0xFF00            ;; $5 <- $2 & 0x0000FF00
    add     $3,$3,$5                ;; $3 <- $3 + $5
    lis     $5,0xFF                 ;; $5 <- 0x00FF0000
    ori     $5,$5,0xFF              ;; $5 <- 0x00FF00FF (ori : no r0 special case, unlike addi)
    and     $4,$4,$5                ;; $4 <- $4 & $5
    and     $5,$2,$5                ;; $5 <- $2 & $5
    add     $4,$4,$5                ;; $4 <- $4 + $5
    srwi    $4,$4,1                 ;; $4 <- $4 >> 1
    rlwimi  $4,$3,31,16,23          ;; $4[16..23] <- $3[15..22]
    stw     $4,0($1)                ;; *$1 <- $4
.endmacro

; Default method : forwards every argument to DRAWMETHOD_PLUS_MACRO
.macro DRAWMETHOD_DFLT_MACRO
    DRAWMETHOD_PLUS_MACRO $0,$1,$2,$3,$4,$5,$6
.endmacro

; --------------------------------------------------------------------------------------



; **************************************************************************************
; void DRAWMETHOD_PLUS_PPC(unsigned int * buf, unsigned int _col);
; void DRAWMETHOD_PLUS_2_PPC(unsigned * in, unsigned int * out, unsigned int _col);
;
; Saturated addition of _col to one pixel, in place (buf) or from in to out.
; Touched registers : r5-r7,r9 (in place) / r6-r9 (2 buffers), CR0
; **************************************************************************************
.globl _DRAWMETHOD_PLUS_2_PPC
.align 3
_DRAWMETHOD_PLUS_2_PPC:
    DRAWMETHOD_PLUS_MACRO r3,r4,r5,r6,r7,r8,r9
    blr                             ;; return

.globl _DRAWMETHOD_PLUS_PPC
.align 3
_DRAWMETHOD_PLUS_PPC:
    DRAWMETHOD_PLUS_MACRO r3,r3,r4,r5,r6,r7,r9
    blr                             ;; return


; **************************************************************************************
; void DRAWMETHOD_HALF_PPC(unsigned int * buf, unsigned int _col);
; void DRAWMETHOD_HALF_2_PPC(unsigned * in, unsigned int * out, unsigned int _col);
;
; 50% blend of _col with one pixel, in place (buf) or from in to out.
; Touched registers : r5-r7 (in place) / r6-r8 (2 buffers), CR0
; **************************************************************************************
.globl _DRAWMETHOD_HALF_2_PPC
.align 3
_DRAWMETHOD_HALF_2_PPC:
    DRAWMETHOD_HALF_MACRO r3,r4,r5,r6,r7,r8
    blr                             ;; return

.globl _DRAWMETHOD_HALF_PPC
.align 3
_DRAWMETHOD_HALF_PPC:
    DRAWMETHOD_HALF_MACRO r3,r3,r4,r5,r6,r7
    blr                             ;; return


; **************************************************************************************
; void DRAW_LINE_PPC(unsigned int *data, int x1, int y1, int x2, int y2, unsigned int col,
;                    unsigned int screenx, unsigned int screeny)
; **************************************************************************************
.globl _DRAW_LINE_PPC
.align 3
_DRAW_LINE_PPC:
    ;; NOT IMPLEMENTED YET
    blr                             ;; return


; **************************************************************************************
; void _ppc_brightness(Pixel * src, Pixel * dest, unsigned int size, unsigned int coeff)
;
; Three entry points share this signature :
;   _ppc_brightness_G4      : Altivec, with a data stream touch (dst) prefetch
;   _ppc_brightness_G5      : Altivec, without dst
;   _ppc_brightness_generic : integer only
;
;   r3 = src, r4 = dest, r5 = size (in 32b pixels), r6 = coeff
;
; Every byte is multiplied by coeff, shifted right by 8 and saturated to 0xFF.
; The Altivec versions handle 4 pixels (16 bytes) per iteration using lvx/stvx
; (NOTE : these ignore the low 4 address bits, so src and dest are presumably
; expected to be 16 bytes aligned - confirm against the callers); they scale the
; 4 bytes of each pixel with the low 16 bits of coeff, and leave the last
; (size % 4) pixels untouched.
; The generic version scales the 3 low bytes of each pixel and writes the top
; byte as 0.
; All versions return immediately when there is nothing to process.
; **************************************************************************************

; Permutation tables : each one spreads 4 bytes of the source vector into the low
; byte of 4 zeroed words (index 0x10 selects a byte of the NULL vector).
; The first 16 bytes (vectorZERO itself) are never loaded.
.const
.align 4
vectorZERO:
    .long 0,0,0,0
    .long 0x10101000, 0x10101001, 0x10101002, 0x10101003
    .long 0x10101004, 0x10101005, 0x10101006, 0x10101007
    .long 0x10101008, 0x10101009, 0x1010100A, 0x1010100B
    .long 0x1010100C, 0x1010100D, 0x1010100E, 0x1010100F

.section __TEXT,__text,regular,pure_instructions

; --------------------------------------------------------------------------------------
; _ppc_brightness_G4
; Touched registers : r0,r5-r7,r9-r12, CTR, CR0, v0,v1,v4-v13
; LR and VRSAVE are restored before returning.
; Writes the static buffer vectortmpwork.
; --------------------------------------------------------------------------------------
.globl _ppc_brightness_G4
.align 3
_ppc_brightness_G4:

    ;; PowerPC Altivec code
    srwi    r5,r5,2                 ;; r5 <- number of 4 pixels vectors
    cmplwi  cr0,r5,0
    beqlr   cr0                     ;; nothing to do : bdnz with CTR = 0 would loop 2^32 times
    mtctr   r5

    ;; vrsave : add v0,v1,v4-v13 to the registers already declared by the caller
    mfspr   r11,256
    oris    r12,r11,0xCFFC
    mtspr   256,r12

    ;; r10 <- address of the local label (position independent access to the data)
    mflr    r0
    bcl     20,31,"L00000000001$pb"
"L00000000001$pb":
    mflr    r10
    mtlr    r0

    addis   r9,r10,ha16(vectorZERO-"L00000000001$pb")
    addi    r9,r9,lo16(vectorZERO-"L00000000001$pb")

    vxor    v0,v0,v0                ;; V0 = NULL vector

    ;; v10..v13 <- the 4 permutation tables following vectorZERO
    addi    r9,r9,16
    lvx     v10,0,r9
    addi    r9,r9,16
    lvx     v11,0,r9
    addi    r9,r9,16
    lvx     v12,0,r9
    addi    r9,r9,16
    lvx     v13,0,r9

    ;; coeff and the shift count go through memory to reach the vector unit
    addis   r9,r10,ha16(vectortmpwork-"L00000000001$pb")
    addi    r9,r9,lo16(vectortmpwork-"L00000000001$pb")
    stw     r6,0(r9)
    li      r6,8
    stw     r6,4(r9)
    lvx     v9,0,r9

    vspltw  v8,v9,0                 ;; v8 <- coeff in every word
    vspltw  v9,v9,1                 ;; v9 <- 8 in every word

    ;; elt counter
    li      r9,0                    ;; r9 <- byte offset into src and dest
    lis     r7,0x0F01               ;; r7 <- control word of the dst instruction
    b       L7
.align 4
L7:
    lvx     v1,r9,r3                ;; v1 <- 4 pixels from src

    vperm   v4,v1,v0,v10            ;; v4..v7 <- the 16 bytes of v1, one per zero extended word
    ;*********************
    add     r10,r9,r3               ;; r10 <- address being read, for the prefetch
    ;*********************
    vperm   v5,v1,v0,v11
    vperm   v6,v1,v0,v12
    vperm   v7,v1,v0,v13

    vmulouh v4,v4,v8                ;; word <- byte * low half of coeff
    ;*********************
    dst     r10,r7,3                ;; data stream touch, stream 3
    ;*********************
    vmulouh v5,v5,v8
    vmulouh v6,v6,v8
    vmulouh v7,v7,v8

    vsrw    v4,v4,v9                ;; word <- word >> 8
    vsrw    v5,v5,v9
    vsrw    v6,v6,v9
    vsrw    v7,v7,v9

    vpkuwus v4,v4,v5                ;; saturating pack, words to halfwords
    vpkuwus v6,v6,v7
    vpkuhus v1,v4,v6                ;; saturating pack, halfwords to bytes

    stvx    v1,r9,r4                ;; 4 pixels to dest
    addi    r9,r9,16
    bdnz    L7

    mtspr   256,r11                 ;; restore vrsave
    blr


; --------------------------------------------------------------------------------------
; _ppc_brightness_G5
; Same as _ppc_brightness_G4 without the dst prefetch.
; Touched registers : r0,r5,r6,r9-r12, CTR, CR0, v0,v1,v4-v13
; LR and VRSAVE are restored before returning.
; Writes the static buffer vectortmpwork.
; --------------------------------------------------------------------------------------
.globl _ppc_brightness_G5
.align 3
_ppc_brightness_G5:

    ;; PowerPC Altivec G5 code
    srwi    r5,r5,2                 ;; r5 <- number of 4 pixels vectors
    cmplwi  cr0,r5,0
    beqlr   cr0                     ;; nothing to do : bdnz with CTR = 0 would loop 2^32 times
    mtctr   r5

    ;; vrsave : add v0,v1,v4-v13 to the registers already declared by the caller
    mfspr   r11,256
    oris    r12,r11,0xCFFC
    mtspr   256,r12

    ;; r10 <- address of the local label (position independent access to the data)
    mflr    r0
    bcl     20,31,"L00000000002$pb"
"L00000000002$pb":
    mflr    r10
    mtlr    r0

    addis   r9,r10,ha16(vectorZERO-"L00000000002$pb")
    addi    r9,r9,lo16(vectorZERO-"L00000000002$pb")

    vxor    v0,v0,v0                ;; V0 = NULL vector

    ;; v10..v13 <- the 4 permutation tables following vectorZERO
    addi    r9,r9,16
    lvx     v10,0,r9
    addi    r9,r9,16
    lvx     v11,0,r9
    addi    r9,r9,16
    lvx     v12,0,r9
    addi    r9,r9,16
    lvx     v13,0,r9

    ;; coeff and the shift count go through memory to reach the vector unit
    addis   r9,r10,ha16(vectortmpwork-"L00000000002$pb")
    addi    r9,r9,lo16(vectortmpwork-"L00000000002$pb")
    stw     r6,0(r9)
    li      r6,8
    stw     r6,4(r9)
    lvx     v9,0,r9

    vspltw  v8,v9,0                 ;; v8 <- coeff in every word
    vspltw  v9,v9,1                 ;; v9 <- 8 in every word

    ;; elt counter
    li      r9,0                    ;; r9 <- byte offset into src and dest
    b       L6
.align 4
L6:
    lvx     v1,r9,r3                ;; v1 <- 4 pixels from src

    vperm   v4,v1,v0,v10            ;; v4..v7 <- the 16 bytes of v1, one per zero extended word
    vperm   v5,v1,v0,v11
    vperm   v6,v1,v0,v12
    vperm   v7,v1,v0,v13

    vmulouh v4,v4,v8                ;; word <- byte * low half of coeff
    vmulouh v5,v5,v8
    vmulouh v6,v6,v8
    vmulouh v7,v7,v8

    vsrw    v4,v4,v9                ;; word <- word >> 8
    vsrw    v5,v5,v9
    vsrw    v6,v6,v9
    vsrw    v7,v7,v9

    vpkuwus v4,v4,v5                ;; saturating pack, words to halfwords
    vpkuwus v6,v6,v7
    vpkuhus v1,v4,v6                ;; saturating pack, halfwords to bytes

    stvx    v1,r9,r4                ;; 4 pixels to dest
    addi    r9,r9,16
    bdnz    L6

    mtspr   256,r11                 ;; restore vrsave
    blr


; --------------------------------------------------------------------------------------
; _ppc_brightness_generic
; One pixel per iteration, integer unit only.
; Touched registers : r3,r4,r7-r11, CTR, CR0
; --------------------------------------------------------------------------------------
.globl _ppc_brightness_generic
.align 3
_ppc_brightness_generic:
    cmplwi  cr0,r5,0
    beqlr   cr0                     ;; nothing to do : bdnz with CTR = 0 would loop 2^32 times
    subi    r3,r3,4                 ;; pre-decrement : the loop uses update form load/store
    subi    r4,r4,4
    mtctr   r5
    b       L1
.align 4
L1:
    lwzu    r7,4(r3)                ;; r7 <- *++src

    rlwinm  r8,r7,16,24,31          ;; r8  <- (r7 >> 16) & 0xFF
    rlwinm  r9,r7,24,24,31          ;; r9  <- (r7 >> 8) & 0xFF
    mullw   r8,r8,r6
    rlwinm  r10,r7,0,24,31          ;; r10 <- r7 & 0xFF
    mullw   r9,r9,r6
    srwi    r8,r8,8
    mullw   r10,r10,r6
    srwi    r9,r9,8

    rlwinm. r11,r8,0,0,23           ;; any bit above the low byte ?
    beq     L2
    li      r8,0xFF                 ;; yes : saturate
L2:
    srwi    r10,r10,8
    rlwinm. r11,r9,0,0,23
    beq     L3
    li      r9,0xFF
L3:
    rlwinm  r7,r8,16,8,15           ;; r7 <- r8 << 16, every other bit cleared
    rlwinm. r11,r10,0,0,23
    beq     L4
    li      r10,0xFF
L4:
    rlwimi  r7,r9,8,16,23           ;; insert r9 in the 0x0000FF00 channel
    rlwimi  r7,r10,0,24,31          ;; insert r10 in the 0x000000FF channel
    stwu    r7,4(r4)                ;; *++dest <- r7
    bdnz    L1

    blr


; Scratch vector used by the Altivec versions to move coeff and the shift count
; from the integer registers to a vector register (shared, not reentrant).
.static_data
.align 4
vectortmpwork:
    .long 0,0,0,0