Add armv4t assembly optimized division and modulo by 10
This commit is contained in:
parent
dcb91bac19
commit
2beab09f7d
30
include/armv4t/mtl/armv4t/asm/math.s
Normal file
30
include/armv4t/mtl/armv4t/asm/math.s
Normal file
@ -0,0 +1,30 @@
|
||||
/* Select the unified (UAL) instruction syntax for the rest of this file. */
.syntax unified
|
||||
|
||||
/*
 * udiv10 rd, rx, rt -- unsigned 32-bit division by ten: rd = rx / 10.
 *
 *   rd  receives the quotient
 *   rx  holds the dividend and is left unchanged
 *   rt  scratch register; whatever it held before is lost
 *
 * The condition flags are updated by the final shift. rd, rx and rt should
 * name three different registers: before ARMv6, UMULL is UNPREDICTABLE
 * unless RdHi, RdLo and Rm are all distinct.
 *
 * Method: 0xCCCCCCCD / 2^35 is very slightly more than 1/10. The full
 * 64-bit product rx * 0xCCCCCCCD is formed, its low word is thrown away
 * (which already amounts to a shift right by 32) and the high word is
 * shifted right by a further 3. The outcome is the truncated quotient for
 * every unsigned 32-bit value of rx.
 */
.macro udiv10 rd, rx, rt
    ldr     \rt, =0xCCCCCCCD            /* rt = ceil(2^35 / 10), via literal pool */
    umull   \rt, \rd, \rx, \rt          /* rd:rt = rx * rt; low word in rt is unused */
    lsrs    \rd, \rd, #3                /* rd = (product >> 32) >> 3 */
.endm
|
||||
|
||||
/*
 * umod10 rd, rx, rt -- unsigned 32-bit remainder: rd = rx % 10.
 *
 *   rd  receives the remainder (0..9)
 *   rx  holds the dividend and is left unchanged
 *   rt  scratch register; whatever it held before is lost
 *
 * The condition flags are updated by the final subtraction. rd, rx and rt
 * should name three different registers (a requirement inherited from
 * udiv10, whose UMULL needs distinct registers before ARMv6).
 *
 * Method: take the truncated quotient q = rx / 10, rebuild q * 10 and
 * subtract it from rx. q * 10 is formed with shifts and adds instead of
 * MUL: q * 5 = q + (q << 2), and the remaining doubling is folded into the
 * shifter operand of the subtraction. q is at most 429496729, so neither
 * q * 5 nor q * 10 overflows 32 bits.
 */
.macro umod10 rd, rx, rt
    udiv10  \rd, \rx, \rt               /* rd = q = rx / 10 */
    add     \rd, \rd, \rd, lsl #2       /* rd = q * 5 */
    subs    \rd, \rx, \rd, lsl #1       /* rd = rx - q * 10 */
.endm
|
||||
21
src/armv4t/math.s
Normal file
21
src/armv4t/math.s
Normal file
@ -0,0 +1,21 @@
|
||||
/* Select the unified (UAL) instruction syntax for the rest of this file. */
.syntax unified
|
||||
|
||||
/* Textually include the macro file that defines udiv10 and umod10. */
.include "mtl/armv4t/asm/math.s"
|
||||
|
||||
/*
 * Emit the code below into a section named .iwram that is allocatable and
 * executable ("ax") and holds program data (%progbits).
 * NOTE(review): presumably fast internal work RAM; the actual placement is
 * decided by the linker script -- confirm there.
 */
.section .iwram, "ax", %progbits
|
||||
/* Assemble the following instructions as 32-bit ARM code, not Thumb. */
.arm
|
||||
/* On ARM targets the operand is a power of two: align to 2^2 = 4 bytes. */
.align 2
|
||||
|
||||
/*
 * mtl_udiv10 -- return r0 / 10, treating r0 as an unsigned 32-bit value.
 *
 *   In:       r0 = dividend
 *   Out:      r0 = quotient
 *   Clobbers: r1, r2, condition flags
 *
 * Leaf routine: uses no stack and returns with BX LR.
 * NOTE(review): presumably called from C as
 * `unsigned mtl_udiv10(unsigned)` -- confirm against the C declaration.
 */
.global mtl_udiv10
.type   mtl_udiv10, %function
mtl_udiv10:
    udiv10  r1, r0, r2      /* quotient lands in r1: pre-ARMv6 UMULL cannot share RdHi with Rm */
    mov     r0, r1          /* move the quotient into the return register */
    bx      lr
.size   mtl_udiv10, . - mtl_udiv10  /* record the symbol size for ELF tools */
|
||||
|
||||
/*
 * mtl_umod10 -- return r0 % 10, treating r0 as an unsigned 32-bit value.
 *
 *   In:       r0 = dividend
 *   Out:      r0 = remainder
 *   Clobbers: r1, r2, condition flags
 *
 * Leaf routine: uses no stack and returns with BX LR.
 * NOTE(review): presumably called from C as
 * `unsigned mtl_umod10(unsigned)` -- confirm against the C declaration.
 */
.global mtl_umod10
.type   mtl_umod10, %function
mtl_umod10:
    umod10  r1, r0, r2      /* remainder lands in r1; r0 must stay intact as the macro's rx */
    mov     r0, r1          /* move the remainder into the return register */
    bx      lr
.size   mtl_umod10, . - mtl_umod10  /* record the symbol size for ELF tools */
|
||||
Loading…
x
Reference in New Issue
Block a user