Add ARMv4T assembly-optimized division and modulo by 10
This commit is contained in:
parent
6d5c083616
commit
998e3adf4d
30
include/armv4t/mtl/armv4t/asm/math.s
Normal file
30
include/armv4t/mtl/armv4t/asm/math.s
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
/* Select the unified ARM/Thumb assembler syntax (UAL) for the macros below. */
.syntax unified
|
||||||
|
|
||||||
|
/*
 * udiv10 rd, rx, rt
 *
 * Computes the unsigned quotient rd = rx / 10 without a divide instruction.
 *
 *   rd  destination register; receives floor(rx / 10)
 *   rx  source register; left unmodified
 *   rt  scratch register; clobbered (it ends up holding the low 32 bits of
 *       the 64 bit product)
 *
 * The N, Z and C condition flags are updated by the final shift.
 *
 * rd, rx and rt must be three different registers: rt is loaded before rx is
 * read, and on ARMv4T umull is UNPREDICTABLE when RdLo, RdHi and Rm overlap.
 * Operands that are spelled identically are rejected at assembly time;
 * aliases of the same register (for example r12 and ip) are not detected.
 *
 * The constant is loaded with the "ldr rt, =imm" pseudo-instruction, so a
 * literal pool entry is emitted and must end up within range of the use.
 *
 * Method: 0xCCCCCCCD is ceil(2^35 / 10), so 0xCCCCCCCD / 2^35 is very
 * slightly more than 0.1. umull forms the 64 bit product rx * 0xCCCCCCCD;
 * keeping only the high word discards 32 bits and the shift right by 3
 * discards the remaining 3. The result is exact for every unsigned 32 bit
 * value of rx.
 */
.macro udiv10 rd, rx, rt
    .ifc \rd,\rx
    .error "udiv10: rd and rx must be different registers"
    .endif
    .ifc \rd,\rt
    .error "udiv10: rd and rt must be different registers"
    .endif
    .ifc \rx,\rt
    .error "udiv10: rx and rt must be different registers"
    .endif
    ldr     \rt, =0xCCCCCCCD        @ rt = ceil(2^35 / 10)
    umull   \rt, \rd, \rx, \rt      @ rd:rt = rx * rt (rd = high word)
    lsrs    \rd, \rd, #3            @ rd = product >> 35; sets N, Z, C
.endm
|
||||||
|
|
||||||
|
/*
 * umod10 rd, rx, rt
 *
 * Computes the unsigned remainder rd = rx % 10.
 *
 *   rd  destination register; receives rx - 10 * floor(rx / 10)
 *   rx  source register; left unmodified. It must not be the same register
 *       as rd, because rx is read again after rd has been written.
 *   rt  scratch register; clobbered
 *
 * The condition flags are updated (the last instruction is subs).
 *
 * Method: take the truncated quotient q = rx / 10 with udiv10, rebuild
 * 10 * q as (q + 4q) << 1 using shifted-operand arithmetic rather than a
 * multiply instruction, and subtract that from rx. 10 * q never exceeds rx,
 * so none of the intermediate values overflow 32 bits.
 */
.macro umod10 rd, rx, rt
    .ifc \rd,\rx
    .error "umod10: rd and rx must be different registers"
    .endif
    udiv10  \rd, \rx, \rt           @ rd = q = rx / 10
    add     \rd, \rd, \rd, lsl #2   @ rd = q + 4q = 5q
    subs    \rd, \rx, \rd, lsl #1   @ rd = rx - 10q
.endm
|
||||||
21
src/armv4t/math.s
Normal file
21
src/armv4t/math.s
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
/* Select the unified ARM/Thumb assembler syntax (UAL) for this file. */
.syntax unified
|
||||||
|
|
||||||
|
/* Pull in the udiv10 / umod10 macros that the functions below expand. */
.include "mtl/armv4t/asm/math.s"
|
||||||
|
|
||||||
|
/*
 * Emit what follows into the .iwram section: "a" = allocatable,
 * "x" = executable, %progbits = the section carries program contents.
 * NOTE(review): presumably this maps to fast internal work RAM on the
 * target - confirm against the linker script.
 */
.section .iwram, "ax", %progbits
|
||||||
|
/* Assemble what follows as 32 bit ARM (not Thumb) instructions. */
.arm
|
||||||
|
/* Align to 2^2 = 4 bytes (on ARM, .align takes a power-of-two exponent). */
.align 2
|
||||||
|
|
||||||
|
/*
 * mtl_udiv10: unsigned division by 10.
 *
 *   in:       r0 = dividend
 *   out:      r0 = r0 / 10
 *   clobbers: r1, r2, condition flags
 *
 * NOTE(review): the register usage looks like a C-callable
 * `unsigned mtl_udiv10(unsigned)` under the ARM procedure call standard -
 * confirm against the header that declares it.
 */
    .global mtl_udiv10
    .type   mtl_udiv10, %function
mtl_udiv10:
    udiv10  r1, r0, r2              @ r1 = r0 / 10; udiv10 leaves its source untouched
    mov     r0, r1                  @ move the quotient into the return register
    bx      lr
    .size   mtl_udiv10, . - mtl_udiv10
|
||||||
|
|
||||||
|
/*
 * mtl_umod10: unsigned remainder of division by 10.
 *
 *   in:       r0 = dividend
 *   out:      r0 = r0 % 10
 *   clobbers: r1, r2, condition flags
 *
 * NOTE(review): the register usage looks like a C-callable
 * `unsigned mtl_umod10(unsigned)` under the ARM procedure call standard -
 * confirm against the header that declares it.
 */
    .global mtl_umod10
    .type   mtl_umod10, %function
mtl_umod10:
    umod10  r1, r0, r2              @ r1 = r0 % 10; umod10 reads r0 after writing r1
    mov     r0, r1                  @ move the remainder into the return register
    bx      lr
    .size   mtl_umod10, . - mtl_umod10
|
||||||
Loading…
x
Reference in New Issue
Block a user