Add armv4t assembly optimized division and modulo by 10

This commit is contained in:
Myles Busig 2024-03-24 00:30:56 -06:00
parent 6d5c083616
commit 998e3adf4d
2 changed files with 51 additions and 0 deletions

View File

@ -0,0 +1,30 @@
.syntax unified

/*
 * Unsigned divide-by-10 helper macros for GNU as, unified syntax.
 * ARM state only: the macros rely on UMULL, which ARMv4T Thumb lacks.
 */

/*
 * udiv10 rd, rx, rt
 *
 * Computes the truncated unsigned quotient rx / 10.
 *
 *   rd  out   quotient
 *   rx  in    dividend, left unmodified
 *   rt  temp  clobbered (ends up holding the low word of the product)
 *
 * rd, rx and rt must name three different registers: on ARMv4, UMULL
 * requires RdLo (rt), RdHi (rd) and Rm (rx) to be distinct.
 *
 * The closing LSRS updates the N, Z and C flags; Z is set exactly when the
 * quotient is 0.
 *
 * Method: 0xCCCCCCCD is ceil(2^35 / 10), i.e. 0xCCCCCCCD / 2^35 is
 * approximately 0.1. The 64-bit product rx * 0xCCCCCCCD is shifted right by
 * 35 bits: taking the high word accounts for 32 of them, the explicit shift
 * for the remaining 3. The result is the exact quotient for every unsigned
 * 32-bit rx.
 *
 * The constant is loaded with the `ldr =` pseudo-instruction, so each
 * expansion needs a literal pool within reach.
 */
.macro udiv10 rd, rx, rt
    ldr     \rt, =0xCCCCCCCD            /* rt = ceil(2^35 / 10) */
    umull   \rt, \rd, \rx, \rt          /* rd:rt = rx * rt (64-bit product) */
    lsrs    \rd, \rd, #3                /* rd = product >> 35 */
.endm
/*
 * umod10 rd, rx, rt
 *
 * Computes the unsigned remainder rx % 10.
 *
 *   rd  out   remainder (0..9)
 *   rx  in    dividend, left unmodified
 *   rt  temp  clobbered by the udiv10 expansion
 *
 * rd, rx and rt must name three different registers: they are handed
 * straight to udiv10, whose UMULL needs distinct registers on ARMv4, and rx
 * is read again after rd has been written.
 *
 * The closing SUBS updates N, Z, C and V; Z is set exactly when the
 * remainder is 0.
 *
 * Method: remainder = rx - 10 * (rx / 10). The multiplication by 10 uses the
 * barrel shifter instead of MUL (which is multi-cycle on ARMv4T-class cores
 * and would need the constant in a register):
 *   q * 5  = q + (q << 2)
 *   q * 10 = (q * 5) << 1, folded into the subtraction's shifted operand.
 * Neither intermediate value can overflow 32 bits because q <= 0xFFFFFFFF / 10.
 */
.macro umod10 rd, rx, rt
    udiv10  \rd, \rx, \rt               /* rd = rx / 10 */
    add     \rd, \rd, \rd, lsl #2       /* rd = 5 * (rx / 10) */
    subs    \rd, \rx, \rd, lsl #1       /* rd = rx - 10 * (rx / 10) */
.endm

21
src/armv4t/math.s Normal file
View File

@ -0,0 +1,21 @@
/*
 * Out-of-line ARM-state entry points built on the udiv10 / umod10 macros.
 */
.syntax unified
/* Expected to supply the udiv10 and umod10 macros expanded below. */
.include "mtl/armv4t/asm/math.s"
/*
 * "ax" = allocatable + executable; %progbits = the section carries contents.
 * NOTE(review): .iwram presumably maps to fast on-chip work RAM through the
 * linker script - confirm.
 */
.section .iwram, "ax", %progbits
/* Everything that follows is assembled as 32-bit ARM instructions. */
.arm
/* On ARM targets .align takes a power of two: 2^2 = 4-byte alignment. */
.align 2
/*
 * mtl_udiv10 - unsigned division by 10
 *
 *   In:       r0 = dividend
 *   Out:      r0 = dividend / 10 (truncated)
 *   Clobbers: r1, r2, condition flags
 *
 * Leaf routine in ARM state; uses no stack and returns with bx lr.
 * NOTE(review): presumably exposed to C as uint32_t mtl_udiv10(uint32_t) -
 * confirm against the public header.
 */
    .global mtl_udiv10
    .type   mtl_udiv10, %function
mtl_udiv10:
    /* The quotient cannot be produced in r0 directly: the macro's UMULL
     * needs its input and output registers to differ on ARMv4. */
    udiv10  r1, r0, r2                  /* r1 = r0 / 10, r2 = scratch */
    mov     r0, r1                      /* hand the quotient back in r0 */
    bx      lr
    .size   mtl_udiv10, . - mtl_udiv10
/*
 * mtl_umod10 - unsigned remainder of division by 10
 *
 *   In:       r0 = dividend
 *   Out:      r0 = dividend % 10
 *   Clobbers: r1, r2, condition flags
 *
 * Leaf routine in ARM state; uses no stack and returns with bx lr.
 * NOTE(review): presumably exposed to C as uint32_t mtl_umod10(uint32_t) -
 * confirm against the public header.
 */
    .global mtl_umod10
    .type   mtl_umod10, %function
mtl_umod10:
    /* r0 must stay intact until the macro's final subtraction, so the
     * remainder is built in r1 and copied afterwards. */
    umod10  r1, r0, r2                  /* r1 = r0 % 10, r2 = scratch */
    mov     r0, r1                      /* hand the remainder back in r0 */
    bx      lr
    .size   mtl_umod10, . - mtl_umod10