From 2beab09f7da6ff61fc7f96b1863ff0e40ee8ef49 Mon Sep 17 00:00:00 2001
From: Madeline Busig
Date: Sun, 24 Mar 2024 00:30:56 -0600
Subject: [PATCH] Add armv4t assembly optimized division and modulo by 10

---
 include/armv4t/mtl/armv4t/asm/math.s | 61 ++++++++++++++++++++++++++++
 src/armv4t/math.s                    | 40 +++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 include/armv4t/mtl/armv4t/asm/math.s
 create mode 100644 src/armv4t/math.s

diff --git a/include/armv4t/mtl/armv4t/asm/math.s b/include/armv4t/mtl/armv4t/asm/math.s
new file mode 100644
index 0000000..2020215
--- /dev/null
+++ b/include/armv4t/mtl/armv4t/asm/math.s
@@ -0,0 +1,61 @@
+.syntax unified
+
+/*
+ * udiv10 rd, rx, rt -- unsigned division by 10 (ARM state).
+ *
+ * Out:      rd = rx / 10, truncated.
+ * Kept:     rx is read but never written.
+ * Clobbers: rt (holds the low product word afterwards) and the condition
+ *           flags (lsrs).
+ *
+ * rd, rx and rt must be three different registers:
+ *   - rt is loaded with the constant before rx is read, and
+ *   - before ARMv6, umull requires RdLo (rt), RdHi (rd) and Rm (rx) to be
+ *     distinct.
+ * The .ifc guards below stop the build when two operands are spelled the
+ * same. The comparison is textual, so two names for one register (ip and
+ * r12, for example) are not caught.
+ *
+ * 0xCCCCCCCD / 2^35 is just above 0.1. The macro forms the 64 bit product
+ * 0xCCCCCCCD * rx, keeps only the high 32 bits and shifts them right by 3
+ * (a total shift of 35). This yields the exact quotient for every unsigned
+ * 32 bit value of rx.
+ *
+ * The constant comes from a literal pool (ldr =), so a pool must be within
+ * range of every expansion.
+ */
+.macro udiv10 rd, rx, rt
+    .ifc \rd,\rx
+    .error "udiv10/umod10: rd and rx must be different registers"
+    .endif
+    .ifc \rd,\rt
+    .error "udiv10/umod10: rd and rt must be different registers"
+    .endif
+    .ifc \rx,\rt
+    .error "udiv10/umod10: rx and rt must be different registers"
+    .endif
+    ldr     \rt, =0xCCCCCCCD
+    umull   \rt, \rd, \rx, \rt      /* rd:rt = rx * 0xCCCCCCCD */
+    lsrs    \rd, \rd, #3            /* rd = high word >> 3 */
+.endm
+
+/*
+ * umod10 rd, rx, rt -- unsigned remainder of division by 10 (ARM state).
+ *
+ * Out:      rd = rx % 10.
+ * Kept:     rx is read but never written.
+ * Clobbers: rt (left holding 10) and the condition flags (lsrs, subs).
+ *
+ * rd, rx and rt must be three different registers; udiv10 enforces this at
+ * assembly time.
+ *
+ * Computed as rx - (rx / 10) * 10, using udiv10 for the truncated quotient.
+ */
+.macro umod10 rd, rx, rt
+    udiv10  \rd, \rx, \rt           /* rd = rx / 10 */
+    mov     \rt, #10
+    /* Operand order keeps Rd out of the first source slot, which pre-ARMv6
+     * mul does not allow to equal Rd. */
+    mul     \rd, \rt, \rd           /* rd = (rx / 10) * 10 */
+    subs    \rd, \rx, \rd           /* rd = rx - (rx / 10) * 10 */
+.endm
diff --git a/src/armv4t/math.s b/src/armv4t/math.s
new file mode 100644
index 0000000..b7d33d9
--- /dev/null
+++ b/src/armv4t/math.s
@@ -0,0 +1,40 @@
+.syntax unified
+
+.include "mtl/armv4t/asm/math.s"
+
+.section .iwram, "ax", %progbits
+.arm
+.align 2
+
+/*
+ * mtl_udiv10 -- callable wrapper around the udiv10 macro.
+ *
+ * In:       r0 = unsigned 32 bit dividend
+ * Out:      r0 = r0 / 10, truncated
+ * Clobbers: r1, r2, condition flags
+ *
+ * The quotient is built in r1 and then copied, because udiv10 cannot use
+ * its input register as its destination.
+ */
+.global mtl_udiv10
+.type mtl_udiv10, %function
+mtl_udiv10:
+    udiv10  r1, r0, r2
+    mov     r0, r1
+    bx      lr
+.size mtl_udiv10, . - mtl_udiv10
+
+/*
+ * mtl_umod10 -- callable wrapper around the umod10 macro.
+ *
+ * In:       r0 = unsigned 32 bit value
+ * Out:      r0 = r0 % 10
+ * Clobbers: r1, r2, condition flags
+ */
+.global mtl_umod10
+.type mtl_umod10, %function
+mtl_umod10:
+    umod10  r1, r0, r2
+    mov     r0, r1
+    bx      lr
+.size mtl_umod10, . - mtl_umod10