From d1155befdba878bba170f7f19d88dfbd77e879e6 Mon Sep 17 00:00:00 2001 From: Madeline Busig Date: Mon, 1 Apr 2024 19:45:51 -0600 Subject: [PATCH] Fix inaccurate implementation of udiv100000 and udiv1000000000 Both assembly macros failed when given large numbers ending in 9. For example, udiv100000 of 3999999999 produced 40000 instead of 39999. Similarly, udiv1000000000 of 3999999999 produced 4 instead of 3. Both of the previous implementations failed the Granlund-Montgomery integer division algorithm. This commit replaces these macros with the correct implementation generated by clang for a constant integer division. I do not understand how this implementation works. All other macros do pass the Granlund-Montgomery algorithm. --- include/mtl/armv4t/asm/math.s | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/include/mtl/armv4t/asm/math.s b/include/mtl/armv4t/asm/math.s index 9333c3e..a3f38b2 100644 --- a/include/mtl/armv4t/asm/math.s +++ b/include/mtl/armv4t/asm/math.s @@ -7,7 +7,8 @@ * 0xCCCCCCCD >> 35 approximately equals 0.1 * Performs a 64 bit multiply of 0xCCCCCCCD and rx, shifts the high 32 bits * by 3, and discards the low bits. This results in a division by 10 that - * works for all unsigned values of rx + * works for all unsigned values of rx. This satisfies the constraints of + * the Granlund-Montgomery integer division algorithm. */ .macro udiv10 rd, rx, rt ldr \rt, =0xCCCCCCCD @@ -33,10 +34,20 @@ lsrs \rd, $13 .endm +/* + * When using the Granlund-Montgomery integer division algorithm, the magic + * number produced does not fit inside the int32 range. GM produces: + * m = 0x14F8B588F + * k = 17 + * + * This division uses the output produced by clang for a division by 100000. + * I don't understand why it works, but it does. 
+ */ .macro udiv100000 rd, rx, rt - ldr \rt, =0x29f17 + lsr \rx, $5 + ldr \rt, =0xA7C5AC5 umull \rt, \rd, \rx, \rt - lsrs \rd, $2 + lsrs \rd, $7 .endm .macro udiv1000000 rd, rx, rt @@ -57,10 +68,14 @@ lsrs \rd, $25 .endm +/* + * Same situation as udiv100000 + */ .macro udiv1000000000 rd, rx, rt - ldr \rt, =0x44b82fa1 + lsr \rx, $9 + ldr \rt, =0x44B83 umull \rt, \rd, \rx, \rt - lsrs \rd, $28 + lsrs \rd, $7 .endm /*