Fix inaccurate implementation of udiv100000 and udiv1000000000

Both assembly macros failed when given large numbers ending in 9. For
example, udiv100000 of 3999999999 produced 40000 instead of 39999.
Similarly, udiv1000000000 of 3999999999 produced 4 instead of 3.

Both of the previous implementations failed to satisfy the constraints of
the Granlund-Montgomery integer division algorithm. This commit replaces
these macros with the correct implementation generated by clang for a
constant integer division. I do not understand how this implementation
works. All other macros do satisfy the Granlund-Montgomery constraints.
This commit is contained in:
Madeline Busig 2024-04-01 19:45:51 -06:00
parent 1b48b5ec80
commit d1155befdb

View File

@ -7,7 +7,8 @@
* 0xCCCCCCCD >> 35 approximately equals 0.1
* Performs a 64 bit multiply of 0xCCCCCCCD and rx, shifts the high 32 bits
* by 3, and discards the low bits. This results in a division by 10 that
* works for all unsigned values of rx. This satisfies the constraints of
* the Granlund-Montgomery integer division algorithm.
*/
.macro udiv10 rd, rx, rt
ldr \rt, =0xCCCCCCCD
@ -33,10 +34,20 @@
lsrs \rd, $13
.endm
/*
* Unsigned division by 100000: rd = rx / 100000.
*
* When using the Granlund-Montgomery integer division algorithm directly,
* the magic number produced does not fit in 32 bits. GM produces:
* m = 0x14F8B588F
* k = 17
*
* This division instead uses the sequence produced by clang for a division
* by 100000. It works because 100000 = 2^5 * 3125: shifting rx right by 5
* first leaves a dividend below 2^27, and for a dividend that small the
* 28 bit constant 0xA7C5AC5 = ceil(2^39 / 3125) gives an exact quotient.
* The high word of the 64 bit product is ((rx >> 5) * 0xA7C5AC5) >> 32, and
* the final shift by 7 completes the >> 39.
*
* rd: receives the quotient
* rx: dividend; overwritten with rx >> 5
* rt: scratch; overwritten with the low word of the product
*/
.macro udiv100000 rd, rx, rt
lsr \rx, $5 /* strip the 2^5 factor of 100000 */
ldr \rt, =0xA7C5AC5 /* ceil(2^39 / 3125) */
umull \rt, \rd, \rx, \rt /* rd = high 32 bits of the 64 bit product */
lsrs \rd, $7 /* 32 + 7 = 39 bit total shift */
.endm
.macro udiv1000000 rd, rx, rt
@ -57,10 +68,14 @@
lsrs \rd, $25
.endm
/*
* Unsigned division by 1000000000: rd = rx / 1000000000.
*
* As with udiv100000, this uses the sequence produced by clang for a
* constant division rather than a single 32 bit magic number.
* 1000000000 = 2^9 * 1953125, so rx is shifted right by 9 first, leaving a
* dividend below 2^23. For a dividend that small the constant
* 0x44B83 = ceil(2^39 / 1953125) gives an exact quotient. The high word of
* the 64 bit product is ((rx >> 9) * 0x44B83) >> 32, and the final shift
* by 7 completes the >> 39.
*
* rd: receives the quotient
* rx: dividend; overwritten with rx >> 9
* rt: scratch; overwritten with the low word of the product
*/
.macro udiv1000000000 rd, rx, rt
lsr \rx, $9 /* strip the 2^9 factor of 1000000000 */
ldr \rt, =0x44B83 /* ceil(2^39 / 1953125) */
umull \rt, \rd, \rx, \rt /* rd = high 32 bits of the 64 bit product */
lsrs \rd, $7 /* 32 + 7 = 39 bit total shift */
.endm
/*