diff --git a/include/mtl/armv4t/asm/math.s b/include/mtl/armv4t/asm/math.s
index 9333c3e..a3f38b2 100644
--- a/include/mtl/armv4t/asm/math.s
+++ b/include/mtl/armv4t/asm/math.s
@@ -7,7 +7,8 @@
  * 0xCCCCCCCD >> 35 approximately equals 0.1
  * Performs a 64 bit multiply of 0xCCCCCCCD and rx, shifts the high 32 bits
  * by 3, and discards the low bits. This results in a division by 10 that
- * works for all unsigned values of rx
+ * works for all unsigned values of rx. This satifies the constraints of
+ * the Granlund-Montgomery integer division algorithm.
  */
 .macro udiv10 rd, rx, rt
     ldr \rt, =0xCCCCCCCD
@@ -33,10 +34,20 @@
     lsrs \rd, $13
 .endm
 
+/*
+ * A direct Granlund-Montgomery magic number for 100000 does not fit in
+ * 32 bits. GM produces:
+ *   m = 0x14F8B588F
+ *   k = 17
+ * Instead, following clang's output, use 100000 = 2^5 * 3125: shift rx right
+ * by 5, then multiply by 0xA7C5AC5 = ceil(2^39 / 3125) and shift by 32 + 7.
+ * NOTE(review): rx is shifted in place, so the caller's rx value is lost.
+ */
 .macro udiv100000 rd, rx, rt
-    ldr \rt, =0x29f17
+    lsr \rx, $5
+    ldr \rt, =0xA7C5AC5
     umull \rt, \rd, \rx, \rt
-    lsrs \rd, $2
+    lsrs \rd, $7
 .endm
 
 .macro udiv1000000 rd, rx, rt
@@ -57,10 +68,14 @@
     lsrs \rd, $25
 .endm
 
+/*
+ * Same as udiv100000, with 10^9 = 2^9 * 1953125, m = ceil(2^39 / 1953125).
+ */
 .macro udiv1000000000 rd, rx, rt
-    ldr \rt, =0x44b82fa1
+    lsr \rx, $9
+    ldr \rt, =0x44B83
     umull \rt, \rd, \rx, \rt
-    lsrs \rd, $28
+    lsrs \rd, $7
 .endm
 
 /*