Modify fixed-point numbers to use 8 bits for the decimal point

This was done because, with 6 bits of precision, error would accumulate
up to 0.078 when computing a projection matrix. Changing the
decimal point precision to 8 bits minimizes the effect of this error,
reducing it closer to 0.016. This does decrease the maximum
value from around 33,000,000 to around 8,000,000, but that
shouldn't be an issue.
This commit is contained in:
Madeline Busig 2024-09-19 19:30:25 -07:00
parent 8f30ed4311
commit 094a4731c5
2 changed files with 24 additions and 17 deletions

View File

@ -9,13 +9,15 @@ namespace mtl {
/**
* \brief 32-bit Fixed point number
*
* Uses a base of 64. ie. the lower 6 bits are after the decimal place,
* the other 26 bits are before the decimal place.
* Uses a base of 256. i.e. the lower 8 bits are after the decimal place,
* the next 23 bits are before the decimal place, and the top bit is the sign bit.
*
* Valid values are in the range ~[-33'554'431.01, 33'554'432.98]
* Valid values are in the range ~[-8'388'608.000, 8'388'607.996]
*
* Has a maximum error of +/- 1/128 (~0.0078), integers are always
* exactly represented.
* Has a maximum error of +/- 1/512 (~0.0019), integers are always
* exactly represented. Keep in mind, this error accumulates each time an operation
* is performed. For example, when computing a vec4 projection matrix, the result may
* diverge by closer to 0.0156.
*
* \par ARM
*
@ -57,7 +59,7 @@ public:
*/
template <typename T, std::enable_if_t<std::is_integral_v<T>, bool> = true>
ARM_MODE
constexpr fixed(T _i) noexcept : x(_i * 64) {}
constexpr fixed(T _i) noexcept : x(_i * 256) {}
/**
* \brief Floating point constructor
*
@ -73,7 +75,7 @@ public:
ARM_MODE
constexpr fixed(T _f) noexcept
// 0.5 offset accounts for truncating to integer, round instead
: x((_f * 64) + 0.5f) {}
: x((_f * 256) + 0.5f) {}
/**
* \brief Raw value factory
@ -93,7 +95,7 @@ public:
* \brief Raw value accessor
*
* Gets the raw value of the fixed point number. i.e. The fixed point
* number multiplied by 64.
* number multiplied by 256.
*/
ARM_MODE
constexpr int32_t raw() const noexcept {
@ -137,7 +139,7 @@ public:
*/
ARM_MODE
constexpr fixed operator*(fixed rhs) const noexcept {
return from_raw(((int64_t)x * rhs.x) >> 6);
return from_raw(((int64_t)x * rhs.x) >> 8);
}
ARM_MODE
constexpr fixed& operator*=(fixed rhs) noexcept {
@ -191,5 +193,11 @@ public:
}
};
/**
 * \brief Stream insertion operator
 *
 * Writes the raw value of \p rhs (the integer returned by fixed::raw())
 * to \p lhs. Note that this is the underlying integer representation,
 * not the scaled real value.
 *
 * \tparam STREAM_TYPE Any type that supports `operator<<` with the value
 *         returned by fixed::raw()
 * \param lhs Stream to write to
 * \param rhs Fixed point number to write
 * \return Reference to \p lhs, so that insertions can be chained
 */
template <typename STREAM_TYPE>
STREAM_TYPE& operator<<(STREAM_TYPE& lhs, fixed rhs) {
    lhs << rhs.raw();
    // Return by reference: returning by value would copy the stream on every
    // insertion and does not compile for non-copyable streams.
    return lhs;
}
} // namespace mtl

View File

@ -1,5 +1,3 @@
#include "mtl/target.hpp"
#include "mtl/fixed.hpp"
namespace mtl {
@ -17,24 +15,25 @@ fixed fixed::operator/(fixed rhs) const noexcept {
"movs r1, %[d];" // Load numerator and denominator, and check if negative or zero
"beq 4f;"
"movs r0, %[n];"
"ldr r3, =#0x7f800000;" // Load constant to check for overflow
"blt 1f;"
"tst r0, #0x7e000000;" // Check if the numerator is large enough to overflow
"tst r0, r3;" // Check if the numerator is large enough to overflow from the leftshift
"bne 3f;"
"b 2f;"
"1:" // check_negative
"mvn r2, r0;" // Check if the numerator is large enough to overflow.
"tst r2, #0x7e000000;"
"mvn r2, r0;" // Check if the numerator is large enough to overflow from the leftshift
"tst r2, r3;"
"bne 3f;"
"2:" // fast_div // Fast method
"lsl r0, #6;" // Shift first to avoid truncation
"lsl r0, #8;" // Shift first to avoid truncation
"swi #0x60000;" // GBA Div syscall
"mov %[res], r0;"
"b 5f;"
"3:" // compat_div // Compatible method
"swi #0x60000;" // Compute quotient and shift
"lsl r2, r0, #6;"
"lsl r2, r0, #8;"
"mov r0, r1;" // Div syscall puts the modulus in r1, use it as the numerator
"lsr r1, %[d], #6;" // Load the denominator again, shifted right to calculate decimal part
"lsr r1, %[d], #8;" // Load the denominator again, shifted right to calculate decimal part
"swi #0x60000;"
"mov %[res], r2;" // Calculate the final result
"add %[res], r0;"