Modify fixed-point numbers to use 8 bits for the decimal point

This was done because, with 6 bits of precision, error would accumulate up to 0.078 when computing a projection matrix. Changing the decimal point precision to 8 bits minimizes the effect of this error, reducing it to closer to 0.016. This does decrease the maximum value from around 33,000,000 to around 8,000,000, but that shouldn't be an issue.
2024-09-19 19:30:25 -07:00 · 2024-09-19 19:30:25 -07:00 · 094a4731c5
commit 094a4731c5
parent 8f30ed4311
2 changed files with 24 additions and 17 deletions
--- a/include/mtl/fixed.hpp
+++ b/include/mtl/fixed.hpp
@ -9,13 +9,15 @@ namespace mtl {
 /**
 * \brief 32-bit Fixed point number
 *
- * Uses a base of 64. ie. the lower 6 bits are after the decimal place,
- * the other 26 bits are before the decimal place.
+ * Uses a base of 256. ie. the lower 8 bits are after the decimal place,
+ * the next 23 bits are before the decimal place, and the top bit is the sign bit.
 *
- * Valid values are in the range ~[-33'554'431.01, 33'554'432.98]
+ * Valid values are in the range ~[-8'388'608.000, 8'388'607.996]
 *
- * Has a maximum error of +/- 1/128 (~0.0078), integers are always
- * exactly represented.
+ * Has a maximum error of +/- 1/512 (~0.0019); integers are always
+ * exactly represented. Keep in mind that this error accumulates each time an
+ * operation is performed. For example, when computing a vec4 projection matrix,
+ * the result may diverge by closer to 0.0156.
 *
 * \par ARM
 *
@ -57,7 +59,7 @@ public:
 	 */
 	template <typename T, std::enable_if_t<std::is_integral_v<T>, bool> = true>
 	ARM_MODE
-	constexpr fixed(T _i) noexcept : x(_i * 64) {}
+	constexpr fixed(T _i) noexcept : x(_i * 256) {}
 	/**
 	 * \brief Floating point constructor
 	 *
@ -73,7 +75,7 @@ public:
 	ARM_MODE
 	constexpr fixed(T _f) noexcept
 		// 0.5 offset accounts for truncating to integer, round instead
-		: x((_f * 64) + 0.5f) {}
+		: x((_f * 256) + 0.5f) {}

 	/**
 	 * \brief Raw value factory
@ -93,7 +95,7 @@ public:
 	 * \brief Raw value accessor
 	 *
 	 * Gets the raw value of the fixed point number. i.e. The fixed point
-	 * number multiplied by 64.
+	 * number multiplied by 256.
 	 */
 	ARM_MODE
 	constexpr int32_t raw() const noexcept {
@ -137,7 +139,7 @@ public:
 	 */
 	ARM_MODE
 	constexpr fixed operator*(fixed rhs) const noexcept {
-		return from_raw(((int64_t)x * rhs.x) >> 6);
+		return from_raw(((int64_t)x * rhs.x) >> 8);
 	}
 	ARM_MODE
 	constexpr fixed& operator*=(fixed rhs) noexcept {
@ -191,5 +193,11 @@ public:
 	}
 };

+/** \brief Stream insertion: writes rhs.raw() (the value multiplied by 256) into lhs. */
+template <typename STREAM_TYPE>
+STREAM_TYPE& operator<<(STREAM_TYPE& lhs, fixed rhs) { // return by reference: no stream copy, allows chaining
+	lhs << rhs.raw();
+	return lhs;
+}
 } // namespace mtl

--- a/src/gba/fixed.cpp
+++ b/src/gba/fixed.cpp
@ -1,5 +1,3 @@
-#include "mtl/target.hpp"
-
 #include "mtl/fixed.hpp"

 namespace mtl {
@ -17,24 +15,25 @@ fixed fixed::operator/(fixed rhs) const noexcept {
 			"movs	r1, %[d];"            // Load numerator and denominator, and check if negative or zero
 			"beq	4f;"
 			"movs	r0, %[n];"
+			"ldr	r3, =#0x7f800000;"    // Load constant to check for overflow
 			"blt	1f;"
-			"tst	r0, #0x7e000000;"     // Check if the numerator is large enough to overflow
+			"tst	r0, r3;"              // Check if the numerator is large enough to overflow from the leftshift
 			"bne	3f;"
 			"b	2f;"
 			"1:"	// check_negative
-			"mvn	r2, r0;"              // Check if the numerator is large enough to overflow.
-			"tst	r2, #0x7e000000;"
+			"mvn	r2, r0;"              // Check if the numerator is large enough to overflow from the leftshift
+			"tst	r2, r3;"
 			"bne	3f;"
 			"2:"	// fast_div           // Fast method
-			"lsl	r0, #6;"              // Shift first to avoid truncation
+			"lsl	r0, #8;"              // Shift first to avoid truncation
 			"swi	#0x60000;"            // GBA Div syscall
 			"mov	%[res], r0;"
 			"b	5f;"
 			"3:"	// compat_div         // Compatible method
 			"swi	#0x60000;"            // Compute quotient and shift
-			"lsl	r2, r0, #6;" 
+			"lsl	r2, r0, #8;" 
 			"mov	r0, r1;"              // Div syscall puts the modulus in r1, use it as the numerator
-			"lsr	r1, %[d], #6;"        // Load the denominator again, shifted right to calculate decimal part
+			"lsr	r1, %[d], #8;"        // Load the denominator again, shifted right to calculate decimal part
 			"swi	#0x60000;"
 			"mov	%[res], r2;"          // Calculate the final result
 			"add	%[res], r0;"