diff --git a/include/mtl/fixed.hpp b/include/mtl/fixed.hpp
index 8ec057f..ab536de 100644
--- a/include/mtl/fixed.hpp
+++ b/include/mtl/fixed.hpp
@@ -5,6 +5,8 @@
 
 #include "mtl/target.hpp"
 
+TARGET_ARM_MODE
+
 namespace mtl {
 /**
  * \brief 32-bit Fixed point number
@@ -19,14 +21,16 @@ namespace mtl {
  *
  * \par ARM
  *
- * All member functions are compiled in ARM mode because some operators (notably
- * multiplication and division) use ARM-only instructions. For optimal performance,
- * fixed point numbers should be used in ARM-mode code to enable inlining. To ensure
- * inlining is enabled, enclose the include directive in `TARGET_ARM_MODE` and
- * `TARGET_END_MODE` from ``. This is necessary because inline assembly
- * is used and GCC can't tell that ARM-only instructions are used, so it tries
- * to inline in Thumb mode too. If these directives are not used, some operations
- * will not be inlined even in arm mode (ex. multiplication and division).
+ * All functions are compiled in ARM mode because some operators (notably
+ * multiplication and division) use ARM-only instructions. For compatibility
+ * and optimal performance, fixed point numbers should only be used in ARM-mode
+ * code. If `operator*` is used in Thumb code, compilation will fail.
+ * This happens because GCC attempts to inline the function even though it
+ * cannot be inlined in Thumb-mode. Conditional inlining using TARGET_*_MODE
+ * is not used because it is fragile, for example, when including into ``
+ * and also in `foo.cpp`. In this case, `vec4` would attempt to include the
+ * inlined version but `foo` would not, causing an ODR violation. All other
+ * operations are usable from Thumb-mode, with a significant performance penalty.
  */
 class fixed {
 private:
@@ -46,10 +50,10 @@ private:
      * DO NOT use to set the fixed number to an integer value, use
      * the public constructor instead.
      */
-    ARM_MODE constexpr fixed(int32_t _x, bool) : x(_x) {}
+    constexpr fixed(int32_t _x, bool) : x(_x) {}
 
 public:
-    ARM_MODE constexpr fixed() : x(0) {}
+    constexpr fixed() : x(0) {}
     /**
      * \brief Integer constructor
      *
@@ -58,7 +62,7 @@ public:
      * the class description for more detail.
      */
     template , bool> = true>
-    ARM_MODE constexpr fixed(T _i) : x(_i * 64) {}
+    constexpr fixed(T _i) : x(_i * 64) {}
     /**
      * \brief Floating point constructor
      *
@@ -71,7 +75,7 @@ public:
      * float.
      */
     template , bool> = true>
-    ARM_MODE constexpr fixed(T _f)
+    constexpr fixed(T _f)
     // 0.5 offset accounts for truncating to integer, round instead
     : x((_f * 64) + 0.5f) {}
 
@@ -84,7 +88,7 @@ public:
      *
      * Should not be used unless absolutely needed.
      */
-    ARM_MODE static constexpr fixed from_raw(int32_t x) {
+    static constexpr fixed from_raw(int32_t x) {
         return fixed(x, true);
     }
 
@@ -94,7 +98,7 @@ public:
      * Gets the raw value of the fixed point number. i.e. The fixed point
      * number multiplied by 64.
      */
-    ARM_MODE constexpr int32_t raw() const {
+    constexpr int32_t raw() const {
         return x;
     }
 
@@ -104,13 +108,13 @@ public:
      * Addition with fixed point numbers is the same as with a 32-bit
      * integer, so should be extremely quick.
      */
-    ARM_MODE constexpr fixed operator+(fixed rhs) const {
+    constexpr fixed operator+(fixed rhs) const {
         return from_raw(x + rhs.x);
     }
     /**
      * \brief Fixed point subtraction
      */
-    ARM_MODE constexpr fixed operator-(fixed rhs) const {
+    constexpr fixed operator-(fixed rhs) const {
         return from_raw(x - rhs.x);
     }
 
@@ -118,13 +122,13 @@ public:
      * \brief Fixed point multiplication
      *
      * Uses an assembly implementation to multiply the two numbers.
+     *
+     * \par ARM
+     *
+     * Use in ARM-mode only. Attempted use in Thumb-mode will cause a
+     * compilation failure.
      */
-#ifdef __ARM_32BIT_STATE // Safe to inline in ARM mode, but not in Thumb mode
-    ALWAYS_INLINE        // because ARM-mode instructions are used. GCC isn't smart
-#else                    // enough to figure it out on its own
-    NOINLINE
-#endif
-    ARM_MODE fixed operator*(fixed rhs) const {
+    fixed operator*(fixed rhs) const {
         int32_t raw_result;
         asm(
             "smull r8, r9, %[a], %[b];"
@@ -149,59 +153,15 @@ public:
      * that if a denominator slowly approaches zero, once it reaches zero
      * the quotient's sign will flip. The largest value is used because fixed
      * point numbers don't have a representation of infinity.
+     *
+     * \par GBA
+     *
+     * Placed in IWRAM
      */
-#ifdef __ARM_32BIT_STATE // Safe to inline in ARM mode, but not in Thumb mode
-    ALWAYS_INLINE        // because ARM-mode instructions are used. GCC isn't smart
-#else                    // enough to figure it out on its own
-    NOINLINE
-#endif
-    ARM_MODE fixed operator/(fixed rhs) const {
-        int32_t raw_result;
-        asm(
-            // This division implementation has two methods it can use.
-            // The fastest uses a left shift followed by a single division. The value is shifted
-            // first to preserve the decimal part. Unfortunately, this means large numerators
-            // will cause the operation to overflow. In this case, a compatible method will be
-            // used. This method uses two divisions, one to calculate the integral quotient,
-            // and one to calculate the decimal part. Both these methods work for negative numbers as well.
-            "movs r1, %[d];"        // Load numerator and denominator, and check if negative or zero
-            "beq 4f;"
-            "movs r0, %[n];"
-            "blt 1f;"
-            "tst r0, #0x7e000000;"  // Check if the numerator is large enough to overflow
-            "bne 3f;"
-            "b 2f;"
-            "1:" // check_negative
-            "mvn r2, r0;"           // Check if the numerator is large enough to overflow.
-            "tst r2, #0x7e000000;"
-            "bne 3f;"
-            "2:" // fast_div // Fast method
-            "lsl r0, #6;"           // Shift first to avoid truncation
-            "swi #0x60000;"         // GBA Div syscall
-            "mov %[res], r0;"
-            "b 5f;"
-            "3:" // compat_div // Compatible method
-            "swi #0x60000;"         // Compute quotient and shift
-            "lsl r2, r0, #6;"
-            "mov r0, r1;"           // Div syscall puts the modulus in r1, use it as the numerator
-            "lsr r1, %[d], #6;"     // Load the denominator again, shifted right to calculate decimal part
-            "swi #0x60000;"
-            "mov %[res], r2;"       // Calculate the final result
-            "add %[res], r0;"
-            "b 5f;"
-            "4:" // zero_div
-            "teq %[n], %[d];"       // Set result to largest possible negative/positive value.
-            "movmi %[res], #0x80000000;"
-            "movpl %[res], #0x7FFFFFFF;"
-            "5:"
-            : [res] "=r" (raw_result)
-            : [n] "r" (x),
-              [d] "r" (rhs.x)
-            : "r0", "r1", "r2", "r3"
-        );
-
-        return from_raw(raw_result);
-    }
+    fixed operator/(fixed rhs) const;
 };
 
 } // namespace mtl
+
+TARGET_END_MODE
+
diff --git a/src/gba/fixed.cpp b/src/gba/fixed.cpp
new file mode 100644
index 0000000..2f6c5b9
--- /dev/null
+++ b/src/gba/fixed.cpp
@@ -0,0 +1,68 @@
+#include "mtl/target.hpp"
+
+#include "mtl/fixed.hpp"
+
+TARGET_ARM_MODE
+
+namespace mtl {
+
+/**
+ * \brief Fixed point division (out-of-line definition)
+ *
+ * Divides through the GBA Div syscall (`swi #0x60000`). A zero denominator
+ * produces the raw value 0x80000000 or 0x7FFFFFFF, chosen from the sign of
+ * `n ^ d`, rather than invoking the syscall.
+ */
+GBA_IWRAM fixed fixed::operator/(fixed rhs) const {
+    int32_t raw_result;
+    asm(
+        // This division implementation has two methods it can use.
+        // The fastest uses a left shift followed by a single division. The value is shifted
+        // first to preserve the decimal part. Unfortunately, this means large numerators
+        // will cause the operation to overflow. In this case, a compatible method will be
+        // used. This method uses two divisions, one to calculate the integral quotient,
+        // and one to calculate the decimal part. Both these methods work for negative numbers as well.
+        "movs r1, %[d];"        // Load numerator and denominator, and check if negative or zero
+        "beq 4f;"
+        "movs r0, %[n];"
+        "blt 1f;"
+        "tst r0, #0x7e000000;"  // Check if the numerator is large enough to overflow
+        "bne 3f;"
+        "b 2f;"
+        "1:" // check_negative
+        "mvn r2, r0;"           // Check if the numerator is large enough to overflow.
+        "tst r2, #0x7e000000;"
+        "bne 3f;"
+        "2:" // fast_div // Fast method
+        "lsl r0, #6;"           // Shift first to avoid truncation
+        "swi #0x60000;"         // GBA Div syscall
+        "mov %[res], r0;"
+        "b 5f;"
+        "3:" // compat_div // Compatible method
+        "swi #0x60000;"         // Compute quotient and shift
+        "lsl r2, r0, #6;"
+        "mov r0, r1;"           // Div syscall puts the modulus in r1, use it as the numerator
+        "lsr r1, %[d], #6;"     // Load the denominator again, shifted right to calculate decimal part
+        "swi #0x60000;"
+        "mov %[res], r2;"       // Calculate the final result
+        "add %[res], r0;"
+        "b 5f;"
+        "4:" // zero_div
+        "teq %[n], %[d];"       // Set result to largest possible negative/positive value.
+        "movmi %[res], #0x80000000;"
+        "movpl %[res], #0x7FFFFFFF;"
+        "5:"
+        : [res] "=r" (raw_result)
+        : [n] "r" (x),
+          [d] "r" (rhs.x)
+        : "r0", "r1", "r2", "r3", "cc" // "cc": movs/tst/teq overwrite the condition flags
+    );
+    // The asm yields an already-scaled raw value; wrap it with from_raw() so
+    // the integer constructor does not multiply it by 64 a second time.
+    return from_raw(raw_result);
+}
+
+} // namespace mtl
+
+TARGET_END_MODE
+