From 739437753cfe049922698f9ac777cfba1beda0d4 Mon Sep 17 00:00:00 2001
From: Myles Busig <mdbusig@gmail.com>
Date: Fri, 2 Aug 2024 22:10:39 -0600
Subject: [PATCH] Fix fixed point number compilation failures during attempted
 inlining

Currently, some fixed-point operations (notably multiplication) fail to
compile when used in Thumb-mode routines. This occurs because GCC
attempts to inline the operation into the Thumb-mode routine, but the
operation's inline assembly uses ARM-only instructions. This commit adds
the ".arm" directive to the inline assembly of the affected operators,
which marks the assembly as ARM-mode code and prevents it from being
inlined into Thumb-mode routines. As a result, fixed-point numbers can
be used from both ARM-mode and Thumb-mode code without compilation
failures. Using them from ARM-mode code is still preferred for optimal
performance.
---
 include/mtl/fixed.hpp | 18 ++++--------------
 src/gba/fixed.cpp     |  1 +
 2 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/include/mtl/fixed.hpp b/include/mtl/fixed.hpp
index 090daa2..83b08ee 100644
--- a/include/mtl/fixed.hpp
+++ b/include/mtl/fixed.hpp
@@ -22,15 +22,9 @@ namespace mtl {
  * \par ARM
  *
  * All functions are compiled in ARM mode because some operators (notably
- * multiplication and division) use ARM-only instructions. For compatability
- * and optimal performance, fixed point numbers should only be used in ARM-mode
- * code. If `operator*` is used in Thumb code, compilation will fail.
- * This happens because GCC attempts to inline the function even though it
- * cannot be inlined in Thumb-mode. Conditional inlining using TARGET_*_MODE
- * is not used because it is fragile, for example, when including into `<vec4.hpp>`
- * and also in `foo.cpp`. In this case, `vec4` would attempt to include the
- * inlined version but `foo` would not, causing a ODR violation. All other
- * operations are usable from Thumb-mode, with a significant performance penalty.
+ * multiplication and division) use ARM-only instructions. Fixed point numbers
+ * work from both ARM-mode and Thumb-mode code, but ARM-mode code is preferred
+ * for optimal performance because it enables as much inlining as possible.
  */
 class fixed {
 private:
@@ -125,15 +119,11 @@ public:
 	 * \brief Fixed point multiplication
 	 *
 	 * Uses an assembly implementation to multiply the two numbers.
-	 *
-	 * \par ARM
-	 *
-	 * Use in ARM-mode only. Attempted use in Thumb-mode will cause a
-	 * compilation failure.
 	 */
 	fixed operator*(fixed rhs) const {
 		int32_t raw_result;
 		asm(
+				".arm;"
 				"smull	r8, r9, %[a], %[b];"
 				"lsr	%[res], r8, #6;"
 				"orr	%[res], r9, lsl #26;"
diff --git a/src/gba/fixed.cpp b/src/gba/fixed.cpp
index 2f6c5b9..a65bb46 100644
--- a/src/gba/fixed.cpp
+++ b/src/gba/fixed.cpp
@@ -15,6 +15,7 @@ GBA_IWRAM fixed fixed::operator/(fixed rhs) const {
 			// will cause the operation to overflow. In this case, a compatible method will be
 			// used. This method uses two divisions, one to calculate the integral quotient,
 			// and one to calculate the decimal part. Both these methods work for negative numbers as well.
+			".arm;"
 			"movs	r1, %[d];"            // Load numerator and denominator, and check if negative or zero
 			"beq	4f;"
 			"movs	r0, %[n];"