Remove conditional fixed point number inlining
Conditional inlining caused ODR (One Definition Rule) violations. Fixed point numbers should now only be used in ARM-mode code; attempting to use them in Thumb-mode code will cause a compilation failure. This commit also moves operator/ into IWRAM on the GBA.
This commit is contained in:
parent
62da9d03c1
commit
2181557d9d
@ -5,6 +5,8 @@
|
|||||||
|
|
||||||
#include "mtl/target.hpp"
|
#include "mtl/target.hpp"
|
||||||
|
|
||||||
|
TARGET_ARM_MODE
|
||||||
|
|
||||||
namespace mtl {
|
namespace mtl {
|
||||||
/**
|
/**
|
||||||
* \brief 32-bit Fixed point number
|
* \brief 32-bit Fixed point number
|
||||||
@ -19,14 +21,16 @@ namespace mtl {
|
|||||||
*
|
*
|
||||||
* \par ARM
|
* \par ARM
|
||||||
*
|
*
|
||||||
* All member functions are compiled in ARM mode because some operators (notably
|
* All functions are compiled in ARM mode because some operators (notably
|
||||||
* multiplication and division) use ARM-only instructions. For optimal performance,
|
* multiplication and division) use ARM-only instructions. For compatibility
|
||||||
* fixed point numbers should be used in ARM-mode code to enable inlining. To ensure
|
* and optimal performance, fixed point numbers should only be used in ARM-mode
|
||||||
* inlining is enabled, enclose the include directive in `TARGET_ARM_MODE` and
|
* code. If `operator*` is used in Thumb code, compilation will fail.
|
||||||
* `TARGET_END_MODE` from `<mtl/target.hpp>`. This is necessary because inline assembly
|
* This happens because GCC attempts to inline the function even though it
|
||||||
* is used and GCC can't tell that ARM-only instructions are used, so it tries
|
* cannot be inlined in Thumb-mode. Conditional inlining using TARGET_*_MODE
|
||||||
* to inline in Thumb mode too. If these directives are not used, some operations
|
* is not used because it is fragile, for example, when including into `<vec4.hpp>`
|
||||||
* will not be inlined even in arm mode (ex. multiplication and division).
|
* and also in `foo.cpp`. In this case, `vec4` would attempt to include the
|
||||||
|
* inlined version but `foo` would not, causing an ODR violation. All other
|
||||||
|
* operations are usable from Thumb-mode, with a significant performance penalty.
|
||||||
*/
|
*/
|
||||||
class fixed {
|
class fixed {
|
||||||
private:
|
private:
|
||||||
@ -46,10 +50,10 @@ private:
|
|||||||
* DO NOT use to set the fixed number to an integer value, use
|
* DO NOT use to set the fixed number to an integer value, use
|
||||||
* the public constructor instead.
|
* the public constructor instead.
|
||||||
*/
|
*/
|
||||||
ARM_MODE constexpr fixed(int32_t _x, bool) : x(_x) {}
|
constexpr fixed(int32_t _x, bool) : x(_x) {}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
ARM_MODE constexpr fixed() : x(0) {}
|
constexpr fixed() : x(0) {}
|
||||||
/**
|
/**
|
||||||
* \brief Integer constructor
|
* \brief Integer constructor
|
||||||
*
|
*
|
||||||
@ -58,7 +62,7 @@ public:
|
|||||||
* the class description for more detail.
|
* the class description for more detail.
|
||||||
*/
|
*/
|
||||||
template <typename T, std::enable_if_t<std::is_integral_v<T>, bool> = true>
|
template <typename T, std::enable_if_t<std::is_integral_v<T>, bool> = true>
|
||||||
ARM_MODE constexpr fixed(T _i) : x(_i * 64) {}
|
constexpr fixed(T _i) : x(_i * 64) {}
|
||||||
/**
|
/**
|
||||||
* \brief Floating point constructor
|
* \brief Floating point constructor
|
||||||
*
|
*
|
||||||
@ -71,7 +75,7 @@ public:
|
|||||||
* float.
|
* float.
|
||||||
*/
|
*/
|
||||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, bool> = true>
|
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, bool> = true>
|
||||||
ARM_MODE constexpr fixed(T _f)
|
constexpr fixed(T _f)
|
||||||
// 0.5 offset accounts for truncating to integer, round instead
|
// 0.5 offset accounts for truncating to integer, round instead
|
||||||
: x((_f * 64) + 0.5f) {}
|
: x((_f * 64) + 0.5f) {}
|
||||||
|
|
||||||
@ -84,7 +88,7 @@ public:
|
|||||||
*
|
*
|
||||||
* Should not be used unless absolutely needed.
|
* Should not be used unless absolutely needed.
|
||||||
*/
|
*/
|
||||||
ARM_MODE static constexpr fixed from_raw(int32_t x) {
|
static constexpr fixed from_raw(int32_t x) {
|
||||||
return fixed(x, true);
|
return fixed(x, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -94,7 +98,7 @@ public:
|
|||||||
* Gets the raw value of the fixed point number. i.e. The fixed point
|
* Gets the raw value of the fixed point number. i.e. The fixed point
|
||||||
* number multiplied by 64.
|
* number multiplied by 64.
|
||||||
*/
|
*/
|
||||||
ARM_MODE constexpr int32_t raw() const {
|
constexpr int32_t raw() const {
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -104,13 +108,13 @@ public:
|
|||||||
* Addition with fixed point numbers is the same as with a 32-bit
|
* Addition with fixed point numbers is the same as with a 32-bit
|
||||||
* integer, so should be extremely quick.
|
* integer, so should be extremely quick.
|
||||||
*/
|
*/
|
||||||
ARM_MODE constexpr fixed operator+(fixed rhs) const {
|
constexpr fixed operator+(fixed rhs) const {
|
||||||
return from_raw(x + rhs.x);
|
return from_raw(x + rhs.x);
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* \brief Fixed point subtraction
|
* \brief Fixed point subtraction
|
||||||
*/
|
*/
|
||||||
ARM_MODE constexpr fixed operator-(fixed rhs) const {
|
constexpr fixed operator-(fixed rhs) const {
|
||||||
return from_raw(x - rhs.x);
|
return from_raw(x - rhs.x);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -118,13 +122,13 @@ public:
|
|||||||
* \brief Fixed point multiplication
|
* \brief Fixed point multiplication
|
||||||
*
|
*
|
||||||
* Uses an assembly implementation to multiply the two numbers.
|
* Uses an assembly implementation to multiply the two numbers.
|
||||||
|
*
|
||||||
|
* \par ARM
|
||||||
|
*
|
||||||
|
* Use in ARM-mode only. Attempted use in Thumb-mode will cause a
|
||||||
|
* compilation failure.
|
||||||
*/
|
*/
|
||||||
#ifdef __ARM_32BIT_STATE // Safe to inline in ARM mode, but not in Thumb mode
|
fixed operator*(fixed rhs) const {
|
||||||
ALWAYS_INLINE // because ARM-mode instructions are used. GCC isn't smart
|
|
||||||
#else // enough to figure it out on its own
|
|
||||||
NOINLINE
|
|
||||||
#endif
|
|
||||||
ARM_MODE fixed operator*(fixed rhs) const {
|
|
||||||
int32_t raw_result;
|
int32_t raw_result;
|
||||||
asm(
|
asm(
|
||||||
"smull r8, r9, %[a], %[b];"
|
"smull r8, r9, %[a], %[b];"
|
||||||
@ -149,59 +153,15 @@ public:
|
|||||||
* that if a denominator slowly approaches zero, once it reaches zero
|
* that if a denominator slowly approaches zero, once it reaches zero
|
||||||
* the quotient's sign will flip. The largest value is used because fixed
|
* the quotient's sign will flip. The largest value is used because fixed
|
||||||
* point numbers don't have a representation of infinity.
|
* point numbers don't have a representation of infinity.
|
||||||
|
*
|
||||||
|
* \par GBA
|
||||||
|
*
|
||||||
|
* Placed in IWRAM
|
||||||
*/
|
*/
|
||||||
#ifdef __ARM_32BIT_STATE // Safe to inline in ARM mode, but not in Thumb mode
|
fixed operator/(fixed rhs) const;
|
||||||
ALWAYS_INLINE // because ARM-mode instructions are used. GCC isn't smart
|
|
||||||
#else // enough to figure it out on its own
|
|
||||||
NOINLINE
|
|
||||||
#endif
|
|
||||||
ARM_MODE fixed operator/(fixed rhs) const {
|
|
||||||
int32_t raw_result;
|
|
||||||
asm(
|
|
||||||
// This division implementation has two methods it can use.
|
|
||||||
// The fastest uses a left shift followed by a single division. The value is shifted
|
|
||||||
// first to preserve the decimal part. Unfortunately, this means large numerators
|
|
||||||
// will cause the operation to overflow. In this case, a compatible method will be
|
|
||||||
// used. This method uses two divisions, one to calculate the integral quotient,
|
|
||||||
// and one to calculate the decimal part. Both these methods work for negative numbers as well.
|
|
||||||
"movs r1, %[d];" // Load numerator and denominator, and check if negative or zero
|
|
||||||
"beq 4f;"
|
|
||||||
"movs r0, %[n];"
|
|
||||||
"blt 1f;"
|
|
||||||
"tst r0, #0x7e000000;" // Check if the numerator is large enough to overflow
|
|
||||||
"bne 3f;"
|
|
||||||
"b 2f;"
|
|
||||||
"1:" // check_negative
|
|
||||||
"mvn r2, r0;" // Check if the numerator is large enough to overflow.
|
|
||||||
"tst r2, #0x7e000000;"
|
|
||||||
"bne 3f;"
|
|
||||||
"2:" // fast_div // Fast method
|
|
||||||
"lsl r0, #6;" // Shift first to avoid truncation
|
|
||||||
"swi #0x60000;" // GBA Div syscall
|
|
||||||
"mov %[res], r0;"
|
|
||||||
"b 5f;"
|
|
||||||
"3:" // compat_div // Compatible method
|
|
||||||
"swi #0x60000;" // Compute quotient and shift
|
|
||||||
"lsl r2, r0, #6;"
|
|
||||||
"mov r0, r1;" // Div syscall puts the modulus in r1, use it as the numerator
|
|
||||||
"lsr r1, %[d], #6;" // Load the denominator again, shifted right to calculate decimal part
|
|
||||||
"swi #0x60000;"
|
|
||||||
"mov %[res], r2;" // Calculate the final result
|
|
||||||
"add %[res], r0;"
|
|
||||||
"b 5f;"
|
|
||||||
"4:" // zero_div
|
|
||||||
"teq %[n], %[d];" // Set result to largest possible negative/positive value.
|
|
||||||
"movmi %[res], #0x80000000;"
|
|
||||||
"movpl %[res], #0x7FFFFFFF;"
|
|
||||||
"5:"
|
|
||||||
: [res] "=r" (raw_result)
|
|
||||||
: [n] "r" (x),
|
|
||||||
[d] "r" (rhs.x)
|
|
||||||
: "r0", "r1", "r2", "r3"
|
|
||||||
);
|
|
||||||
|
|
||||||
return from_raw(raw_result);
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace mtl
|
} // namespace mtl
|
||||||
|
|
||||||
|
TARGET_END_MODE
|
||||||
|
|
||||||
|
|||||||
59
src/gba/fixed.cpp
Normal file
59
src/gba/fixed.cpp
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
#include "mtl/target.hpp"
|
||||||
|
|
||||||
|
#include "mtl/fixed.hpp"
|
||||||
|
|
||||||
|
TARGET_ARM_MODE
|
||||||
|
|
||||||
|
namespace mtl {
|
||||||
|
|
||||||
|
/**
 * \brief Fixed point division (GBA implementation)
 *
 * Divides the raw value of this number by the raw value of \p rhs using the
 * GBA Div syscall and returns the quotient as a fixed point number.
 *
 * Two strategies are used:
 *  - Fast: shift the numerator left by 6 bits first, then perform a single
 *    division. Only taken when none of bits 25-30 of the numerator (or of its
 *    bitwise complement, for negative numerators) are set, so the shift
 *    cannot overflow.
 *  - Compatible: divide first to obtain the integral quotient, then divide
 *    the remainder by the denominator shifted right by 6 bits to obtain the
 *    fractional part, and add the two.
 *
 * Division by zero does not invoke the syscall. The result is the most
 * negative raw value (0x80000000) when the numerator is negative and the
 * most positive raw value (0x7FFFFFFF) otherwise.
 *
 * \param rhs Denominator
 * \return Quotient, built from the raw result with `from_raw`
 */
GBA_IWRAM fixed fixed::operator/(fixed rhs) const {
    int32_t raw_result;
    asm(
        "movs r1, %[d];"            // Load the denominator; Z flag set if it is zero
        "beq 4f;"                   // Denominator is zero -> zero_div
        "movs r0, %[n];"            // Load the numerator; N flag set if it is negative
        "blt 1f;"                   // Negative numerator -> check_negative
        "tst r0, #0x7e000000;"      // Would shifting left by 6 overflow?
        "bne 3f;"                   // Yes -> compat_div
        "b 2f;"                     // No  -> fast_div
        "1:" // check_negative
        "mvn r2, r0;"               // Same overflow test on the complemented numerator
        "tst r2, #0x7e000000;"
        "bne 3f;"
        "2:" // fast_div
        "lsl r0, #6;"               // Shift first so the fractional bits survive the division
        "swi #0x60000;"             // GBA Div syscall
        "mov %[res], r0;"
        "b 5f;"
        "3:" // compat_div
        "swi #0x60000;"             // Integral quotient
        "lsl r2, r0, #6;"           // Scale the quotient to fixed point
        "mov r0, r1;"               // Div syscall puts the modulus in r1, use it as the numerator
        // NOTE(review): `lsr` is a logical shift, so a negative denominator
        // becomes a large positive value here, and a denominator with
        // magnitude below 64 becomes zero. `asr` may be intended; confirm
        // before changing.
        "lsr r1, %[d], #6;"         // Denominator again, shifted right for the fractional part
        "swi #0x60000;"
        "mov %[res], r2;"           // result = scaled quotient + fractional part
        "add %[res], r0;"
        "b 5f;"
        "4:" // zero_div
        "teq %[n], %[d];"           // d is zero here, so N reflects the sign of the numerator
        "movmi %[res], #0x80000000;"
        "movpl %[res], #0x7FFFFFFF;"
        "5:"
        : [res] "=r" (raw_result)
        : [n] "r" (x),
          [d] "r" (rhs.x)
        // "cc" is listed because movs/tst/teq modify the condition flags.
        : "r0", "r1", "r2", "r3", "cc"
    );
    // Must go through from_raw: `fixed` is implicitly constructible from an
    // integer, and that constructor scales its argument by 64. Returning the
    // bare int32_t would therefore multiply the already-scaled quotient by 64.
    return from_raw(raw_result);
}
|
||||||
|
|
||||||
|
} // namespace mtl
|
||||||
|
|
||||||
|
TARGET_END_MODE
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user