mtl/include/mtl/fixed.hpp

208 lines
6.5 KiB
C++

#pragma once
#include <cstdint>
#include <type_traits>
#include "mtl/target.hpp"
namespace mtl {
/**
* \brief 32-bit Fixed point number
*
* Uses a base of 64, i.e. the lower 6 bits hold the fractional part and
* the upper 26 bits hold the signed integer part.
*
* Valid values are in the range [-33'554'432, ~33'554'431.98]
*
* Has a maximum rounding error of +/- 1/128 (~0.0078); integers are always
* represented exactly.
*
* \par ARM
*
* All member functions are compiled in ARM mode because some operators (notably
* multiplication and division) use ARM-only instructions. For optimal performance,
* fixed point numbers should be used in ARM-mode code to enable inlining. To ensure
* inlining is enabled, enclose the include directive in `TARGET_ARM_MODE` and
* `TARGET_END_MODE` from `<mtl/target.hpp>`. This is necessary because inline assembly
* is used and GCC can't tell that ARM-only instructions are used, so it tries
* to inline in Thumb mode too. If these directives are not used, some operations
* will not be inlined even in arm mode (ex. multiplication and division).
*/
class fixed {
private:
    /// Raw storage: the represented value multiplied by 64 (6 fractional bits).
    int32_t x;

    /**
     * \brief Raw constructor
     *
     * Creates a new fixed point number whose raw data is `_x`. The unused
     * `bool` parameter only exists to distinguish this overload from the
     * public integer constructor.
     *
     * \note
     *
     * DO NOT USE DIRECTLY. Use `from_raw` instead.
     *
     * \note
     *
     * DO NOT use to set the fixed number to an integer value, use
     * the public constructor instead.
     */
    ARM_MODE constexpr fixed(int32_t _x, bool) : x(_x) {}

public:
    /**
     * \brief Default constructor
     *
     * Creates a fixed point number equal to zero.
     */
    ARM_MODE constexpr fixed() : x(0) {}

    /**
     * \brief Integer constructor
     *
     * Creates a new fixed point number with the value of the integer.
     * Must be within the range represented by fixed point numbers; the
     * raw value is `_i * 64` and has to fit in an `int32_t`.
     */
    template <typename T, std::enable_if_t<std::is_integral_v<T>, bool> = true>
    ARM_MODE constexpr fixed(T _i) : x(static_cast<int32_t>(_i * 64)) {}

    /**
     * \brief Floating point constructor
     *
     * Creates a new fixed point number with the closest number to
     * the floating point number (ties are rounded away from zero).
     * Must be within the range represented by fixed point numbers; the
     * scaled value `_f * 64` has to fit in an `int32_t`.
     *
     * Must be implemented as a template with enable_if, otherwise passing
     * an int (not int32_t) is ambiguous between the promotion to int32_t and
     * float.
     */
    template <typename T, std::enable_if_t<std::is_floating_point_v<T>, bool> = true>
    ARM_MODE constexpr fixed(T _f)
        // The conversion to an integer truncates toward zero, so the scaled
        // value is first moved half a step away from zero. Adding +0.5
        // unconditionally would round negative inputs the wrong way
        // (e.g. -1.0 would become raw -63 instead of -64).
        : x(static_cast<int32_t>(_f * 64 + (_f < 0 ? T(-0.5) : T(0.5)))) {}

    /**
     * \brief Raw value factory
     *
     * Creates a new fixed point number with the raw data of x, i.e. the
     * resulting number represents `x / 64`.
     *
     * \note
     *
     * Should not be used unless absolutely needed.
     */
    ARM_MODE static constexpr fixed from_raw(int32_t x) {
        return fixed(x, true);
    }

    /**
     * \brief Raw value accessor
     *
     * Gets the raw value of the fixed point number. i.e. The fixed point
     * number multiplied by 64.
     */
    ARM_MODE constexpr int32_t raw() const {
        return x;
    }

    /**
     * \brief Fixed point addition
     *
     * Addition with fixed point numbers is the same as with a 32-bit
     * integer, so should be extremely quick. The raw values are added
     * directly; no overflow checking is performed.
     */
    ARM_MODE constexpr fixed operator+(fixed rhs) const {
        return from_raw(x + rhs.x);
    }

    /**
     * \brief Fixed point subtraction
     *
     * The raw values are subtracted directly; no overflow checking is
     * performed.
     */
    ARM_MODE constexpr fixed operator-(fixed rhs) const {
        return from_raw(x - rhs.x);
    }

    /**
     * \brief Fixed point multiplication
     *
     * Uses an assembly implementation to multiply the two numbers: the raw
     * values are multiplied into a 64-bit product (`smull`), which is then
     * shifted right by 6 bits and truncated to 32 bits.
     *
     * Safe to inline in ARM mode, but not in Thumb mode because ARM-mode
     * instructions are used. GCC isn't smart enough to figure it out on its
     * own, hence the explicit inlining attributes below.
     */
#ifdef __ARM_32BIT_STATE
    ALWAYS_INLINE
#else
    NOINLINE
#endif
    ARM_MODE fixed operator*(fixed rhs) const {
        int32_t raw_result;
        asm(
            "smull r8, r9, %[a], %[b];"   // r9:r8 = 64-bit product of the raw values
            "lsr %[res], r8, #6;"         // Low word shifted down by the 6 fractional bits
            "orr %[res], r9, lsl #26;"    // Merge in the low 6 bits of the high word
            : [res] "=r" (raw_result)
            : [a] "r" (x),
              [b] "r" (rhs.x)
            : "r8", "r9"
        );
        return from_raw(raw_result);
    }

    /**
     * \brief Fixed point division
     *
     * Faster for numerators in domain [-0x7FFFF, 0x7FFFF].
     *
     * On attempted division by zero, the result is set to the largest
     * absolute value possible with the same sign as the numerator. This means
     * that if a denominator slowly approaches zero, once it reaches zero
     * the quotient's sign will flip. The largest value is used because fixed
     * point numbers don't have a representation of infinity.
     *
     * Safe to inline in ARM mode, but not in Thumb mode because ARM-mode
     * instructions are used. GCC isn't smart enough to figure it out on its
     * own, hence the explicit inlining attributes below.
     *
     * \note
     *
     * NOTE(review): in the compatible (large numerator) path the denominator
     * is shifted right by 6 before the second division, so a positive
     * denominator with a raw value below 64 becomes zero there. The behaviour
     * of the BIOS Div call with a zero denominator should be confirmed.
     */
#ifdef __ARM_32BIT_STATE
    ALWAYS_INLINE
#else
    NOINLINE
#endif
    ARM_MODE fixed operator/(fixed rhs) const {
        int32_t raw_result;
        asm(
            // This division implementation has two methods it can use.
            // The fastest uses a left shift followed by a single division. The value is shifted
            // first to preserve the decimal part. Unfortunately, this means large numerators
            // will cause the operation to overflow. In this case, a compatible method will be
            // used. This method uses two divisions, one to calculate the integral quotient,
            // and one to calculate the decimal part. Both these methods work for negative numbers as well.
            "movs r1, %[d];"            // Load the denominator and check if it is zero
            "beq 4f;"
            "movs r0, %[n];"            // Load the numerator and check if it is negative
            "bmi 1f;"                   // movs only updates N/Z, so test N (bmi), not N != V (blt)
            "tst r0, #0x7e000000;"      // Check if the numerator is large enough to overflow
            "bne 3f;"
            "b 2f;"
            "1:" // check_negative
            "mvn r2, r0;"               // Check if the numerator is large enough to overflow.
            "tst r2, #0x7e000000;"
            "bne 3f;"
            "2:" // fast_div            // Fast method
            "lsl r0, #6;"               // Shift first to avoid truncation
            "swi #0x60000;"             // GBA Div syscall
            "mov %[res], r0;"
            "b 5f;"
            "3:" // compat_div          // Compatible method
            "swi #0x60000;"             // Compute quotient and shift
            "lsl r2, r0, #6;"
            "mov r0, r1;"               // Div syscall puts the modulus in r1, use it as the numerator
            "asr r1, %[d], #6;"         // Load the denominator again, shifted right to calculate decimal
                                        // part. Arithmetic shift keeps the sign of negative denominators.
            "swi #0x60000;"
            "mov %[res], r2;"           // Calculate the final result
            "add %[res], r0;"
            "b 5f;"
            "4:" // zero_div
            "teq %[n], %[d];"           // Set result to largest possible negative/positive value.
            "movmi %[res], #0x80000000;"
            "movpl %[res], #0x7FFFFFFF;"
            "5:"
            : [res] "=r" (raw_result)
            : [n] "r" (x),
              [d] "r" (rhs.x)
            // r0-r3 are used as scratch / by the Div syscall; "cc" because
            // movs/tst/teq modify the condition flags.
            : "r0", "r1", "r2", "r3", "cc"
        );
        return from_raw(raw_result);
    }
};
} // namespace mtl