30 changed files with 2440 additions and 45 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -3,17 +3,21 @@ cmake_minimum_required(VERSION 3.5)
 project(mtl LANGUAGES CXX C ASM)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 option(BUILD_TESTS "Build tests" ON)
 message("Compiling MTL for ${CMAKE_SYSTEM_PROCESSOR}")
 # CONFIGURE_DEPENDS option was added in CMake v3.12. This option allows the
 # build system to automatically re-run CMake if the glob changes, solving the
 # major issue with globbing. May have a performance impact, but it should be
 # negligible compared to the time spent building.
-file(GLOB mtl_sources_common LIST_DIRECTORIES false CONFIGURE_DEPENDS src/*.cpp src/*.c src/*.s)
+file(GLOB mtl_sources_common LIST_DIRECTORIES false CONFIGURE_DEPENDS src/common/*.cpp src/common/*.c src/common/*.s)
 file(GLOB mtl_sources_armv4t LIST_DIRECTORIES false CONFIGURE_DEPENDS src/armv4t/*.cpp src/armv4t/*.c src/armv4t/*.s)
 file(GLOB mtl_sources_gba LIST_DIRECTORIES false CONFIGURE_DEPENDS src/gba/*.cpp src/gba/*.c src/gba/*.s)
 file(GLOB mtl_sources_test LIST_DIRECTORIES false CONFIGURE_DEPENDS src/test/*.cpp)
 set(mtl_include_common "include")
 set(mtl_include_test "include/test")
 set(mtl_include_gba "include/gba")
 set(mtl_include_armv4t "include/armv4t")
@ -39,6 +43,14 @@ if (CMAKE_SYSTEM_PROCESSOR STREQUAL "armv4t")
 	target_link_libraries(${PROJECT_NAME} PUBLIC ${PROJECT_NAME}_armv4t)
 endif()
 # Build the test object library whenever BUILD_TESTS holds any truthy value.
 # Comparing against the literal string "ON" skipped the tests when the option
 # was enabled with another accepted spelling (-DBUILD_TESTS=1, TRUE, YES, ...).
 if (BUILD_TESTS)
 	add_library(${PROJECT_NAME}_test OBJECT)
 	target_sources(${PROJECT_NAME}_test PRIVATE "${mtl_sources_test}")
 	target_include_directories(${PROJECT_NAME}_test PUBLIC "${mtl_include_common}")
 	target_include_directories(${PROJECT_NAME}_test PUBLIC "${mtl_include_test}")
 	target_link_libraries(${PROJECT_NAME} PUBLIC ${PROJECT_NAME}_test)
 endif()
 target_sources(${PROJECT_NAME}_common PRIVATE "${mtl_sources_common}")
 target_include_directories(${PROJECT_NAME}_common PUBLIC "${mtl_include_common}")
 target_link_libraries(${PROJECT_NAME} PUBLIC ${PROJECT_NAME}_common)
--- a/README.md
+++ b/README.md
@ -1,3 +1,4 @@
 # mtl
-C++ STL replacement, intended for embedded systems. Aims to be more performant than ETL.
+An embedded data structure and linear algebra library built for the GBA. Aims to be
 more performant than the ETL by taking advantage of GBA-specific features.
--- a/include/mtl/fixed.hpp
+++ b/include/mtl/fixed.hpp
@ -1,83 +1,203 @@
 #pragma once
 #include <cstdint>
 #include <type_traits>
-/**
+#include "mtl/target.hpp"
 * \brief Fixed point multiply assembly implementation
 *
 * DO NOT USE DIRECTLY! Use fixed::operator* instead
 */
 extern "C" int32_t mtl_fixed_mul(int32_t x, int32_t y);
 namespace mtl {
 /**
 * \brief 32-bit Fixed point number
 *
- * Uses a base of 64. ie. the lower 6 bits are after the decimal place,
+ * Uses a base of 256. ie. the lower 8 bits are after the decimal place,
- * the other 26 bits are before the decimal place.
+ * the next 23 bits are before the decimal place, and one negation bit.
 *
- * Valid values are in the range ~[-33'554'431.01, 33'554'432.98]
+ * Valid values are in the range ~[-8'388'608.000, 8'388'607.996]
 *
- * Has a maximum error of +/- 1/128 (~0.0078), integers are always
+ * Has a maximum error of +/- 1/512 (~0.0019), integers are always
- * exactly.
+ * exactly represented. Keep in mind, this error accumulates each time an operation
 * is performed. For example, when computing a vec4 projection matrix, the result may
 * diverge by closer to 0.0156.
 *
 * \par ARM
 *
 * All functions are compiled in ARM-mode because some operators (notably
 * multiplication and division) are much faster in ARM-mode. For optimal
 * performance, fixed point numbers should only be used in ARM-mode
 * code to enable as much inlining as possible.
 */
 class fixed {
 private:
 	int32_t x;
 	/**
 	 * \brief Raw constructor
 	 *
 	 * Creates a new fixed point number with the raw data of x.
 	 *
 	 * \note
 	 *
 	 * DO NOT USE DIRECTLY. Use `from_raw` instead.
 	 *
 	 * \note
 	 *
 	 * DO NOT use to set the fixed number to an integer value, use
 	 * the public constructor instead.
 	 */
-	constexpr fixed(int32_t _x, bool) : x(_x) {}
+	ARM_MODE
 	constexpr fixed(int32_t _x, bool) noexcept : x(_x) {}
 public:
-	constexpr fixed() : x(0) {}
+	ARM_MODE
 	constexpr fixed() noexcept : x(0) {}
 	/**
-	 * \brief 32-bit integer constructor
+	 * \brief Integer constructor
 	 *
 	 * Creates a new fixed point number with the value of the integer.
 	 * Must be within the range represented by fixed point numbers, see
 	 * the class description for more detail.
 	 */
-	constexpr fixed(int32_t _i) : x(_i * 64) {}
+	template <typename T, std::enable_if_t<std::is_integral_v<T>, bool> = true>
 	ARM_MODE
 	constexpr fixed(T _i) noexcept : x(_i * 256) {}
 	/**
 	 * \brief Floating point constructor
 	 *
 	 * Creates a new fixed point number with the closest number to
 	 * the floating point number. Must be within the range represented by
 	 * fixed point numbers, see the class description for more detail.
 	 *
 	 * Must be implemented as a template with enable_if, otherwise passing
 	 * an int (not int32_t) is ambiguous between the promotion to int32_t and
 	 * float.
 	 */
-	constexpr fixed(float _f)
+	template <typename T, std::enable_if_t<std::is_floating_point_v<T>, bool> = true>
 	ARM_MODE
 	constexpr fixed(T _f) noexcept
 		// 0.5 offset rounds to nearest; it carries the sign of _f because the conversion truncates toward zero
-		: x((_f * 64) + 0.5f) {}
+		: x(static_cast<int32_t>((_f * 256) + (_f < 0 ? T(-0.5) : T(0.5)))) {}
-	
+
 	/**
 	 * \brief Raw value factory
 	 *
 	 * Creates a new fixed point number with the raw data of x.
 	 *
 	 * \note
 	 *
 	 * Should not be used unless absolutely needed.
 	 */
 	ARM_MODE
 	static constexpr fixed from_raw(int32_t x) noexcept {
 		return fixed(x, true);
 	}
 	/**
 	 * \brief Raw value accessor
 	 *
 	 * Gets the raw value of the fixed point number. i.e. The fixed point
 	 * number multiplied by 256.
 	 */
 	ARM_MODE
 	constexpr int32_t raw() const noexcept {
 		return x;
 	}
 	/**
 	 * \brief Fixed point addition
 	 *
 	 * Addition with fixed point numbers is the same as with a 32-bit
 	 * integer, so should be extremely quick.
 	 */
-	fixed operator+(fixed rhs) const {
+	ARM_MODE
-		return fixed(x + rhs.x, true);
+	constexpr fixed operator+(fixed rhs) const noexcept {
 		return from_raw(x + rhs.x);
 	}
 	ARM_MODE
 	constexpr fixed& operator+=(fixed rhs) noexcept {
 		x += rhs.x;
 		return *this;
 	}
 	/**
 	 * \brief Fixed point subtraction
 	 */
 	ARM_MODE
 	constexpr fixed operator-(fixed rhs) const noexcept {
 		return from_raw(x - rhs.x);
 	}
 	ARM_MODE
 	constexpr fixed& operator-=(fixed rhs) noexcept {
 		x -= rhs.x;
 		return *this;
 	}
 	ARM_MODE
 	constexpr fixed operator-() const noexcept {
 		return from_raw(-x);
 	}
 	/**
 	 * \brief Fixed point multiplication
 	 *
 	 * Uses an assembly implementation to multiply the two numbers.
 	 * Not as quick as an integer multiplication. Use sparingly.
 	 *
 	 * Tested on the MGBA Gameboy Advance emulator, takes around 70
 	 * cycles when the assembly routine is placed in IWRAM.
 	 * The Gameboy Advance uses an ARM7TDMI, and IWRAM is the fastest
 	 * available RAM.
 	 */
-	fixed operator*(fixed rhs) const {
+	ARM_MODE
-		return fixed(mtl_fixed_mul(x, rhs.x), true);
+	constexpr fixed operator*(fixed rhs) const noexcept {
 		return from_raw(((int64_t)x * rhs.x) >> 8);
 	}
 	ARM_MODE
 	constexpr fixed& operator*=(fixed rhs) noexcept {
 		*this = *this * rhs;
 		return *this;
 	}
 	/**
 	 * \brief Fixed point division
 	 *
 	 * Faster for numerators in domain [-0x7FFFF, 0x7FFFF].
 	 *
 	 * On attempted division by zero, the result is set to the largest
 	 * absolute value possible with the same sign as the numerator. This means
 	 * that if a denominator slowly approaches zero, once it reaches zero
 	 * the quotient's sign will flip. The largest value is used because fixed
 	 * point numbers don't have a representation of infinity.
 	 *
 	 * \par GBA
 	 *
 	 * Placed in IWRAM
 	 */
 	ARM_MODE GBA_IWRAM
 	fixed operator/(fixed rhs) const noexcept;
 	ARM_MODE
 	fixed& operator/=(fixed rhs) noexcept {
 		*this = *this / rhs;
 		return *this;
 	}
 	/**
 	 * \brief Comparison operators
 	 */
 	constexpr bool operator==(fixed rhs) const noexcept {
 		return x == rhs.x;
 	}
 	constexpr bool operator!=(fixed rhs) const noexcept {
 		return x != rhs.x;
 	}
 	constexpr bool operator>(fixed rhs) const noexcept {
 		return x > rhs.x;
 	}
 	constexpr bool operator>=(fixed rhs) const noexcept {
 		return x >= rhs.x;
 	}
 	constexpr bool operator<(fixed rhs) const noexcept {
 		return x < rhs.x;
 	}
 	constexpr bool operator<=(fixed rhs) const noexcept {
 		return x <= rhs.x;
 	}
 };
 /**
 * \brief Stream insertion for fixed point numbers
 *
 * Writes the raw value of `rhs` (see fixed::raw) to `lhs`. The stream is
 * returned by reference so that insertions can be chained without copying
 * the stream object.
 */
 template <typename STREAM_TYPE>
 STREAM_TYPE& operator<<(STREAM_TYPE& lhs, fixed rhs) {
 	lhs << rhs.raw();
 	return lhs;
 }
 } // namespace mtl
--- a/include/mtl/log.hpp
+++ b/include/mtl/log.hpp
@ -21,6 +21,8 @@ constexpr char endl_char = 0;
 constexpr char endl_char = '\n';
 #endif
 using stream_type = basic_string_stream<false, endl_char>;
 /**
 * \brief Log stream
 *
@ -42,7 +44,7 @@ constexpr char endl_char = '\n';
 * On MGBA this also starts the line with "GBA Debug: ", so the line is not
 * completely blank either.
 */
-class stream : public basic_string_stream<false, endl_char> {
+class stream : public stream_type {
 private:
 	level m_log_level;
--- a/include/mtl/mat.hpp
+++ b/include/mtl/mat.hpp
@ -0,0 +1,93 @@
 #pragma once
 #include "mtl/target.hpp"
 #include <cstddef>
 #include "mtl/fixed.hpp"
 namespace mtl {
 template <size_t N>
 class vec;
 /**
 * \brief Fixed point matrix with M rows and N columns
 *
 * Elements are stored row by row in the public array `e`, so `e[i][j]` is the
 * element in row `i`, column `j`.
 */
 template <size_t M, size_t N>
 class mat {
 public:
 	fixed e[M][N];
 	/** \brief Creates a matrix of default constructed elements */
 	constexpr mat() noexcept {}
 	/**
 	 * \brief Creates a matrix from a vector
 	 *
 	 * The components of `v` are written to column 0; every other element
 	 * keeps its default constructed value.
 	 */
 	mat(const vec<M>& v) noexcept {
 		for (size_t i = 0; i < M; ++i) {
 			e[i][0] = v[i];
 		}
 	}
 	/** \brief Creates a matrix by copying every element of a 2D array */
 	constexpr mat(const fixed(&_e)[M][N]) noexcept {
 		for (size_t i = 0; i < M; ++i) {
 			for (size_t j = 0; j < N; ++j) {
 				e[i][j] = _e[i][j];
 			}
 		}
 	}
 	/** \brief Returns a copy of row `i` as a vector. `i` is not bounds-checked. */
 	vec<N> row(size_t i) const noexcept {
 		return vec<N>(e[i]);
 	}
 	/** \brief Returns a copy of column `i` as a vector. `i` is not bounds-checked. */
 	vec<M> col(size_t i) const noexcept {
 		vec<M> r;
 		for (size_t j = 0; j < M; ++j) {
 			r.e[j] = e[j][i];
 		}
 		return r;
 	}
 	// The operators below are only declared here; this header does not
 	// define them.
 	ARM_MODE GBA_IWRAM
 	mat<M, N> operator+(const mat<M, N>& rhs) const;
 	ARM_MODE GBA_IWRAM
 	mat<M, N> operator-(const mat<M, N>& rhs) const;
 	ARM_MODE GBA_IWRAM
 	mat<M, N> operator*(fixed rhs) const;
 	ARM_MODE GBA_IWRAM
 	mat<M, N> operator/(fixed rhs) const;
 	ARM_MODE GBA_IWRAM
 	vec<M> operator*(const vec<N>& rhs) const noexcept;
 	ARM_MODE GBA_IWRAM
 	bool operator==(const mat<M, N>& rhs) const;
 	/** \brief Negation of operator== */
 	bool operator!=(const mat<M, N>& rhs) const {
 		return !(*this == rhs);
 	}
 	// Matrix product: the column count of this matrix (N) must equal the
 	// row count of `rhs`; the result has M rows and RHS_N columns.
 	template <size_t RHS_N>
 	ARM_MODE GBA_IWRAM
 	mat<M, RHS_N> operator*(const mat<N, RHS_N>& rhs) const noexcept;
 	template <size_t S>
 	friend mat<S, S> operator*(const vec<S>& lhs, const mat<1, S>& rhs);
 	/**
 	 * \brief Identity matrix factory
 	 *
 	 * Only usable for square matrices: D is M when M == N and 0 otherwise,
 	 * and the enable_if removes the function when D is 0.
 	 */
 	template <size_t D = (M == N ? M : 0), std::enable_if_t<D != 0, bool> = true>
 	static mat<D, D> identity() {
 		mat<D, D> r;
 		for (size_t i = 0; i < D; ++i) {
 			r.e[i][i] = 1;
 		}
 		return r;
 	}
 	// Translation matrix factory. Only enabled when D == 4 (a 4x4 matrix);
 	// it takes a 3 component vector.
 	template <size_t D = (M == N ? M : 0), std::enable_if_t<D == 4, bool> = true>
 	static mat<M, N> translation(const vec<D - 1>& v); // Not defined in header because `v` will rarely be an r-value
 };
 // Product of an S component vector (left) and a 1xS matrix (right), giving
 // an SxS matrix. Declared only; this header does not define it.
 template <size_t S>
 ARM_MODE GBA_IWRAM
 mat<S, S> operator*(const vec<S>& lhs, const mat<1, S>& rhs);
 } // namespace mtl
--- a/include/mtl/mat_impl.hpp
+++ b/include/mtl/mat_impl.hpp
@ -0,0 +1,136 @@
 /*
 * This file should only be included by files that explicitly instantiate
 * matrix classes.
 */
 #pragma once
 #pragma GCC push_options
 #pragma GCC optimize("unroll-loops")
 #include "mtl/mat.hpp"
 #include "mtl/vec.hpp"
 namespace mtl {
 /**
 * \brief Element-wise matrix addition
 *
 * \returns Matrix holding `e[i][j] + rhs.e[i][j]` for every element.
 */
 template <size_t M, size_t N>
 mat<M, N> mat<M, N>::operator+(const mat<M, N>& rhs) const {
 	mat<M, N> sum;
 	for (size_t i = 0; i != M; ++i) {
 		for (size_t j = 0; j != N; ++j) {
 			const fixed lhs_elem = e[i][j];
 			sum.e[i][j] = lhs_elem + rhs.e[i][j];
 		}
 	}
 	return sum;
 }
 /**
 * \brief Element-wise matrix subtraction
 *
 * \returns Matrix holding `e[i][j] - rhs.e[i][j]` for every element.
 */
 template <size_t M, size_t N>
 mat<M, N> mat<M, N>::operator-(const mat<M, N>& rhs) const {
 	mat<M, N> difference;
 	for (size_t i = 0; i != M; ++i) {
 		for (size_t j = 0; j != N; ++j) {
 			const fixed lhs_elem = e[i][j];
 			difference.e[i][j] = lhs_elem - rhs.e[i][j];
 		}
 	}
 	return difference;
 }
 /**
 * \brief Scales every element of the matrix by `rhs`
 */
 template <size_t M, size_t N>
 mat<M, N> mat<M, N>::operator*(fixed rhs) const {
 	mat<M, N> scaled;
 	for (size_t i = 0; i != M; ++i) {
 		for (size_t j = 0; j != N; ++j) {
 			const fixed elem = e[i][j];
 			scaled.e[i][j] = elem * rhs;
 		}
 	}
 	return scaled;
 }
 /**
 * \brief Divides every element of the matrix by `rhs`
 */
 template <size_t M, size_t N>
 mat<M, N> mat<M, N>::operator/(fixed rhs) const {
 	mat<M, N> quotient;
 	for (size_t i = 0; i != M; ++i) {
 		for (size_t j = 0; j != N; ++j) {
 			const fixed elem = e[i][j];
 			quotient.e[i][j] = elem / rhs;
 		}
 	}
 	return quotient;
 }
 /**
 * \brief Matrix-vector product
 *
 * Component `k` of the result is the dot product of row `k` with `rhs`.
 */
 template <size_t M, size_t N>
 vec<M> mat<M, N>::operator*(const vec<N>& rhs) const noexcept {
 	vec<M> product;
 	for (size_t k = 0; k != M; ++k) {
 		const vec<N> current_row = row(k);
 		product.e[k] = current_row * rhs;
 	}
 	return product;
 }
 /**
 * \brief Matrix-matrix product
 *
 * Element (i, j) of the result is the dot product of row `i` of this matrix
 * with column `j` of `rhs`.
 */
 template <size_t M, size_t N>
 template <size_t RHS_N>
 mat<M, RHS_N> mat<M, N>::operator*(const mat<N, RHS_N>& rhs) const noexcept {
 	mat<M, RHS_N> product;
 	for (size_t i = 0; i != M; ++i) {
 		// The row only depends on the outer index, fetch it once
 		const vec<N> lhs_row = row(i);
 		for (size_t j = 0; j != RHS_N; ++j) {
 			product.e[i][j] = lhs_row * rhs.col(j);
 		}
 	}
 	return product;
 }
 /**
 * \brief Product of a vector and a single-row matrix
 *
 * Element (i, j) of the result is `lhs[i] * rhs.e[0][j]`.
 */
 template <size_t S>
 mat<S, S> operator*(const vec<S>& lhs, const mat<1, S>& rhs) {
 	mat<S, S> product;
 	for (size_t i = 0; i != S; ++i) {
 		for (size_t j = 0; j != S; ++j) {
 			const fixed factor = lhs[i];
 			product.e[i][j] = factor * rhs.e[0][j];
 		}
 	}
 	return product;
 }
 /**
 * \brief Element-wise equality
 *
 * \returns true only if every element equals the matching element of `rhs`.
 */
 template <size_t M, size_t N>
 bool mat<M, N>::operator==(const mat<M, N>& rhs) const {
 	for (size_t i = 0; i != M; ++i) {
 		for (size_t j = 0; j != N; ++j) {
 			const bool same = (e[i][j] == rhs.e[i][j]);
 			if (!same) {
 				return false;
 			}
 		}
 	}
 	return true;
 }
 /**
 * \brief Builds a 4x4 translation matrix
 *
 * The result has ones on the diagonal and the three components of `v` in
 * the top three rows of the last column.
 */
 template <>
 template <size_t D, std::enable_if_t<D == 4, bool>>
 mat<4, 4> mat<4, 4>::translation(const vec<D - 1>& v) {
 	mat<4, 4> result;
 	for (size_t i = 0; i < 4; ++i) {
 		result.e[i][i] = 1;
 	}
 	for (size_t i = 0; i < 3; ++i) {
 		result.e[i][3] = v.e[i];
 	}
 	return result;
 }
 } // namespace mtl
 #pragma GCC pop_options
--- a/include/mtl/queue.hpp
+++ b/include/mtl/queue.hpp
@ -0,0 +1,92 @@
 #pragma once
 #include <cstddef>
 #include <utility>
 #include <mtl/exception.hpp>
 namespace mtl {
 /**
 * \brief Statically allocated queue
 */
 template <typename T>
 class iqueue {
 private:
 	T* m_buf;
 	size_t m_size;
 	const size_t m_capacity;
 	size_t m_begin;
 	size_t m_end;
 	// Index of the most recently pushed element. m_end is the slot the
 	// next push writes to, so the newest element sits one slot before
 	// it, wrapping around the end of the buffer.
 	size_t back_index() const noexcept {
 		return (m_end == 0 ? m_capacity : m_end) - 1;
 	}
 public:
 	using value_type = T;
 	using size_type = size_t;
 	using reference = T&;
 	using const_reference = const T&;
 	/**
 	 * \brief Creates an empty queue on top of an existing buffer
 	 *
 	 * \param buf storage used for the elements
 	 * \param capacity number of elements `buf` can hold
 	 */
 	iqueue(T* buf, size_t capacity) noexcept : m_buf(buf), m_size(0), m_capacity(capacity), m_begin(0), m_end(0) {}
 	/**
 	 * \brief Oldest element in the queue
 	 *
 	 * The queue is not checked for being empty.
 	 */
 	reference front() noexcept {
 		return m_buf[m_begin];
 	}
 	const_reference front() const noexcept {
 		return m_buf[m_begin];
 	}
 	const_reference cfront() const noexcept {
 		return m_buf[m_begin];
 	}
 	/**
 	 * \brief Most recently pushed element
 	 *
 	 * The queue is not checked for being empty.
 	 */
 	reference back() noexcept {
 		return m_buf[back_index()];
 	}
 	const_reference back() const noexcept {
 		return m_buf[back_index()];
 	}
 	const_reference cback() const noexcept {
 		return m_buf[back_index()];
 	}
 	bool empty() const noexcept {
 		return m_size == 0;
 	}
 	size_t size() const noexcept {
 		return m_size;
 	}
 	/**
 	 * \brief Appends an element to the back of the queue
 	 *
 	 * \exception mtl::length_error Thrown if the queue is full
 	 */
 	void push(T value) {
 		if (m_size == m_capacity) {
 			throw mtl::length_error();
 		}
 		m_buf[m_end] = std::move(value);
 		++m_size;
 		++m_end;
 		m_end %= m_capacity;
 	}
 	/**
 	 * \brief Removes the oldest element. Does nothing on an empty queue.
 	 */
 	void pop() noexcept {
 		if (m_size == 0) {
 			return;
 		}
 		--m_size;
 		++m_begin;
 		m_begin %= m_capacity;
 	}
 };
 /**
 * \brief Queue that owns its storage
 *
 * Holds an array of `C` elements and hands it to the iqueue base class as
 * the backing buffer.
 *
 * NOTE(review): no copy constructor is declared here; confirm whether a
 * copied queue would keep pointing at the source object's array.
 */
 template <typename T, size_t C>
 class queue : public iqueue<T> {
 	T m_buf[C];
 public:
 	queue() noexcept
 		: iqueue<T>(m_buf, C) {}
 };
 } // namespace mtl
--- a/include/mtl/string.hpp
+++ b/include/mtl/string.hpp
@ -19,8 +19,8 @@ namespace mtl {
 class istring {
 protected:
 	char* m_str;
 	size_t m_size;
 	size_t m_capacity;
 	size_t m_size;
 public:
 	static constexpr const size_t npos = -1;
--- a/include/mtl/string_view.hpp
+++ b/include/mtl/string_view.hpp
@ -85,6 +85,13 @@ public:
 	int32_t compare(size_t pos, size_t count, const string_view& str) const;
 	int32_t compare(size_t pos1, size_t count1, const string_view& str, size_t pos2, size_t count2 = npos) const;
 	bool operator==(const string_view& rhs) const;
 	bool operator!=(const string_view& rhs) const;
 	bool operator<(const string_view& rhs) const;
 	bool operator<=(const string_view& rhs) const;
 	bool operator>(const string_view& rhs) const;
 	bool operator>=(const string_view& rhs) const;
 	friend bool operator==(const istring& lhs, const istring& rhs);
 	friend bool operator!=(const istring& lhs, const istring& rhs);
 	friend bool operator<(const istring& lhs, const istring& rhs);
--- a/include/mtl/target.hpp
+++ b/include/mtl/target.hpp
@ -0,0 +1,48 @@
 #pragma once
 #ifndef __GNUC__
 #error Failed to create target macros. Compiler is not GCC.
 #endif
 // Inlining control, spelled with GCC attribute syntax.
 #define NOINLINE [[gnu::noinline]]
 #define ALWAYS_INLINE [[gnu::always_inline]]
 // Instruction set selection. When __arm__ is defined:
 //  - TARGET_ARM_MODE / TARGET_THUMB_MODE push the current GCC options and
 //    switch the target to "arm" / "thumb"; TARGET_END_MODE pops them again.
 //  - ARM_MODE / THUMB_MODE are target attributes for a single declaration.
 // Otherwise all of these macros expand to nothing.
 #ifdef __arm__
 #define TARGET_ARM_MODE _Pragma("GCC push_options") _Pragma("GCC target(\"arm\")")
 #define TARGET_THUMB_MODE _Pragma("GCC push_options") _Pragma("GCC target(\"thumb\")")
 #define TARGET_END_MODE _Pragma("GCC pop_options")
 #define ARM_MODE [[gnu::target("arm")]]
 #define THUMB_MODE [[gnu::target("thumb")]]
 #else
 #define TARGET_ARM_MODE
 #define TARGET_THUMB_MODE
 #define TARGET_END_MODE
 #define ARM_MODE
 #define THUMB_MODE
 #endif
 #ifdef __GBA__
 // If a section attribute is used, the symbol will be placed in the section
 // exactly as specified. This means that when -ffunction-sections is used, functions
 // will no longer be placed in separate sections. This macro is used to mimic
 // the effect of -ffunction-sections while still placing the function in IWRAM.
 #define GBA_IWRAM_FUNC(f) [[gnu::section(".iwram." #f), gnu::long_call]]
 #define GBA_IWRAM [[gnu::section(".iwram"), gnu::long_call]]
 #define GBA_EWRAM [[gnu::section(".ewram"), gnu::long_call]]
 #define GBA_IWRAM_DATA [[gnu::section(".iwram")]]
 #define GBA_EWRAM_DATA [[gnu::section(".ewram")]]
 #else
 // Outside of GBA builds every placement macro expands to nothing. This
 // includes the function-like GBA_IWRAM_FUNC, so code that uses it still
 // compiles for other targets.
 #define GBA_IWRAM_FUNC(f)
 #define GBA_IWRAM
 #define GBA_EWRAM
 #define GBA_IWRAM_DATA
 #define GBA_EWRAM_DATA
 #endif
--- a/include/mtl/vec.hpp
+++ b/include/mtl/vec.hpp
@ -0,0 +1,233 @@
 #pragma once
 #include "mtl/target.hpp"
 #include <cstddef>
 #include "mtl/fixed.hpp"
 namespace mtl {
 template <size_t M, size_t N>
 class mat;
 /**
 * \brief Fixed point vector with N components
 *
 * The components are stored in the public array `e`.
 */
 template <size_t N>
 class vec {
 public:
 	fixed e[N];
 	constexpr vec() noexcept {}
 	constexpr vec(const vec<N>& other) noexcept {
 		// We need to explicitly define the copy constructor, otherwise
 		// GCC uses memcpy to copy while in Thumb mode, and that's slow.
#pragma GCC unroll 4 // Force unroll loops. Can't use pragmas or attributes
 		     // because they don't work for inlined functions. Requires
 		     // GCC 8.1
 		for (size_t i = 0; i < N; ++i) {
 			e[i] = other.e[i];
 		}
 	}
 	/** \brief Creates a vector by copying every element of an array */
 	constexpr vec(const fixed (&_e)[N]) noexcept {
#pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			e[i] = _e[i];
 		}
 	}
 	/** \brief Unchecked component access */
 	constexpr fixed& operator[](size_t i) noexcept {
 		return e[i];
 	}
 	constexpr const fixed& operator[](size_t i) const noexcept {
 		return e[i];
 	}
 	/** \brief Component-wise addition */
 	vec<N> operator+(const vec<N>& rhs) const noexcept {
 		vec<N> res;
#pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			res[i] = e[i] + rhs[i];
 		}
 		return res;
 	}
 	/** \brief Component-wise subtraction */
 	vec<N> operator-(const vec<N>& rhs) const noexcept {
 		vec<N> res;
#pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			res[i] = e[i] - rhs[i];
 		}
 		return res;
 	}
 	/** \brief Negates every component */
 	vec<N> operator-() const noexcept {
 		vec<N> res;
#pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			res[i] = -e[i];
 		}
 		return res;
 	}
 	/** \brief Scales every component by `rhs` */
 	vec<N> operator*(fixed rhs) const noexcept {
 		vec<N> res;
#pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			res[i] = e[i] * rhs;
 		}
 		return res;
 	}
 	friend vec<N> operator*(fixed lhs, vec<N> rhs) noexcept {
 		return rhs * lhs;
 	}
 	/** \brief Dot product */
 	fixed operator*(const vec<N>& rhs) const noexcept {
 		fixed res;
#pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			res += e[i] * rhs[i];
 		}
 		return res;
 	}
 	/** \brief Divides every component by `rhs` */
 	vec<N> operator/(fixed rhs) const noexcept {
 		vec<N> r;
#pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			r[i] = e[i] / rhs;
 		}
 		return r;
 	}
 	/** \brief Sum of the squares of all components */
 	fixed magnitude_sqr() const noexcept {
 		fixed r;
#pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			r += e[i] * e[i];
 		}
 		return r;
 	}
 	/** \brief Copies the components into a matrix with a single row */
 	mat<1, N> transpose() const noexcept {
 		mat<1, N> r;
#pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			r.e[0][i] = e[i];
 		}
 		return r;
 	}
 	/**
 	 * \brief Component-wise equality
 	 *
 	 * Const-qualified so that const vectors and temporaries can be
 	 * compared.
 	 */
 	bool operator==(const vec<N>& rhs) const noexcept {
#pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			if (e[i] != rhs.e[i]) {
 				return false;
 			}
 		}
 		return true;
 	}
 	bool operator!=(const vec<N>& rhs) const noexcept {
 		return !(*this == rhs);
 	}
 };
 /**
 * \brief Two component vector with named accessors
 *
 * `x` and `y` are references to `e[0]` and `e[1]` of the same object.
 */
 class vec2 : public vec<2> {
 public:
 	fixed& x = e[0];
 	fixed& y = e[1];
 	constexpr vec2() noexcept {}
 	// The copy constructor must be written out: the implicit one would
 	// copy the reference members and leave x/y bound to the elements of
 	// `other` instead of the elements of the new object.
 	constexpr vec2(const vec2& other) noexcept : vec(other) {}
 	constexpr vec2(const vec<2>& other) noexcept : vec(other) {}
 	constexpr vec2(fixed _x, fixed _y) noexcept {
 		x = _x;
 		y = _y;
 	}
 	constexpr vec2& operator=(const vec2& other) noexcept {
 		// Assign through the base class; calling vec2::operator= here
 		// would recurse forever.
 		vec::operator=(other);
 		return *this;
 	}
 	constexpr vec2& operator=(const vec<2>& other) noexcept {
 		vec::operator=(other);
 		return *this;
 	}
 };
 /**
 * \brief Three component vector with named accessors
 *
 * `x`, `y` and `z` are references to `e[0]`, `e[1]` and `e[2]` of the same
 * object.
 */
 class vec3 : public vec<3> {
 public:
 	fixed& x = e[0];
 	fixed& y = e[1];
 	fixed& z = e[2];
 	constexpr vec3() noexcept {}
 	// The copy constructor must be written out: the implicit one would
 	// copy the reference members and leave x/y/z bound to the elements
 	// of `other` instead of the elements of the new object.
 	constexpr vec3(const vec3& other) noexcept : vec(other) {}
 	constexpr vec3(const vec<3>& other) noexcept : vec(other) {}
 	constexpr vec3(fixed _x, fixed _y, fixed _z) noexcept {
 		x = _x;
 		y = _y;
 		z = _z;
 	}
 	// The reference members delete the implicit copy assignment, so it
 	// is provided explicitly and only copies the elements.
 	constexpr vec3& operator=(const vec3& other) noexcept {
 		vec::operator=(other);
 		return *this;
 	}
 	constexpr vec3& operator=(const vec<3>& other) noexcept {
 		vec::operator=(other);
 		return *this;
 	}
 };
 /**
 * \brief Four component vector with named accessors
 *
 * `x`, `y`, `z` and `w` are references to `e[0]` through `e[3]` of the same
 * object.
 */
 class vec4 : public vec<4> {
 public:
 	fixed& x = e[0];
 	fixed& y = e[1];
 	fixed& z = e[2];
 	fixed& w = e[3];
 	constexpr vec4() noexcept {}
 	// The copy constructor must be written out: the implicit one would
 	// copy the reference members and leave x/y/z/w bound to the elements
 	// of `other` instead of the elements of the new object.
 	constexpr vec4(const vec4& other) noexcept : vec(other) {}
 	constexpr vec4(const vec<4>& other) noexcept : vec(other) {}
 	constexpr vec4(fixed _x, fixed _y, fixed _z, fixed _w) noexcept {
 		x = _x;
 		y = _y;
 		z = _z;
 		w = _w;
 	}
 	// The reference members delete the implicit copy assignment, so it
 	// is provided explicitly and only copies the elements.
 	constexpr vec4& operator=(const vec4& other) noexcept {
 		vec::operator=(other);
 		return *this;
 	}
 	constexpr vec4& operator=(const vec<4>& other) noexcept {
 		vec::operator=(other);
 		return *this;
 	}
 };
 /**
 * \brief Stream insertion for vectors
 *
 * Writes the components between '<' and '>', separated by ", ".
 */
 template <typename STREAM_TYPE, size_t N>
 STREAM_TYPE& operator<<(STREAM_TYPE& lhs, const vec<N>& rhs) {
 	lhs << '<';
 	for (size_t idx = 0; idx != N; ++idx) {
 		// The separator goes in front of every component but the first
 		if (idx != 0) {
 			lhs << ", ";
 		}
 		lhs << rhs.e[idx];
 	}
 	lhs << '>';
 	return lhs;
 }
 } // namespace mtl
--- a/include/mtl/vector.hpp
+++ b/include/mtl/vector.hpp
@ -0,0 +1,314 @@
 #pragma once
 #include <cstddef>
 #include <iterator>
 #include <mtl/exception.hpp>
 namespace mtl {
 /**
 * \brief Resizable statically allocated array
 *
 * Implements most functions from std::vector, see std::vector documentation
 * for function descriptions. Some operations are unimplemented due to the lack
 * of dynamic memory usage
 */
 template <typename T>
 class ivector {
 private:
 	T* m_arr;
 	const size_t m_capacity;
 	size_t m_size;
 public:
 	using value_type = T;
 	using size_type = size_t;
 	using difference_type = ptrdiff_t;
 	using reference = T&;
 	using const_reference = const T&;
 	using pointer = T*;
 	using const_pointer = const T*;
 	using iterator = T*;
 	using const_iterator = const T*;
 	using reverse_iterator = std::reverse_iterator<iterator>;
 	using const_reverse_iterator = std::reverse_iterator<const_iterator>;
 	/**
 	 * \brief Creates an empty vector on top of an existing buffer
 	 *
 	 * \param arr storage used for the elements
 	 * \param capacity number of elements `arr` can hold
 	 */
 	ivector(T* arr, size_t capacity) : m_arr(arr), m_capacity(capacity), m_size(0) {}
 	/**
 	 * \brief Copy constructor
 	 *
 	 * \exception mtl::length_error Thrown if the source vector's size is
 	 * larger than the new capacity
 	 */
 	ivector(T* arr, size_t capacity, const ivector& other) : m_arr(arr), m_capacity(capacity), m_size(0) {
 		// m_size starts at 0 so that it has a defined value on the early
 		// return below.
 		if (&other == this) {
 			return;
 		}
 		if (other.m_size > m_capacity) {
 			throw mtl::length_error();
 		}
 		T* x = m_arr;
 		T* begin = other.m_arr;
 		T* end = other.m_arr + other.m_size;
 		for (; begin != end; ++x, ++begin) {
 			*x = *begin;
 		}
 		m_size = other.m_size;
 	}
 	/**
 	 * \brief Move constructor (deleted)
 	 *
 	 * An ivector does not own its storage, so a moved-to object has no
 	 * buffer or capacity of its own to receive the elements. The
 	 * previous definition read m_arr and m_capacity without ever
 	 * initializing them.
 	 */
 	ivector(ivector&& other) = delete;
 	~ivector() {}
 	/** \brief Not implemented, always throws mtl::system_error */
 	ivector& operator=(ivector rhs) {
 		throw mtl::system_error();
 		return *this;
 	}
 	/** \brief Not implemented, always throws mtl::system_error */
 	void assign(size_t count, const T& value) {
 		throw mtl::system_error();
 	}
 	/**
 	 * \brief Assigns values to the vector
 	 *
 	 * \exception mtl::length_error Thrown if the number of source elements
 	 * is larger than the capacity. The elements copied before the
 	 * capacity was reached stay in the vector.
 	 */
 	template <typename It>
 	void assign(It begin, It end) {
 		m_size = 0;
 		T* x = m_arr;
 		for (; begin != end; ++begin, ++x, ++m_size) {
 			// Check before writing so the buffer is never overrun
 			if (m_size == m_capacity) {
 				throw mtl::length_error();
 			}
 			*x = *begin;
 		}
 	}
 	/**
 	 * \brief Bounds-checked element access
 	 *
 	 * \exception mtl::out_of_range Thrown if pos >= size
 	 */
 	reference at(size_t pos) {
 		if (pos >= m_size) {
 			throw mtl::out_of_range();
 		}
 		return m_arr[pos];
 	}
 	const_reference at(size_t pos) const {
 		if (pos >= m_size) {
 			throw mtl::out_of_range();
 		}
 		return m_arr[pos];
 	}
 	/** \brief Unchecked element access */
 	reference operator[](size_t pos) {
 		return m_arr[pos];
 	}
 	const_reference operator[](size_t pos) const {
 		return m_arr[pos];
 	}
 	reference front() {
 		return m_arr[0];
 	}
 	const_reference front() const {
 		return m_arr[0];
 	}
 	reference back() {
 		return m_arr[m_size - 1];
 	}
 	const_reference back() const {
 		return m_arr[m_size - 1];
 	}
 	pointer data() {
 		return m_arr;
 	}
 	const_pointer data() const {
 		return m_arr;
 	}
 	iterator begin() {
 		return m_arr;
 	}
 	const_iterator begin() const {
 		return m_arr;
 	}
 	const_iterator cbegin() const {
 		return m_arr;
 	}
 	iterator end() {
 		return m_arr + m_size;
 	}
 	const_iterator end() const {
 		return m_arr + m_size;
 	}
 	const_iterator cend() const {
 		return m_arr + m_size;
 	}
 	reverse_iterator rbegin() {
 		return reverse_iterator(end());
 	}
 	const_reverse_iterator rbegin() const {
 		return const_reverse_iterator(cend());
 	}
 	const_reverse_iterator crbegin() const {
 		return const_reverse_iterator(cend());
 	}
 	reverse_iterator rend() {
 		return  reverse_iterator(begin());
 	}
 	const_reverse_iterator rend() const {
 		return  const_reverse_iterator(cbegin());
 	}
 	const_reverse_iterator crend() const {
 		return  const_reverse_iterator(cbegin());
 	}
 	bool empty() const {
 		return m_size == 0;
 	}
 	size_t size() const {
 		return m_size;
 	}
 	size_t capacity() const {
 		return m_capacity;
 	}
 	/** \brief Sets the size to zero; the stored elements are not modified */
 	void clear() {
 		m_size = 0;
 	}
 	/**
 	 * \brief Inserts `value` in front of the element at `pos`
 	 *
 	 * \exception mtl::length_error Thrown if the vector is full
 	 * \returns iterator of the inserted element
 	 */
 	iterator insert(const_iterator pos, T value) {
 		if (m_size == m_capacity) {
 			throw mtl::length_error();
 		}
 		// Shift the tail one slot towards the end, starting at the back
 		iterator it = end();
 		while (it != pos) {
 			*it = *(it - 1);
 			--it;
 		}
 		*it = std::move(value);
 		++m_size;
 		return it;
 	}
 	/**
 	 * \brief Erase the element at the given iterator
 	 *
 	 * \param pos iterator of the element to erase
 	 * \exception nothrow if `T` is nothrow copy assignable
 	 * \returns iterator of the element after element erased
 	 */
 	iterator erase(const_iterator pos) noexcept(std::is_nothrow_copy_assignable<T>::value) {
 		iterator it = const_cast<iterator>(pos);
 		iterator last = end() - 1;
 		while (it != last) {
 			*it = *(it + 1);
 			++it;
 		}
 		--m_size;
 		return const_cast<iterator>(pos);
 	}
 	/**
 	 * \brief Appends an element
 	 *
 	 * \exception mtl::length_error Thrown if the vector is full
 	 */
 	void push_back(T value) {
 		if (m_size == m_capacity) {
 			throw mtl::length_error();
 		}
 		m_arr[m_size] = std::move(value);
 		++m_size;
 	}
 	/** \brief Removes the last element. Does nothing on an empty vector. */
 	void pop_back() noexcept {
 		if (m_size > 0) {
 			--m_size;
 		}
 	}
 };
 /**
 * \brief Vector that owns its storage
 *
 * Holds an array of `C` elements and hands it to the ivector base class as
 * the backing buffer.
 */
 template <typename T, size_t C>
 class vector : public ivector<T> {
 private:
 	T m_arr[C];
 public:
 	vector() : ivector<T>(m_arr, C) { }
 	/**
 	 * \brief Copies the elements of any ivector
 	 *
 	 * The new vector is left empty if `other` holds more than `C`
 	 * elements.
 	 */
 	vector(const ivector<T>& other) : ivector<T>(m_arr, C) {
 		// The base class has no default constructor, so it must be
 		// bound to this object's own storage before assigning.
 		if (other.size() > C) {
 			return;
 		}
 		this->assign(other.begin(), other.end());
 	}
 	vector(const vector<T, C>& other) : ivector<T>(m_arr, C) {
 		if (other.size() > C) {
 			return;
 		}
 		this->assign(other.begin(), other.end());
 	}
 	vector<T, C>& operator=(const ivector<T>& rhs) {
 		this->assign(rhs.begin(), rhs.end());
 		return *this;
 	}
 	// TODO: FIX BAD ASSIGNMENT OPERATORS
 	vector<T, C>& operator=(const vector<T, C>& rhs) {
 		this->assign(rhs.begin(), rhs.end());
 		return *this;
 	}
 };
 } // namespace mtl
--- a/include/test/mtl/test.hpp
+++ b/include/test/mtl/test.hpp
@ -0,0 +1,112 @@
 #pragma once
 #ifdef __GBA__
 #ifndef REG_TM2D
 #define REG_TM2D *(volatile uint16_t*)(0x04000108)
 #endif
 #ifndef REG_TM2CNT
 #define REG_TM2CNT *(volatile uint16_t*)(0x0400010A)
 #endif
 #ifndef TM_ENABLE
 #define TM_ENABLE 0x80
 #endif
 #endif
 #include "mtl/log.hpp"
 #include "mtl/string_view.hpp"
 #include "mtl/vector.hpp"
 namespace mtl {
 namespace test {
 template <typename SUITE_TYPE>
 class suite {
 public:
 	using TEST_TYPE = bool (*)();
 	vector<TEST_TYPE, 256> m_tests;
 	vector<string_view, 256> m_test_names;
 	virtual string_view name() = 0;
 	void add_test(TEST_TYPE test, const string_view& name) {
 		m_tests.push_back(test);
 		m_test_names.push_back(name);
 	}
 	static void reset_timer() {
 #ifdef __GBA__
 		REG_TM2D = UINT16_MAX;
 		// We must enable and disable the timer to write UINT16_MAX
 		// to the timer register
 		REG_TM2CNT = TM_ENABLE;
 		REG_TM2CNT &= ~TM_ENABLE;
 #endif
 	}
 	static void start_timer() {
 #ifdef __GBA__
 		REG_TM2D = 0;
 		REG_TM2CNT = TM_ENABLE;
 #endif
 	}
 	static void end_timer() {
 #ifdef __GBA__
 		REG_TM2CNT &= ~TM_ENABLE;
 #endif
 	}
 	static uint16_t query_timer() {
 #ifdef __GBA__
 		return REG_TM2D;
 #endif
 		return UINT16_MAX;
 	}
 	static bool run_tests() {
 		log::info << "=========================" << endl;
 		log::info << "Running suite \"" << instance().name() << '\"' << endl;
 		log::info << endl;
 		size_t num_ok = 0;
 		size_t num_fail = 0;
 		for (size_t i = 0; i < instance().m_tests.size(); ++i) {
 			TEST_TYPE test = instance().m_tests[i];
 			string_view name = instance().m_test_names[i];
 			log::info << "Running \"" << name << "\"..." << endl;
 			reset_timer();
 			bool result = test();
 			log::info << (result ? "OK" : "FAILED") << ", TIME: " << query_timer()  << endl;
 			if (result) { ++num_ok; }
 			else { ++num_fail; }
 		}
 		log::info << endl;
 		log::info << "Finished. Tests OK: " << num_ok << '/' << num_ok + num_fail << endl;
 		log::info << (num_fail == 0 ? "ALL OK" : "FAILED") << endl;
 		log::info << "=========================" << endl;
 		return num_fail == 0;
 	}
 	static SUITE_TYPE& instance() {
 		static SUITE_TYPE instance;
 		return instance;
 	}
 };
 } // namespace test
 } // namespace mtl
--- a/include/test/mtl/test/fixed.hpp
+++ b/include/test/mtl/test/fixed.hpp
@ -0,0 +1,44 @@
 #pragma once
 #include "mtl/test.hpp"
 #include "mtl/fixed.hpp"
 namespace mtl {
 namespace test {
 /**
 * \brief Test suite for mtl::fixed
 */
 class fixed_suite : public suite<fixed_suite> {
 public:
 	fixed_suite() {
 		add_test(&construction, "construction");
 		add_test(&addition, "addition");
 		add_test(&subtraction, "subtraction");
 		add_test(&multiplication, "multiplication");
 		add_test(&mult_overflow, "mult_overflow");
 		add_test(&division, "division");
 	}
 	virtual string_view name() {
 		return "fixed_suite";
 	}
 	/** \brief Checks that the integer constructor scales by the fixed point base */
 	static bool construction() {
 		start_timer();
 		// fixed stores 8 fractional bits, so the raw value is the
 		// integer multiplied by 256
 		bool r = fixed(7).raw() == 7 * 256;
 		end_timer();
 		return r;
 	}
 	// All tests are placed in IWRAM in ARM mode.
 	static bool addition();
 	static bool subtraction();
 	static bool multiplication();
 	static bool mult_overflow();
 	static bool division();
 };
 } // namespace test
 } // namespace mtl
--- a/include/test/mtl/test/mat.hpp
+++ b/include/test/mtl/test/mat.hpp
@ -0,0 +1,64 @@
 #pragma once
 #include "mtl/test.hpp"
 #include "mtl/target.hpp"
 namespace mtl {
 namespace test {
 // Test suite for the mat<R, C> matrix type (2x2, 3x3 and 4x4 cases).
 class mat_suite : public suite<mat_suite> {
 public:
 	// Registers each test function together with the name shown for it.
 	mat_suite() {
 		add_test(&construction_m2, "construction_m2");
 		add_test(&addition_m2, "addition_m2");
 		add_test(&addition_m3, "addition_m3");
 		add_test(&addition_m4, "addition_m4");
 		add_test(&subtraction_m2, "subtraction_m2");
 		add_test(&subtraction_m3, "subtraction_m3");
 		add_test(&subtraction_m4, "subtraction_m4");
 		add_test(&mult_vec_m2, "mult_vec_m2");
 		add_test(&mult_vec_m3, "mult_vec_m3");
 		add_test(&mult_vec_m4, "mult_vec_m4");
 		add_test(&mult_mat_m2, "mult_mat_m2");
 		add_test(&mult_mat_m3, "mult_mat_m3");
 		add_test(&mult_mat_m4, "mult_mat_m4");
 		add_test(&projection_build_m4, "projection_build_m4");
 		add_test(&projection_calc_m4, "projection_calc_m4");
 	}
 	// Name reported for this suite.
 	virtual string_view name() {
 		return "mat_suite";
 	}
 	// Test bodies are defined out of line; each returns true on success.
 	static bool construction_m2();
 	static bool addition_m2();
 	static bool addition_m3();
 	static bool addition_m4();
 	static bool subtraction_m2();
 	static bool subtraction_m3();
 	static bool subtraction_m4();
 	static bool mult_vec_m2();
 	static bool mult_vec_m3();
 	static bool mult_vec_m4();
 	static bool mult_mat_m2();
 	static bool mult_mat_m3();
 	static bool mult_mat_m4();
 	static bool projection_build_m4();
 	static bool projection_calc_m4();
 };
 } // namespace test
 } // namespace mtl
--- a/include/test/mtl/test/vec.hpp
+++ b/include/test/mtl/test/vec.hpp
@ -0,0 +1,104 @@
 #pragma once
 #include "mtl/test.hpp"
 #include "mtl/vec.hpp"
 namespace mtl {
 namespace test {
 // Test suite for the vec2, vec3 and vec4 vector types.
 class vec_suite : public suite<vec_suite> {
 public:
 	// Registers each test function together with the name shown for it.
 	vec_suite() {
 		// The construction tests carry the same _vN suffix as every other
 		// test so the three of them can be told apart in the log output.
 		add_test(&construction_v2, "construction_v2");
 		add_test(&construction_v3, "construction_v3");
 		add_test(&construction_v4, "construction_v4");
 		add_test(&addition_v2, "addition_v2");
 		add_test(&addition_v3, "addition_v3");
 		add_test(&addition_v4, "addition_v4");
 		add_test(&subtraction_v2, "subtraction_v2");
 		add_test(&subtraction_v3, "subtraction_v3");
 		add_test(&subtraction_v4, "subtraction_v4");
 		add_test(&negation_v2, "negation_v2");
 		add_test(&negation_v3, "negation_v3");
 		add_test(&negation_v4, "negation_v4");
 		add_test(&mult_scalar_v2, "mult_scalar_v2");
 		add_test(&mult_scalar_v3, "mult_scalar_v3");
 		add_test(&mult_scalar_v4, "mult_scalar_v4");
 		add_test(&dot_v2, "dot_v2");
 		add_test(&dot_v3, "dot_v3");
 		add_test(&dot_v4, "dot_v4");
 		add_test(&division_scalar_v2, "division_scalar_v2");
 		add_test(&division_scalar_v3, "division_scalar_v3");
 		add_test(&division_scalar_v4, "division_scalar_v4");
 		add_test(&magnitude_sqr_v2, "magnitude_sqr_v2");
 		add_test(&magnitude_sqr_v3, "magnitude_sqr_v3");
 		add_test(&magnitude_sqr_v4, "magnitude_sqr_v4");
 		add_test(&transpose_v2, "transpose_v2");
 		add_test(&transpose_v3, "transpose_v3");
 		add_test(&transpose_v4, "transpose_v4");
 		add_test(&projection_v4, "projection_v4");
 	}
 	// Name reported for this suite.
 	virtual string_view name() {
 		return "vec_suite";
 	}
 	// Passes when the two constructor arguments end up in x and y.
 	static bool construction_v2() {
 		vec2 a(1, 2);
 		return a.x == 1 && a.y == 2;
 	}
 	// Passes when the three constructor arguments end up in x, y and z.
 	static bool construction_v3() {
 		vec3 a(1, 2, 3);
 		return a.x == 1 && a.y == 2 && a.z == 3;
 	}
 	// Passes when the four constructor arguments end up in x, y, z and w.
 	static bool construction_v4() {
 		vec4 a(1, 2, 3, 4);
 		return a.x == 1 && a.y == 2 && a.z == 3 && a.w == 4;
 	}
 	// Test bodies below are defined out of line; each returns true on success.
 	static bool addition_v2();
 	static bool addition_v3();
 	static bool addition_v4();
 	static bool subtraction_v2();
 	static bool subtraction_v3();
 	static bool subtraction_v4();
 	static bool negation_v2();
 	static bool negation_v3();
 	static bool negation_v4();
 	static bool mult_scalar_v2();
 	static bool mult_scalar_v3();
 	static bool mult_scalar_v4();
 	static bool dot_v2();
 	static bool dot_v3();
 	static bool dot_v4();
 	static bool division_scalar_v2();
 	static bool division_scalar_v3();
 	static bool division_scalar_v4();
 	static bool magnitude_sqr_v2();
 	static bool magnitude_sqr_v3();
 	static bool magnitude_sqr_v4();
 	static bool transpose_v2();
 	static bool transpose_v3();
 	static bool transpose_v4();
 	static bool projection_v4();
 };
 } // namespace test
 } // namespace mtl
--- a/src/armv4t/fixed.s
+++ b/src/armv4t/fixed.s
@ -1,11 +0,0 @@
 @ mtl_fixed_mul: multiplies the two signed 32-bit values in r0 and r1 as a
 @ 64-bit product and returns bits [37:6] of that product in r0 (i.e. the
 @ product shifted right by 6). Uses r2 and r3 as scratch registers.
 .section .iwram, "ax", %progbits
 .arm
 .align 2
 .global mtl_fixed_mul
 .type mtl_fixed_mul STT_FUNC
 mtl_fixed_mul:
 @ r3:r2 = r0 * r1 (signed 64-bit result; r2 = low word, r3 = high word)
 smull	r2, r3, r0, r1
 @ r0 = low word shifted right by 6
 lsr	r0, r2, #6
 @ merge the low 6 bits of the high word into the top of r0
 orr	r0, r3, lsl #26
 @ return to the caller
 bx	lr
--- a/src/common/log.cpp
+++ b/src/common/log.cpp
--- a/src/common/mat_1x2.cpp
+++ b/src/common/mat_1x2.cpp
@ -0,0 +1,8 @@
 #include "mtl/mat_impl.hpp"
 namespace mtl {
 // Explicit instantiation of mat<1, 2>, so its members are generated in this
 // translation unit (definitions presumably come from mtl/mat_impl.hpp).
 template class mat<1, 2>;
 }
--- a/src/common/mat_1x3.cpp
+++ b/src/common/mat_1x3.cpp
@ -0,0 +1,8 @@
 #include "mtl/mat_impl.hpp"
 namespace mtl {
 // Explicit instantiation of mat<1, 3>, so its members are generated in this
 // translation unit (definitions presumably come from mtl/mat_impl.hpp).
 template class mat<1, 3>;
 }
--- a/src/common/mat_1x4.cpp
+++ b/src/common/mat_1x4.cpp
@ -0,0 +1,8 @@
 #include "mtl/mat_impl.hpp"
 namespace mtl {
 // Explicit instantiation of mat<1, 4>, so its members are generated in this
 // translation unit (definitions presumably come from mtl/mat_impl.hpp).
 template class mat<1, 4>;
 }
--- a/src/common/mat_2x2.cpp
+++ b/src/common/mat_2x2.cpp
@ -0,0 +1,11 @@
 #include "mtl/mat_impl.hpp"
 namespace mtl {
 // Explicit instantiation of the 2x2 matrix class.
 template class mat<2, 2>;
 // Free operator*: vec<2> times mat<1, 2>, yielding a mat<2, 2>.
 template mat<2, 2> operator*(const vec<2>&, const mat<1, 2>&);
 // Member operator*: 2x2 matrix times 2x2 matrix.
 template mat<2, 2> mat<2, 2>::operator*(const mat<2, 2>&) const;
 }
--- a/src/common/mat_3x3.cpp
+++ b/src/common/mat_3x3.cpp
@ -0,0 +1,11 @@
 #include "mtl/mat_impl.hpp"
 namespace mtl {
 // Explicit instantiation of the 3x3 matrix class.
 template class mat<3, 3>;
 // Free operator*: vec<3> times mat<1, 3>, yielding a mat<3, 3>.
 template mat<3, 3> operator*(const vec<3>&, const mat<1, 3>&);
 // Member operator*: 3x3 matrix times 3x3 matrix.
 template mat<3, 3> mat<3, 3>::operator*(const mat<3, 3>&) const;
 }
--- a/src/common/mat_4x4.cpp
+++ b/src/common/mat_4x4.cpp
@ -0,0 +1,12 @@
 #include "mtl/mat_impl.hpp"
 namespace mtl {
 // Explicit instantiation of the 4x4 matrix class.
 template class mat<4, 4>;
 // Free operator*: vec<4> times mat<1, 4>, yielding a mat<4, 4>.
 template mat<4, 4> operator*(const vec<4>&, const mat<1, 4>&);
 // Member operator*: 4x4 matrix times 4x4 matrix.
 template mat<4, 4> mat<4, 4>::operator*(const mat<4, 4>&) const;
 // Member template translation<4, true>, taking a vec<3> and returning a mat<4, 4>.
 template mat<4, 4> mat<4, 4>::translation<4, true>(const vec<3>&);
 }
--- a/src/common/string.cpp
+++ b/src/common/string.cpp
@ -2,6 +2,7 @@
 #include <cstddef>
 #include <cstring>
 #include <algorithm>
 #include "mtl/utility.hpp"
 #include "mtl/string_view.hpp"
--- a/src/common/string_view.cpp
+++ b/src/common/string_view.cpp
@ -18,4 +18,11 @@ size_t string_view::copy(char* dest, size_t count, size_t pos) const {
 	return count;
 }
 // Equality: true when strcmp() finds the two underlying character buffers
 // identical.
 // NOTE(review): strcmp() reads up to a NUL terminator in both buffers --
 // confirm that string_view always refers to NUL-terminated text.
 bool string_view::operator==(const string_view& rhs) const {
 	const int order = strcmp(m_str, rhs.m_str);
 	return order == 0;
 }
 // Inequality: true when strcmp() finds a difference between the two
 // underlying character buffers.
 bool string_view::operator!=(const string_view& rhs) const {
 	const int order = strcmp(m_str, rhs.m_str);
 	return order != 0;
 }
 } // namespace mtl
--- a/src/gba/fixed.cpp
+++ b/src/gba/fixed.cpp
@ -0,0 +1,55 @@
 #include "mtl/fixed.hpp"
 namespace mtl {
 // Fixed-point division built on the GBA BIOS Div software interrupt.
 //
 // Returns from_raw() of the value the assembly below leaves in raw_result.
 // A zero divisor never reaches the BIOS call: the result saturates to
 // 0x80000000 when the numerator is negative and to 0x7FFFFFFF otherwise.
 //
 // NOTE(review): the #8 shifts and the 0x7f800000 overflow mask assume 8
 // fractional bits, while fixed_suite::construction expects
 // fixed(7).raw() == 7 * 64 -- confirm the intended fixed-point format.
 // NOTE(review): in the compatible path "lsr r1, %[d], #8" is a logical shift
 // (suspicious for a negative divisor) and yields 0 for 0 < d < 256, which
 // makes the second Div call divide by zero -- confirm.
 fixed fixed::operator/(fixed rhs) const noexcept {
 	int32_t raw_result;
 	asm(
 			// This division implementation has two methods it can use.
 			// The fastest uses a left shift followed by a single division. The value is shifted
 			// first to preserve the decimal part. Unfortunately, this means large numerators
 			// will cause the operation to overflow. In this case, a compatible method will be
 			// used. This method uses two divisions, one to calculate the integral quotient,
 			// and one to calculate the decimal part. Both these methods work for negative numbers as well.
 			".arm;"
 			"movs	r1, %[d];"            // Load numerator and denominator, and check if negative or zero
 			"beq	4f;"
 			"movs	r0, %[n];"
 			"ldr	r3, =#0x7f800000;"    // Load constant to check for overflow
 			"blt	1f;"
 			"tst	r0, r3;"              // Check if the numerator is large enough to overflow from the leftshift
 			"bne	3f;"
 			"b	2f;"
 			"1:"	// check_negative
 			"mvn	r2, r0;"              // Check if the numerator is large enough to overflow from the leftshift
 			"tst	r2, r3;"
 			"bne	3f;"
 			"2:"	// fast_div           // Fast method
 			"lsl	r0, #8;"              // Shift first to avoid truncation
 			"swi	#0x60000;"            // GBA Div syscall
 			"mov	%[res], r0;"
 			"b	5f;"
 			"3:"	// compat_div         // Compatible method
 			"swi	#0x60000;"            // Compute quotient and shift
 			"lsl	r2, r0, #8;"
 			"mov	r0, r1;"              // Div syscall puts the modulus in r1, use it as the numerator
 			"lsr	r1, %[d], #8;"        // Load the denominator again, shifted right to calculate decimal part
 			"swi	#0x60000;"
 			"mov	%[res], r2;"          // Calculate the final result
 			"add	%[res], r0;"
 			"b	5f;"
 			"4:"	// zero_div
 			"teq	%[n], %[d];"          // Set result to largest possible negative/positive value.
 			"movmi	%[res], #0x80000000;"
 			"movpl	%[res], #0x7FFFFFFF;"
 			"5:"
 			: [res] "=r" (raw_result)
 			: [n] "r" (x),
 			  [d] "r" (rhs.x)
 			// r0-r3 are used as scratch and by the Div call. The movs/tst/teq
 			// instructions rewrite the condition flags, so "cc" is listed too;
 			// without it the compiler may assume the flags survive the asm.
 			: "r0", "r1", "r2", "r3", "cc"
 			);
 	return from_raw(raw_result);
 }
 } // namespace mtl
--- a/src/test/fixed.cpp
+++ b/src/test/fixed.cpp
@ -0,0 +1,84 @@
 #include "mtl/test/fixed.hpp"
 #include "mtl/target.hpp"
 #include "mtl/fixed.hpp"
 TARGET_ARM_MODE
 namespace mtl {
 namespace test{
 bool fixed_suite::addition() {
 	struct {
 		NOINLINE fixed operator()(fixed x, fixed y) {
 			start_timer();
 			fixed r = x + y;
 			end_timer();
 			return r;
 		}
 	} f;
 	return f(3, 4) == fixed(7);
 }
 bool fixed_suite::subtraction() {
 	struct {
 		NOINLINE fixed operator()(fixed x, fixed y) {
 			start_timer();
 			fixed r = x - y;
 			end_timer();
 			return r;
 		}
 	} f;
 	return f(8, 3) == fixed(5);
 }
 bool fixed_suite::multiplication() {
 	struct {
 		NOINLINE fixed operator()(fixed x, fixed y) {
 			start_timer();
 			fixed r = x * y;
 			end_timer();
 			return r;
 		}
 	} f;
 	return f(3, 4) == fixed(12);
 }
 bool fixed_suite::mult_overflow() {
 	// This tests fixed point multiplication where the intermediate result
 	// (x.raw * y.raw) overflows int32 max still works. (IE. int64 multiplication
 	// is used internally)
 	struct {
 		NOINLINE fixed operator()(fixed x, fixed y) {
 			start_timer();
 			fixed r = x * y;
 			end_timer();
 			return r;
 		}
 	} f;
 	return f(1048575, 8) == fixed(8388600);
 }
 bool fixed_suite::division() {
 	struct {
 		NOINLINE fixed operator()(fixed x, fixed y) {
 			start_timer();
 			fixed r = x / y;
 			end_timer();
 			return r;
 		}
 	} f;
 	return f(73, 13) == fixed(73.0 / 13.0);
 }
 }
 }
 TARGET_END_MODE
--- a/src/test/mat.cpp
+++ b/src/test/mat.cpp
@ -0,0 +1,446 @@
 #include "mtl/test/mat.hpp"
 #include "mtl/target.hpp"
 #include "mtl/mat.hpp"
 #include "mtl/vec.hpp"
 namespace mtl {
 namespace test {
 bool mat_suite::construction_m2() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		mat<2, 2> operator()(fixed a, fixed b, fixed c, fixed d) {
 			start_timer();
 			mat<2, 2> r({
 					{ a, b },
 					{ c, d },
 					});
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<2, 2> a = f(1, 2, 3, 4);
 	return a.row(0)[0] == 1 && a.row(0)[1] == 2 &&
 	       a.row(1)[0] == 3 && a.row(1)[1] == 4;
 }
 bool mat_suite::addition_m2() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		mat<2, 2> operator()(mat<2, 2> a, mat<2, 2> b) {
 			start_timer();
 			mat<2, 2> r = a + b;
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<2, 2> a({
 			{ 1, 2 },
 			{ 3, 4 },
 			});
 	mat<2, 2> b({
 			{ 10, 20 },
 			{ 30, 40 },
 			});
 	mat<2, 2> c = f(a, b);
 	mat<2, 2> exp({
 			{ 11, 22 },
 			{ 33, 44 },
 			});
 	return c == exp;
 }
 bool mat_suite::addition_m3() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		mat<3, 3> operator()(mat<3, 3> a, mat<3, 3> b) {
 			start_timer();
 			mat<3, 3> r = a + b;
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<3, 3> a({
 			{ 1, 2, 3 },
 			{ 4, 5, 6 },
 			{ 7, 8, 9 },
 			});
 	mat<3, 3> b({
 			{ 10, 20, 30 },
 			{ 40, 50, 60 },
 			{ 70, 80, 90 },
 			});
 	mat<3, 3> c = f(a, b);
 	mat<3, 3> exp({
 			{ 11, 22, 33 },
 			{ 44, 55, 66 },
 			{ 77, 88, 99 },
 			});
 	return c == exp;
 }
 bool mat_suite::addition_m4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		mat<4, 4> operator()(mat<4, 4> a, mat<4, 4> b) {
 			start_timer();
 			mat<4, 4> r = a + b;
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<4, 4> a({
 			{ 1, 2, 3, 4 },
 			{ 5, 6, 7, 8 },
 			{ 9, 1, 2, 3 },
 			{ 4, 5, 6, 7 },
 			});
 	mat<4, 4> b({
 			{ 10, 20, 30, 40 },
 			{ 50, 60, 70, 80 },
 			{ 90, 10, 20, 30 },
 			{ 40, 50, 60, 70 },
 			});
 	mat<4, 4> c = f(a, b);
 	mat<4, 4> exp({
 			{ 11, 22, 33, 44 },
 			{ 55, 66, 77, 88 },
 			{ 99, 11, 22, 33 },
 			{ 44, 55, 66, 77 },
 			});
 	return c == exp;
 }
 bool mat_suite::subtraction_m2() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		mat<2, 2> operator()(mat<2, 2> a, mat<2, 2> b) {
 			start_timer();
 			mat<2, 2> r = a - b;
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<2, 2> a({
 			{ 11, 22 },
 			{ 33, 44 },
 			});
 	mat<2, 2> b({
 			{ 1, 2 },
 			{ 3, 4 },
 			});
 	mat<2, 2> c = f(a, b);
 	mat<2, 2> exp({
 			{ 10, 20 },
 			{ 30, 40 },
 			});
 	return c == exp;
 }
 bool mat_suite::subtraction_m3() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		mat<3, 3> operator()(mat<3, 3> a, mat<3, 3> b) {
 			start_timer();
 			mat<3, 3> r = a - b;
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<3, 3> a({
 			{ 11, 22, 33 },
 			{ 44, 55, 66 },
 			{ 77, 88, 99 },
 			});
 	mat<3, 3> b({
 			{ 1, 2, 3 },
 			{ 4, 5, 6 },
 			{ 7, 8, 9 },
 			});
 	mat<3, 3> c = f(a, b);
 	mat<3, 3> exp({
 			{ 10, 20, 30 },
 			{ 40, 50, 60 },
 			{ 70, 80, 90 },
 			});
 	return c == exp;
 }
 bool mat_suite::subtraction_m4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		mat<4, 4> operator()(mat<4, 4> a, mat<4, 4> b) {
 			start_timer();
 			mat<4, 4> r = a - b;
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<4, 4> a({
 			{ 11, 22, 33, 44 },
 			{ 55, 66, 77, 88 },
 			{ 99, 11, 22, 33 },
 			{ 44, 55, 66, 77 },
 			});
 	mat<4, 4> b({
 			{ 1, 2, 3, 4 },
 			{ 5, 6, 7, 8 },
 			{ 9, 1, 2, 3 },
 			{ 4, 5, 6, 7 },
 			});
 	mat<4, 4> c = f(a, b);
 	mat<4, 4> exp({
 			{ 10, 20, 30, 40 },
 			{ 50, 60, 70, 80 },
 			{ 90, 10, 20, 30 },
 			{ 40, 50, 60, 70 },
 			});
 	return c == exp;
 }
 bool mat_suite::mult_vec_m2() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec2 operator()(mat<2, 2> a, vec2 b) {
 			start_timer();
 			vec2 r = a * b;
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<2, 2> a({
 			{ 1, 2 }, // col 1
 			{ 3, 4 }  // col 2
 			});
 	vec2 b(1, 2);
 	vec2 c = f(a, b);
 	return true;
 }
 bool mat_suite::mult_vec_m3() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec3 operator()(mat<3, 3> a, vec3 b) {
 			start_timer();
 			vec3 r = a * b;
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<3, 3> a({
 			{ 1, 2, 3 }, // col 1
 			{ 4, 5, 6 }, // col 2
 			{ 7, 8, 9 }, // col 3
 			});
 	vec3 b(1, 2, 3);
 	f(a, b);
 	return true;
 }
 bool mat_suite::mult_vec_m4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec4 operator()(mat<4, 4> a, vec4 b) {
 			start_timer();
 			vec4 r = a * b;
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<4, 4> a({
 			{ 1, 2, 3, 4 }, // col 1
 			{ 5, 6, 7, 8 }, // col 2
 			{ 9, 1, 2, 3 }, // col 3
 			{ 4, 5, 6, 7 }  // col 4
 			});
 	vec4 b(1, 2, 3, 4);
 	f(a, b);
 	return true;
 }
 bool mat_suite::mult_mat_m2() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		mat<2, 2> operator()(mat<2, 2> a, mat<2, 2> b) {
 			start_timer();
 			mat<2, 2> r = a * b;
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<2, 2> a({
 			{ 1, 2 },
 			{ 3, 4 },
 			});
 	mat<2, 2> b({
 			{ 11, 22 },
 			{ 33, 44 },
 			});
 	mat<2, 2> exp({
 			{ 77, 110 },
 			{ 165, 242 },
 			});
 	mat<2, 2> c = f(a, b);
 	return c == exp;
 }
 bool mat_suite::mult_mat_m3() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		mat<3, 3> operator()(mat<3, 3> a, mat<3, 3> b) {
 			start_timer();
 			mat<3, 3> r = a * b;
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<3, 3> a({
 			{ 1, 2, 3 },
 			{ 4, 5, 6 },
 			{ 7, 8, 9 },
 			});
 	mat<3, 3> b({
 			{ 11, 22, 33 },
 			{ 44, 55, 66 },
 			{ 77, 88, 99 },
 			});
 	mat<3, 3> exp({
 			{ 330, 396, 462 },
 			{ 726, 891, 1056 },
 			{ 1122, 1386, 1650 },
 			});
 	mat<3, 3> c = f(a, b);
 	return c == exp;
 }
 bool mat_suite::mult_mat_m4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		mat<4, 4> operator()(mat<4, 4> a, mat<4, 4> b) {
 			start_timer();
 			mat<4, 4> r = a * b;
 			end_timer();
 			return r;
 		}
 	} f;
 	mat<4, 4> a({
 			{ 1, 2, 3, 4 },
 			{ 5, 6, 7, 8 },
 			{ 9, 1, 2, 3 },
 			{ 4, 5, 6, 7 },
 			});
 	mat<4, 4> b({
 			{ 11, 22, 33, 44 },
 			{ 55, 66, 77, 88 },
 			{ 99, 11, 22, 33 },
 			{ 44, 55, 66, 77 },
 			});
 	mat<4, 4> exp({
 			{ 594, 407, 517, 627 },
 			{ 1430, 1023, 1309, 1595 },
 			{ 484, 451, 616, 781 },
 			{ 1221, 869, 1111, 1353 },
 			});
 	mat<4, 4> c = f(a, b);
 	return c == exp;
 }
 bool mat_suite::projection_build_m4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		mat<4, 4> operator()(vec4 b) {
 			start_timer();
 			mat<4, 4> r = b * b.transpose() * (fixed(64) / b.magnitude_sqr()) * fixed(1.0 / 64.0);
 			end_timer();
 			return r;
 		}
 	} f;
 	vec4 b(-3, 4, 14, 0);
 	f(b);
 	return false;
 }
 bool mat_suite::projection_calc_m4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec4 operator()(mat<4, 4> A, vec4 b) {
 			start_timer();
 			vec4 r = A * b;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec4 a(8, 3, 7, 0);
 	vec4 b(-3, 4, 14, 0);
 	mat<4, 4> A = b * b.transpose() * (fixed(64) / b.magnitude_sqr()) * fixed(1.0 / 64.0);
 	vec4 c = f(A, a);
 	vec4 exp(fixed::from_raw(-74), fixed::from_raw(99), fixed::from_raw(348), 0);
 	vec4 exact_value(-1.1674, 1.5566, 5.4480, 0);
 	log::debug << "A = " << a << endl;
 	log::debug << "B = " << b << endl;
 	log::debug << "C = proj(A, B) = " << c << endl;
 	log::debug << "Exact value: " << exact_value << endl;
 	log::debug << "Divergence: " << exact_value - c << endl;
 	return c == exp;
 }
 } // namespace test
 } // namespace mtl
--- a/src/test/vec.cpp
+++ b/src/test/vec.cpp
@ -0,0 +1,373 @@
 #include "mtl/test/vec.hpp"
 #include "mtl/target.hpp"
 #include "mtl/vec.hpp"
 namespace mtl {
 namespace test {
 bool vec_suite::addition_v2() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec2 operator()(vec2 a, vec2 b) {
 			start_timer();
 			vec2 r = a + b;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec2 c = f(vec2(1, 2), vec2(10, 20));
 	return c.x == 11 && c.y == 22;
 }
 bool vec_suite::addition_v3() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec3 operator()(vec3 a, vec3 b) {
 			start_timer();
 			vec3 r = a + b;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec3 c = f(vec3(1, 2, 3), vec3(10, 20, 30));
 	return c.x == 11 && c.y == 22 && c.z == 33;
 }
 bool vec_suite::addition_v4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec4 operator()(vec4 a, vec4 b) {
 			start_timer();
 			vec4 r = a + b;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec4 c = f(vec4(1, 2, 3, 4), vec4(10, 20, 30, 40));
 	return c.x == 11 && c.y == 22 && c.z == 33 && c.w == 44;
 }
 bool vec_suite::subtraction_v2() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec2 operator()(vec2 a, vec2 b) {
 			start_timer();
 			vec2 r = a - b;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec2 c = f(vec2(10, 20), vec2(1, 2));
 	return c.x == 9 && c.y == 18;
 }
 bool vec_suite::subtraction_v3() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec3 operator()(vec3 a, vec3 b) {
 			start_timer();
 			vec3 r = a - b;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec3 c = f(vec3(10, 20, 30), vec3(1, 2, 3));
 	return c.x == 9 && c.y == 18 && c.z == 27;
 }
 bool vec_suite::subtraction_v4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec4 operator()(vec4 a, vec4 b) {
 			start_timer();
 			vec4 r = a - b;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec4 c = f(vec4(10, 20, 30, 40), vec4(1, 2, 3, 4));
 	return c.x == 9 && c.y == 18 && c.z == 27 && c.w == 36;
 }
 bool vec_suite::negation_v2() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec2 operator()(vec2 a) {
 			start_timer();
 			vec2 r = -a;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec2 c = f(vec2(10, 20));
 	return c.x == -10 && c.y == -20;
 }
 bool vec_suite::negation_v3() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec3 operator()(vec3 a) {
 			start_timer();
 			vec3 r = -a;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec3 c = f(vec3(10, 20, 30));
 	return c.x == -10 && c.y == -20 && c.z == -30;
 }
 bool vec_suite::negation_v4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec4 operator()(vec4 a) {
 			start_timer();
 			vec4 r = -a;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec4 c = f(vec4(10, 20, 30, 40));
 	return c.x == -10 && c.y == -20 && c.z == -30 && c.w == -40;
 }
 bool vec_suite::mult_scalar_v2() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec2 operator()(vec2 a, fixed x) {
 			start_timer();
 			vec2 r = a * x;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec2 c = f(vec2(10, 20), 10);
 	return c.x == 100 && c.y == 200;
 }
 bool vec_suite::mult_scalar_v3() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec3 operator()(vec3 a, fixed x) {
 			start_timer();
 			vec3 r = a * x;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec3 c = f(vec3(10, 20, 30), 10);
 	return c.x == 100 && c.y == 200 && c.z == 300;
 }
 bool vec_suite::mult_scalar_v4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec4 operator()(vec4 a, fixed x) {
 			start_timer();
 			vec4 r = a * x;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec4 c = f(vec4(10, 20, 30, 40), 10);
 	return c.x == 100 && c.y == 200 && c.z == 300 && c.w == 400;
 }
 bool vec_suite::dot_v2() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		fixed operator()(vec2 a, vec2 b) {
 			start_timer();
 			fixed r = a * b;
 			end_timer();
 			return r;
 		}
 	} f;
 	fixed c = f(vec2(1, 2), vec2(1, 10));
 	return c == 21;
 }
 bool vec_suite::dot_v3() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		fixed operator()(vec3 a, vec3 b) {
 			start_timer();
 			fixed r = a * b;
 			end_timer();
 			return r;
 		}
 	} f;
 	fixed c = f(vec3(1, 2, 3), vec3(1, 10, 100));
 	return c == 321;
 }
 bool vec_suite::dot_v4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		fixed operator()(vec4 a, vec4 b) {
 			start_timer();
 			fixed r = a * b;
 			end_timer();
 			return r;
 		}
 	} f;
 	fixed c = f(vec4(1, 2, 3, 4), vec4(1, 10, 100, 1000));
 	return c == 4321;
 }
 bool vec_suite::division_scalar_v2() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec2 operator()(vec2 a, fixed x) {
 			start_timer();
 			vec2 r = a / x;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec2 c = f(vec2(10, 20), 10);
 	return c.x == 1 && c.y == 2;
 }
 bool vec_suite::division_scalar_v3() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec3 operator()(vec3 a, fixed x) {
 			start_timer();
 			vec3 r = a / x;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec3 c = f(vec3(10, 20, 30), 10);
 	return c.x == 1 && c.y == 2 && c.z == 3;
 }
 bool vec_suite::division_scalar_v4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec4 operator()(vec4 a, fixed x) {
 			start_timer();
 			vec4 r = a / x;
 			end_timer();
 			return r;
 		}
 	} f;
 	vec4 c = f(vec4(10, 20, 30, 40), 10);
 	return c.x == 1 && c.y == 2 && c.z == 3 && c.w == 4;
 }
 bool vec_suite::magnitude_sqr_v2() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		fixed operator()(vec2 a) {
 			start_timer();
 			fixed r = a.magnitude_sqr();
 			end_timer();
 			return r;
 		}
 	} f;
 	fixed c = f(vec2(1, 2));
 	return c == 5;
 }
 bool vec_suite::magnitude_sqr_v3() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		fixed operator()(vec3 a) {
 			start_timer();
 			fixed r = a.magnitude_sqr();
 			end_timer();
 			return r;
 		}
 	} f;
 	fixed c = f(vec3(1, 2, 3));
 	return c == 14;
 }
 bool vec_suite::magnitude_sqr_v4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		fixed operator()(vec4 a) {
 			start_timer();
 			fixed r = a.magnitude_sqr();
 			end_timer();
 			return r;
 		}
 	} f;
 	fixed c = f(vec4(1, 2, 3, 4));
 	return c == 30;
 }
 // Placeholder: no vec2 transpose check exists yet. Logs a marker at debug
 // level and reports success.
 bool vec_suite::transpose_v2() {
 	log::debug << "UNIMPLEMENTED" << endl;
 	return true;
 }
 // Placeholder: no vec3 transpose check exists yet. Logs a marker at debug
 // level and reports success.
 bool vec_suite::transpose_v3() {
 	log::debug << "UNIMPLEMENTED" << endl;
 	return true;
 }
 // Placeholder: no vec4 transpose check exists yet. Logs a marker at debug
 // level and reports success.
 bool vec_suite::transpose_v4() {
 	log::debug << "UNIMPLEMENTED" << endl;
 	return true;
 }
 bool vec_suite::projection_v4() {
 	struct {
 		NOINLINE GBA_IWRAM ARM_MODE
 		vec4 operator()(vec4 a, vec4 b) {
 			start_timer();
 			vec4 r = b * (a * b) / b.magnitude_sqr();
 			end_timer();
 			return r;
 		}
 	} f;
 	vec4 a(8, 3, 7, 0);
 	vec4 b(-3, 4, 14, 0);
 	vec4 c = f(a, b);
 	vec4 exp(fixed::from_raw(-74), fixed::from_raw(99), fixed::from_raw(348), 0);
 	log::debug << "A = " << a << endl;
 	log::debug << "B = " << b << endl;
 	log::debug << "C = proj(A, B) = " << c << endl;
 	return c == exp;
 }
 } // namespace test
 } // namespace mtl