Force loop unrolling in vec

We can't push/pop optimize options because they don't apply to inlined functions. Function attributes don't apply to inlined functions either. Because most (if not all) vector operations are inlined, neither of these is an appropriate option. However, GCC 8.1 introduced a new pragma, `#pragma GCC unroll`, that allows us to unroll specific loops. This pragma does apply to inlined functions.
2024-09-19 14:43:17 -07:00 · 2024-09-19 14:43:17 -07:00 · d71be61575
commit d71be61575
parent 543559d4c1
1 changed files with 11 additions and 0 deletions
--- a/include/mtl/vec.hpp
+++ b/include/mtl/vec.hpp
@ -21,11 +21,15 @@ public:
 	constexpr vec(const vec<N>& other) noexcept {
 		// We need to explicitly define the copy constructor, otherwise
 		// GCC uses memcpy to copy while in Thumb mode, and that's slow.
 #pragma GCC unroll 4 // Force loop unrolling. Can't use optimize pragmas or
 		     // function attributes because they don't apply to inlined
 		     // functions; this per-loop pragma does. Requires GCC 8.1.
 		for (size_t i = 0; i < N; ++i) {
 			e[i] = other.e[i];
 		}
 	}
 	constexpr vec(const fixed (&_e)[N]) noexcept {
 #pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			e[i] = _e[i];
 		}
@ -41,6 +45,7 @@ public:
 	vec<N> operator+(const vec<N>& rhs) const noexcept {
 		vec<N> res;
 #pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			res[i] = e[i] + rhs[i];
 		}
@ -51,6 +56,7 @@ public:
 	vec<N> operator-(const vec<N>& rhs) const noexcept {
 		vec<N> res;
 #pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			res[i] = e[i] - rhs[i];
 		}
@ -61,6 +67,7 @@ public:
 	vec<N> operator-() const noexcept {
 		vec<N> res;
 #pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			res[i] = -e[i];
 		}
@ -71,6 +78,7 @@ public:
 	vec<N> operator*(fixed rhs) const noexcept {
 		vec<N> res;
 #pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			res[i] = e[i] * rhs;
 		}
@ -84,6 +92,7 @@ public:
 	fixed operator*(const vec<N>& rhs) const noexcept {
 		fixed res;
 #pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			res += e[i] * rhs[i];
 		}
@ -94,6 +103,7 @@ public:
 	vec<N> operator/(fixed rhs) const noexcept {
 		vec<N> r;
 #pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			r[i] = e[i] / rhs;
 		}
@ -104,6 +114,7 @@ public:
 	fixed magnitude_sqr() const noexcept {
 		fixed r;
 #pragma GCC unroll 4
 		for (size_t i = 0; i < N; ++i) {
 			r += e[i] * e[i];
 		}