From 41ea3b2ee540bbba9cb2551481ef42a9c131089c Mon Sep 17 00:00:00 2001 From: Madeline Busig Date: Thu, 19 Sep 2024 14:43:17 -0700 Subject: [PATCH] Force loop unrolling in vec We can't push/pop optimize options because they don't apply to inlined functions. Function attributes also won't apply to inlined functions. Because most (if not all) vector operations are inlined, neither of these is an appropriate option. However, GCC 8.1 introduces a new pragma, unroll, that allows us to unroll specific loops. This pragma does apply to inlined functions. --- include/mtl/vec.hpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/mtl/vec.hpp b/include/mtl/vec.hpp index d499fdb..4568ad0 100644 --- a/include/mtl/vec.hpp +++ b/include/mtl/vec.hpp @@ -21,11 +21,15 @@ public: constexpr vec(const vec& other) noexcept { // We need to explicitly define the copy constructor, otherwise // GCC uses memcpy to copy while in Thumb mode, and that's slow. +#pragma GCC unroll 4 // Force loop unrolling. Can't use optimize pragmas or function attributes + // because they don't apply to inlined functions. 
Requires + // GCC 8.1 for (size_t i = 0; i < N; ++i) { e[i] = other.e[i]; } } constexpr vec(const fixed (&_e)[N]) noexcept { +#pragma GCC unroll 4 for (size_t i = 0; i < N; ++i) { e[i] = _e[i]; } @@ -41,6 +45,7 @@ public: vec operator+(const vec& rhs) const noexcept { vec res; +#pragma GCC unroll 4 for (size_t i = 0; i < N; ++i) { res[i] = e[i] + rhs[i]; } @@ -51,6 +56,7 @@ public: vec operator-(const vec& rhs) const noexcept { vec res; +#pragma GCC unroll 4 for (size_t i = 0; i < N; ++i) { res[i] = e[i] - rhs[i]; } @@ -61,6 +67,7 @@ public: vec operator-() const noexcept { vec res; +#pragma GCC unroll 4 for (size_t i = 0; i < N; ++i) { res[i] = -e[i]; } @@ -71,6 +78,7 @@ public: vec operator*(fixed rhs) const noexcept { vec res; +#pragma GCC unroll 4 for (size_t i = 0; i < N; ++i) { res[i] = e[i] * rhs; } @@ -84,6 +92,7 @@ public: fixed operator*(const vec& rhs) const noexcept { fixed res; +#pragma GCC unroll 4 for (size_t i = 0; i < N; ++i) { res += e[i] * rhs[i]; } @@ -94,6 +103,7 @@ public: vec operator/(fixed rhs) const noexcept { vec r; +#pragma GCC unroll 4 for (size_t i = 0; i < N; ++i) { r[i] = e[i] / rhs; } @@ -104,6 +114,7 @@ public: fixed magnitude_sqr() const noexcept { fixed r; +#pragma GCC unroll 4 for (size_t i = 0; i < N; ++i) { r += e[i] * e[i]; }