Force loop unrolling in vec

We can't push/pop optimize options because they don't apply to inlined
functions. Function attributes don't apply to inlined functions either.
Because most (if not all) vector operations are inlined, neither of
these is an appropriate option. However, GCC 8.1 introduced a new pragma,
`#pragma GCC unroll`, that allows us to unroll specific loops. This pragma
does apply to inlined functions.
This commit is contained in:
Myles Busig 2024-09-19 14:43:17 -07:00
parent 543559d4c1
commit d71be61575

View File

@ -21,11 +21,15 @@ public:
constexpr vec(const vec<N>& other) noexcept { constexpr vec(const vec<N>& other) noexcept {
// We need to explicitly define the copy constructor, otherwise // We need to explicitly define the copy constructor, otherwise
// GCC uses memcpy to copy while in Thumb mode, and that's slow. // GCC uses memcpy to copy while in Thumb mode, and that's slow.
#pragma GCC unroll 4 // Force loop unrolling. Can't use optimize pragmas or function
// attributes because they don't apply to inlined functions. Requires
// GCC 8.1 or newer.
for (size_t i = 0; i < N; ++i) { for (size_t i = 0; i < N; ++i) {
e[i] = other.e[i]; e[i] = other.e[i];
} }
} }
constexpr vec(const fixed (&_e)[N]) noexcept { constexpr vec(const fixed (&_e)[N]) noexcept {
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) { for (size_t i = 0; i < N; ++i) {
e[i] = _e[i]; e[i] = _e[i];
} }
@ -41,6 +45,7 @@ public:
vec<N> operator+(const vec<N>& rhs) const noexcept { vec<N> operator+(const vec<N>& rhs) const noexcept {
vec<N> res; vec<N> res;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) { for (size_t i = 0; i < N; ++i) {
res[i] = e[i] + rhs[i]; res[i] = e[i] + rhs[i];
} }
@ -51,6 +56,7 @@ public:
vec<N> operator-(const vec<N>& rhs) const noexcept { vec<N> operator-(const vec<N>& rhs) const noexcept {
vec<N> res; vec<N> res;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) { for (size_t i = 0; i < N; ++i) {
res[i] = e[i] - rhs[i]; res[i] = e[i] - rhs[i];
} }
@ -61,6 +67,7 @@ public:
vec<N> operator-() const noexcept { vec<N> operator-() const noexcept {
vec<N> res; vec<N> res;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) { for (size_t i = 0; i < N; ++i) {
res[i] = -e[i]; res[i] = -e[i];
} }
@ -71,6 +78,7 @@ public:
vec<N> operator*(fixed rhs) const noexcept { vec<N> operator*(fixed rhs) const noexcept {
vec<N> res; vec<N> res;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) { for (size_t i = 0; i < N; ++i) {
res[i] = e[i] * rhs; res[i] = e[i] * rhs;
} }
@ -84,6 +92,7 @@ public:
fixed operator*(const vec<N>& rhs) const noexcept { fixed operator*(const vec<N>& rhs) const noexcept {
fixed res; fixed res;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) { for (size_t i = 0; i < N; ++i) {
res += e[i] * rhs[i]; res += e[i] * rhs[i];
} }
@ -94,6 +103,7 @@ public:
vec<N> operator/(fixed rhs) const noexcept { vec<N> operator/(fixed rhs) const noexcept {
vec<N> r; vec<N> r;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) { for (size_t i = 0; i < N; ++i) {
r[i] = e[i] / rhs; r[i] = e[i] / rhs;
} }
@ -104,6 +114,7 @@ public:
fixed magnitude_sqr() const noexcept { fixed magnitude_sqr() const noexcept {
fixed r; fixed r;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) { for (size_t i = 0; i < N; ++i) {
r += e[i] * e[i]; r += e[i] * e[i];
} }