Force loop unrolling in vec

We can't push/pop optimize options because they don't apply to inlined
functions. Function attributes also don't apply to inlined functions.
Because most (if not all) vector operations are inlined, neither of
these is an appropriate option. However, GCC 8.1 introduces a new pragma,
`#pragma GCC unroll`, that allows us to unroll specific loops. This pragma
does apply to inlined functions.
This commit is contained in:
Madeline Busig 2024-09-19 14:43:17 -07:00
parent 810750febb
commit 41ea3b2ee5

View File

@ -21,11 +21,15 @@ public:
constexpr vec(const vec<N>& other) noexcept {
// Copy constructor: copies each of the N elements of other.e into e.
// We need to explicitly define the copy constructor, otherwise
// GCC uses memcpy to copy while in Thumb mode, and that's slow.
//
// Force the loop below to be unrolled with a loop-specific pragma. The
// optimize pragmas (push/pop options) and function attributes can't be
// used instead because they don't apply to inlined functions.
// "#pragma GCC unroll" requires GCC 8.1 or newer.
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) {
e[i] = other.e[i];
}
}
constexpr vec(const fixed (&_e)[N]) noexcept {
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) {
e[i] = _e[i];
}
@ -41,6 +45,7 @@ public:
vec<N> operator+(const vec<N>& rhs) const noexcept {
vec<N> res;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) {
res[i] = e[i] + rhs[i];
}
@ -51,6 +56,7 @@ public:
vec<N> operator-(const vec<N>& rhs) const noexcept {
vec<N> res;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) {
res[i] = e[i] - rhs[i];
}
@ -61,6 +67,7 @@ public:
vec<N> operator-() const noexcept {
vec<N> res;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) {
res[i] = -e[i];
}
@ -71,6 +78,7 @@ public:
vec<N> operator*(fixed rhs) const noexcept {
vec<N> res;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) {
res[i] = e[i] * rhs;
}
@ -84,6 +92,7 @@ public:
fixed operator*(const vec<N>& rhs) const noexcept {
fixed res;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) {
res += e[i] * rhs[i];
}
@ -94,6 +103,7 @@ public:
vec<N> operator/(fixed rhs) const noexcept {
vec<N> r;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) {
r[i] = e[i] / rhs;
}
@ -104,6 +114,7 @@ public:
fixed magnitude_sqr() const noexcept {
fixed r;
#pragma GCC unroll 4
for (size_t i = 0; i < N; ++i) {
r += e[i] * e[i];
}