Force loop unrolling in vec
We can't use `#pragma GCC push_options` / `pop_options` with `#pragma GCC optimize`, because those options don't apply to inlined functions. Function attributes such as `__attribute__((optimize(...)))` don't apply to inlined functions either. Because most (if not all) vector operations are inlined, neither of these is an appropriate option. However, GCC 8.1 introduces a new loop-level pragma, `#pragma GCC unroll N`, that allows us to unroll specific loops. This pragma does apply to inlined functions.
This commit is contained in:
parent
810750febb
commit
41ea3b2ee5
@ -21,11 +21,15 @@ public:
|
|||||||
constexpr vec(const vec<N>& other) noexcept {
|
constexpr vec(const vec<N>& other) noexcept {
|
||||||
// We need to explicitly define the copy constructor, otherwise
|
// We need to explicitly define the copy constructor, otherwise
|
||||||
// GCC uses memcpy to copy while in Thumb mode, and that's slow.
|
// GCC uses memcpy to copy while in Thumb mode, and that's slow.
|
||||||
|
#pragma GCC unroll 4 // Force unroll loops. Can't use pragmas or attributes
|
||||||
|
// because they don't work for inlined functions. Requires
|
||||||
|
// GCC 8.1
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
e[i] = other.e[i];
|
e[i] = other.e[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
constexpr vec(const fixed (&_e)[N]) noexcept {
|
constexpr vec(const fixed (&_e)[N]) noexcept {
|
||||||
|
#pragma GCC unroll 4
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
e[i] = _e[i];
|
e[i] = _e[i];
|
||||||
}
|
}
|
||||||
@ -41,6 +45,7 @@ public:
|
|||||||
vec<N> operator+(const vec<N>& rhs) const noexcept {
|
vec<N> operator+(const vec<N>& rhs) const noexcept {
|
||||||
vec<N> res;
|
vec<N> res;
|
||||||
|
|
||||||
|
#pragma GCC unroll 4
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
res[i] = e[i] + rhs[i];
|
res[i] = e[i] + rhs[i];
|
||||||
}
|
}
|
||||||
@ -51,6 +56,7 @@ public:
|
|||||||
vec<N> operator-(const vec<N>& rhs) const noexcept {
|
vec<N> operator-(const vec<N>& rhs) const noexcept {
|
||||||
vec<N> res;
|
vec<N> res;
|
||||||
|
|
||||||
|
#pragma GCC unroll 4
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
res[i] = e[i] - rhs[i];
|
res[i] = e[i] - rhs[i];
|
||||||
}
|
}
|
||||||
@ -61,6 +67,7 @@ public:
|
|||||||
vec<N> operator-() const noexcept {
|
vec<N> operator-() const noexcept {
|
||||||
vec<N> res;
|
vec<N> res;
|
||||||
|
|
||||||
|
#pragma GCC unroll 4
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
res[i] = -e[i];
|
res[i] = -e[i];
|
||||||
}
|
}
|
||||||
@ -71,6 +78,7 @@ public:
|
|||||||
vec<N> operator*(fixed rhs) const noexcept {
|
vec<N> operator*(fixed rhs) const noexcept {
|
||||||
vec<N> res;
|
vec<N> res;
|
||||||
|
|
||||||
|
#pragma GCC unroll 4
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
res[i] = e[i] * rhs;
|
res[i] = e[i] * rhs;
|
||||||
}
|
}
|
||||||
@ -84,6 +92,7 @@ public:
|
|||||||
fixed operator*(const vec<N>& rhs) const noexcept {
|
fixed operator*(const vec<N>& rhs) const noexcept {
|
||||||
fixed res;
|
fixed res;
|
||||||
|
|
||||||
|
#pragma GCC unroll 4
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
res += e[i] * rhs[i];
|
res += e[i] * rhs[i];
|
||||||
}
|
}
|
||||||
@ -94,6 +103,7 @@ public:
|
|||||||
vec<N> operator/(fixed rhs) const noexcept {
|
vec<N> operator/(fixed rhs) const noexcept {
|
||||||
vec<N> r;
|
vec<N> r;
|
||||||
|
|
||||||
|
#pragma GCC unroll 4
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
r[i] = e[i] / rhs;
|
r[i] = e[i] / rhs;
|
||||||
}
|
}
|
||||||
@ -104,6 +114,7 @@ public:
|
|||||||
fixed magnitude_sqr() const noexcept {
|
fixed magnitude_sqr() const noexcept {
|
||||||
fixed r;
|
fixed r;
|
||||||
|
|
||||||
|
#pragma GCC unroll 4
|
||||||
for (size_t i = 0; i < N; ++i) {
|
for (size_t i = 0; i < N; ++i) {
|
||||||
r += e[i] * e[i];
|
r += e[i] * e[i];
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user