-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathperf.h
84 lines (74 loc) · 2.39 KB
/
perf.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#include "./common.h"
#ifndef SIGNALSMITH_DSP_PERF_H
#define SIGNALSMITH_DSP_PERF_H
#include <complex>
#if defined(__SSE__) || defined(_M_X64)
# include <xmmintrin.h>
#else
# include <cstdint> // for uintptr_t
#endif
namespace signalsmith {
namespace perf {
/** @defgroup Performance Performance helpers
@brief Nothing serious, just some `#defines` and helpers
@{
@file
*/
/// *Really* insist that a function/method is inlined (mostly for performance in DEBUG builds)
///
/// The macro is only defined here if it is not already defined, so a definition made before this point takes precedence.
#ifndef SIGNALSMITH_INLINE
# if defined(__GNUC__)
#  define SIGNALSMITH_INLINE __attribute__((always_inline)) inline
# elif defined(_MSC_VER)
// `_MSC_VER` is the macro MSVC actually predefines. `__forceinline` is itself an inline specifier, so `inline` is not repeated after it.
#  define SIGNALSMITH_INLINE __forceinline
# else
#  define SIGNALSMITH_INLINE inline
# endif
#endif
/** @brief Multiplies two complex numbers with the plain four-product formula, ignoring NaN/Infinity special cases
	The built-in `std::complex` multiplication handles edge-cases around NaNs, which slows things down and prevents auto-vectorisation. Flags like `-ffast-math` avoid that anyway, but this helper also helps in Debug builds.

	@tparam conjugateSecond  if `true`, the second argument is conjugated, i.e. the result is `a * conj(b)`
	@param a  first factor
	@param b  second factor (conjugated when `conjugateSecond` is set)
	@return the product as a `std::complex<V>`
*/
template <bool conjugateSecond=false, typename V>
SIGNALSMITH_INLINE static std::complex<V> mul(const std::complex<V> &a, const std::complex<V> &b) {
	const V aRe = a.real();
	const V aIm = a.imag();
	const V bRe = b.real();
	const V bIm = b.imag();
	if (conjugateSecond) {
		// a * conj(b)
		return std::complex<V>{
			bRe*aRe + bIm*aIm,
			bRe*aIm - bIm*aRe
		};
	}
	// a * b
	return std::complex<V>{
		aRe*bRe - aIm*bIm,
		aRe*bIm + aIm*bRe
	};
}
#if defined(__SSE__) || defined(_M_X64)
/** @brief Scoped guard which enables Flush-to-Zero and Denormals-Are-Zero while it is alive
	The constructor records the SSE control/status register (MXCSR) and sets both flags; the destructor writes the recorded value back.
*/
class StopDenormals {
	unsigned int controlStatusRegister; // MXCSR as read before this guard modified it
public:
	StopDenormals() : controlStatusRegister(_mm_getcsr()) {
		_mm_setcsr(controlStatusRegister|0x8040); // Flush-to-Zero (0x8000) and Denormals-Are-Zero (0x0040)
	}
	~StopDenormals() {
		_mm_setcsr(controlStatusRegister);
	}
};
#elif (defined (__ARM_NEON) || defined (__ARM_NEON__))
/** @brief Scoped guard which enables Flush-to-Zero while it is alive
	The constructor records the floating-point control register and sets the Flush-to-Zero bit; the destructor writes the recorded (unmodified) value back.
*/
class StopDenormals {
	uintptr_t status; // control register as read before this guard modified it

# if defined(__aarch64__) || defined(__arm64__)
	// AArch64: the control bits live in FPCR
	static uintptr_t readControl() {
		uintptr_t value;
		asm volatile("mrs %0, fpcr" : "=r"(value));
		return value;
	}
	static void writeControl(uintptr_t value) {
		// "r" only: `msr fpcr` needs a register operand, never an immediate
		asm volatile("msr fpcr, %0" : : "r"(value));
	}
# else
	// 32-bit ARM: the `fpcr` forms do not assemble there; the equivalent register is FPSCR
	static uintptr_t readControl() {
		uintptr_t value;
		asm volatile("vmrs %0, fpscr" : "=r"(value));
		return value;
	}
	static void writeControl(uintptr_t value) {
		asm volatile("vmsr fpscr, %0" : : "r"(value));
	}
# endif
public:
	StopDenormals() : status(readControl()) {
		// Keep the original value in `status`, so the destructor really restores the previous mode
		writeControl(status|0x01000000U); // Flush to Zero
	}
	~StopDenormals() {
		writeControl(status);
	}
};
#else
# if __cplusplus >= 202302L
# warning "The `StopDenormals` class doesn't do anything for this architecture"
# endif
/// Placeholder with the same name for architectures without an implementation above: constructing it changes nothing.
class StopDenormals {}; // FIXME: add for other architectures
#endif
/** @} */
}} // signalsmith::perf::
#endif // include guard