Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Macros for OSACA (and other *CA) analysis of the latency of specific paths of functions #4151

Merged
merged 26 commits into from
Dec 31, 2024
Merged
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 99 additions & 34 deletions numerics/sin_cos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,78 @@
#include "numerics/polynomial_evaluators.hpp"
#include "quantities/elementary_functions.hpp"

// Instrumentation hooks for Sin.  When PRINCIPIA_USE_OSACA_SIN is set, the
// Sin-specific markers forward to the generic OSACA_FUNCTION_BEGIN and
// OSACA_RETURN macros.  Otherwise OSACA_SIN_BEGIN expands to nothing and
// OSACA_RETURN_SIN is a plain `return`.
#if PRINCIPIA_USE_OSACA_SIN
#define OSACA_SIN_BEGIN OSACA_FUNCTION_BEGIN
#define OSACA_RETURN_SIN OSACA_RETURN
#else
#define OSACA_SIN_BEGIN(arg)
#define OSACA_RETURN_SIN(result) return (result)
#endif

// Instrumentation hooks for Cos.  When PRINCIPIA_USE_OSACA_COS is set, the
// Cos-specific markers forward to the generic OSACA_FUNCTION_BEGIN and
// OSACA_RETURN macros.  Otherwise OSACA_COS_BEGIN expands to nothing and
// OSACA_RETURN_COS is a plain `return`.
#if PRINCIPIA_USE_OSACA_COS
#define OSACA_COS_BEGIN OSACA_FUNCTION_BEGIN
#define OSACA_RETURN_COS OSACA_RETURN
#else
#define OSACA_COS_BEGIN(arg)
#define OSACA_RETURN_COS(result) return (result)
#endif

#if PRINCIPIA_USE_OSACA_SIN || PRINCIPIA_USE_OSACA_COS
#include "intel/iacaMarks.h"
// Read by OSACA_RETURN to decide whether to jump back to the OSACA_loop label.
// It is initialized to false and is not assigned anywhere in the code visible
// here, so an instrumented function keeps looping.
// NOTE(review): presumably instrumented builds are only analysed statically and
// never executed — confirm.
static bool OSACA_loop_terminator = false;
// Opens the analysed region of an instrumented function.  `arg` must name a
// modifiable variable (typically the function parameter): its value is copied
// through a volatile local, the IACA_VC64_START marker is emitted, and `arg`
// is then reassigned from OSACA_loop_carry right after the OSACA_loop label.
// OSACA_RETURN writes the function result to OSACA_loop_carry and jumps back
// to that label, which turns the function body into a loop whose carried
// dependency is input -> result.
#define OSACA_FUNCTION_BEGIN(arg)                                              \
  double volatile OSACA_input = arg;                                           \
  /* Putting a load of the input from memory in the analysed section makes */  \
  /* the dependency graph clearer, but adds a potentially spurious move to */  \
  /* the loop-carried latency.  Remove the `volatile` above to carry the */    \
  /* loop through registers. */                                                \
  IACA_VC64_START;                                                             \
  double OSACA_loop_carry = OSACA_input;                                       \
  OSACA_loop:                                                                  \
  arg = OSACA_loop_carry

// Closes the analysed region opened by OSACA_FUNCTION_BEGIN.  `result` is
// stored in OSACA_loop_carry; unless OSACA_loop_terminator is true, control
// jumps back to the OSACA_loop label so that the result becomes the next
// input.  Otherwise the carried value is copied to a volatile local, the
// IACA_VC64_END marker is emitted, and that value is returned.
#define OSACA_RETURN(result) \
OSACA_loop_carry = (result); \
if (!OSACA_loop_terminator) { \
goto OSACA_loop; \
} \
double volatile OSACA_result = OSACA_loop_carry; \
IACA_VC64_END; \
return OSACA_result
// Instrumented replacement for `if (condition)`.  It expands to an
// `if constexpr` with an init-statement:
//   - the init-statement evaluates `condition` at run time and stores it in a
//     volatile local (presumably so that the computation of the condition
//     stays in the analysed code even though its value is not branched on);
//   - the branch actually taken is chosen at compile time by evaluating
//     `condition` inside an immediately-invoked lambda, within the scope set
//     up by UNDER_OSACA_HYPOTHESES.
// Because the lambda has no captures, every name used in `condition` must be
// either declared by UNDER_OSACA_HYPOTHESES or usable in a constant
// expression without capture.
#define OSACA_IF_(condition)                                                   \
  if constexpr (bool volatile OSACA_computed_condition = (condition);          \
                [] { UNDER_OSACA_HYPOTHESES(return (condition)); }())

// Executes `statement` inside a scope that declares constexpr constants
// describing the path being analysed: hardware FMA is assumed to be in use and
// the argument is θ = 0.1.  The constants are named UseHardwareFMA, θ,
// n_double, reduction_value, reduction_error, θ_reduced, abs_x, quadrant,
// normalized_error and value, so an expression written in terms of those names
// can be evaluated at compile time; OSACA_IF_ uses this to pick a branch.
// The do { ... } while (false) wrapper makes the expansion a single statement.
#define UNDER_OSACA_HYPOTHESES(statement) \
do { \
constexpr bool UseHardwareFMA = true; \
constexpr double θ = 0.1; \
/* From argument reduction. */ \
constexpr double n_double = θ * (2 / π); \
constexpr double reduction_value = θ - n_double * π_over_2_high; \
constexpr double reduction_error = n_double * π_over_2_low; \
/* Used to determine whether a better argument reduction is needed. */ \
constexpr DoublePrecision<double> θ_reduced = \
QuickTwoDifference(reduction_value, reduction_error); \
/* Used in Sin to detect the near-0 case. */ \
constexpr double abs_x = \
θ_reduced.value > 0 ? θ_reduced.value : -θ_reduced.value; \
/* Used throughout the top-level functions. */ \
constexpr std::int64_t quadrant = \
static_cast<std::int64_t>(n_double) & 0b11; \
/* Used in DetectDangerousRounding. */ \
constexpr double normalized_error = 0; \
/* Not NaN is the only part that matters; used at the end of the */ \
/* top-level functions to determine whether to call the slow path. */ \
constexpr double value = 1; \
{ statement; } \
} while (false)

#else
// Neither PRINCIPIA_USE_OSACA_SIN nor PRINCIPIA_USE_OSACA_COS is set:
// OSACA_IF_ is an ordinary run-time `if`.
#define OSACA_IF_(condition) if (condition)
#endif


namespace principia {
namespace numerics {
namespace _sin_cos {
Expand Down Expand Up @@ -75,8 +143,8 @@

template<FMAPolicy fma_policy>
double FusedMultiplyAdd(double const a, double const b, double const c) {
if ((fma_policy == FMAPolicy::Force && CanEmitFMAInstructions) ||
(fma_policy == FMAPolicy::Auto && UseHardwareFMA)) {
OSACA_IF_((fma_policy == FMAPolicy::Force && CanEmitFMAInstructions) ||
(fma_policy == FMAPolicy::Auto && UseHardwareFMA)) {
using quantities::_elementary_functions::FusedMultiplyAdd;
return FusedMultiplyAdd(a, b, c);
} else {
Expand All @@ -86,8 +154,8 @@

template<FMAPolicy fma_policy>
double FusedNegatedMultiplyAdd(double const a, double const b, double const c) {
if ((fma_policy == FMAPolicy::Force && CanEmitFMAInstructions) ||
(fma_policy == FMAPolicy::Auto && UseHardwareFMA)) {
OSACA_IF_((fma_policy == FMAPolicy::Force && CanEmitFMAInstructions) ||
(fma_policy == FMAPolicy::Auto && UseHardwareFMA)) {
using quantities::_elementary_functions::FusedNegatedMultiplyAdd;
return FusedNegatedMultiplyAdd(a, b, c);
} else {
Expand All @@ -110,7 +178,8 @@
_mm_castsi128_pd(_mm_sub_epi64(error_128i, value_exponent_128i)));
// TODO(phl): Error analysis to refine the thresholds. Also, can we avoid
// negative numbers?
if (normalized_error < -0x1.ffffp971 && normalized_error > -0x1.00008p972) {
OSACA_IF_(normalized_error < -0x1.ffffp971 &&
normalized_error > -0x1.00008p972) {
eggrobin marked this conversation as resolved.
Show resolved Hide resolved
#if _DEBUG
LOG(ERROR) << std::setprecision(25) << x << " " << std::hexfloat << value
<< " " << error << " " << normalized_error;
Expand All @@ -124,12 +193,12 @@
inline void Reduce(Argument const θ,
DoublePrecision<Argument>& θ_reduced,
std::int64_t& quadrant) {
if (θ < π / 4 && θ > -π / 4) {
OSACA_IF_(θ < π / 4 && θ > -π / 4) {
θ_reduced.value = θ;
θ_reduced.error = 0;
quadrant = 0;
return;
} else if (θ <= π_over_2_threshold && θ >= -π_over_2_threshold) {
} else OSACA_IF_(θ <= π_over_2_threshold && θ >= -π_over_2_threshold) {

Check warning on line 201 in numerics/sin_cos.cpp

View workflow job for this annotation

GitHub Actions / check-cpp

whitespace/newline

Else clause should never be on same line as else (use 2 lines)
// We are not very sensitive to rounding errors in this expression, because
// in the worst case it could cause the reduced angle to jump from the
// vicinity of π / 4 to the vicinity of -π / 4 with appropriate adjustment
Expand All @@ -142,7 +211,7 @@
Argument const error = n_double * π_over_2_low;
θ_reduced = QuickTwoDifference(value, error);
// TODO(phl): Error analysis needed to find the right bounds.
if (θ_reduced.value < -0x1.0p-30 || θ_reduced.value > 0x1.0p-30) {
OSACA_IF_(θ_reduced.value < -0x1.0p-30 || θ_reduced.value > 0x1.0p-30) {
quadrant = n & 0b11;
return;
}
Expand Down Expand Up @@ -180,7 +249,7 @@
auto const& x = θ_reduced.value;
auto const& e = θ_reduced.error;
double const abs_x = std::abs(x);
if (abs_x < sin_near_zero_cutoff) {
OSACA_IF_(abs_x < sin_near_zero_cutoff) {
double const x² = x * x;
double const x³ = x² * x;
double const x³_term = FusedMultiplyAdd<fma_policy>(
Expand Down Expand Up @@ -254,71 +323,67 @@
FORCE_INLINE(inline)
#endif
Value __cdecl Sin(Argument const θ) {
OSACA_SIN_BEGIN(θ);
DoublePrecision<Argument> θ_reduced;
std::int64_t quadrant;
Reduce(θ, θ_reduced, quadrant);
double value;
if (UseHardwareFMA) {
if (quadrant & 0b1) {
OSACA_IF_(UseHardwareFMA) {
OSACA_IF_(quadrant & 0b1) {
value = CosImplementation<FMAPolicy::Force>(θ_reduced);
} else {
#if PRINCIPIA_USE_OSACA_SIN
IACA_VC64_START;
eggrobin marked this conversation as resolved.
Show resolved Hide resolved
OSACA_VC64_START;
#endif
value = SinImplementation<FMAPolicy::Force>(θ_reduced);
#if PRINCIPIA_USE_OSACA_SIN
IACA_VC64_END;
OSACA_VC64_END;
#endif
}
} else {
if (quadrant & 0b1) {
OSACA_IF_(quadrant & 0b1) {
value = CosImplementation<FMAPolicy::Disallow>(θ_reduced);
} else {
value = SinImplementation<FMAPolicy::Disallow>(θ_reduced);
}
}
if (value != value) {
return cr_sin(θ);
} else if (quadrant & 0b10) {
return -value;
OSACA_IF_(value != value) {
OSACA_RETURN_SIN(cr_sin(θ));
} else OSACA_IF_(quadrant & 0b10) {

Check warning on line 352 in numerics/sin_cos.cpp

View workflow job for this annotation

GitHub Actions / check-cpp

whitespace/newline

Else clause should never be on same line as else (use 2 lines)
OSACA_RETURN_SIN(-value);
} else {
return value;
OSACA_RETURN_SIN(value);
}
}

#if PRINCIPIA_INLINE_SIN_COS
FORCE_INLINE(inline)
#endif
Value __cdecl Cos(Argument const θ) {
Value __cdecl Cos(Argument θ) {
eggrobin marked this conversation as resolved.
Show resolved Hide resolved
OSACA_COS_BEGIN(θ);
DoublePrecision<Argument> θ_reduced;
std::int64_t quadrant;
Reduce(θ, θ_reduced, quadrant);
double value;
if (UseHardwareFMA) {
if (quadrant & 0b1) {
OSACA_IF_(UseHardwareFMA) {
OSACA_IF_(quadrant & 0b1) {
value = SinImplementation<FMAPolicy::Force>(θ_reduced);
} else {
#if PRINCIPIA_USE_OSACA_COS
IACA_VC64_START;
#endif
value = CosImplementation<FMAPolicy::Force>(θ_reduced);
#if PRINCIPIA_USE_OSACA_COS
IACA_VC64_END;
#endif
}
} else {
if (quadrant & 0b1) {
OSACA_IF_(quadrant & 0b1) {
value = SinImplementation<FMAPolicy::Disallow>(θ_reduced);
} else {
value = CosImplementation<FMAPolicy::Disallow>(θ_reduced);
}
}
if (value != value) {
return cr_cos(θ);
} else if (quadrant == 1 || quadrant == 2) {
return -value;
OSACA_IF_(value != value) {
OSACA_RETURN_COS(cr_cos(θ));
} else OSACA_IF_(quadrant == 1 || quadrant == 2) {

Check warning on line 383 in numerics/sin_cos.cpp

View workflow job for this annotation

GitHub Actions / check-cpp

whitespace/newline

Else clause should never be on same line as else (use 2 lines)
OSACA_RETURN_COS(-value);
} else {
return value;
OSACA_RETURN_COS(value);
}
}

Expand Down
Loading