diff --git a/.github/workflows/gha_ci.yml b/.github/workflows/gha_ci.yml index 278c2c74e..0534c9cb5 100644 --- a/.github/workflows/gha_ci.yml +++ b/.github/workflows/gha_ci.yml @@ -38,7 +38,7 @@ jobs: mkdir build cd build cmake ../ -G "Visual Studio 16 2019" -A x64 -DHEYOKA_BUILD_TESTS=yes -DHEYOKA_WITH_MPPP=yes -DHEYOKA_BUILD_TUTORIALS=ON -DHEYOKA_ENABLE_IPO=yes -DBoost_NO_BOOST_CMAKE=ON -DHEYOKA_WITH_SLEEF=yes -DMPPP_GMP_INCLUDE_DIR=C:\Miniconda\envs\test\Library\include -DMPPP_GMP_LIBRARY=C:\Miniconda\envs\test\Library\lib\mpir.lib - cmake --build . --config Release + cmake --build . --config Release -j2 copy Release\heyoka.dll test\Release\ ctest -j4 -V -C Release windows_2019_llvm13: @@ -60,7 +60,7 @@ jobs: mkdir build cd build cmake ../ -G "Visual Studio 16 2019" -A x64 -DHEYOKA_BUILD_TESTS=yes -DHEYOKA_WITH_MPPP=yes -DHEYOKA_BUILD_TUTORIALS=ON -DHEYOKA_ENABLE_IPO=yes -DBoost_NO_BOOST_CMAKE=ON -DHEYOKA_WITH_SLEEF=yes -DMPPP_GMP_INCLUDE_DIR=C:\Miniconda\envs\test\Library\include -DMPPP_GMP_LIBRARY=C:\Miniconda\envs\test\Library\lib\mpir.lib - cmake --build . --config Release + cmake --build . --config Release -j2 copy Release\heyoka.dll test\Release\ ctest -j4 -V -C Release windows_2019_llvm14: @@ -82,7 +82,7 @@ jobs: mkdir build cd build cmake ../ -G "Visual Studio 16 2019" -A x64 -DHEYOKA_BUILD_TESTS=yes -DHEYOKA_WITH_MPPP=yes -DHEYOKA_BUILD_TUTORIALS=ON -DHEYOKA_ENABLE_IPO=yes -DBoost_NO_BOOST_CMAKE=ON -DHEYOKA_WITH_SLEEF=yes -DMPPP_GMP_INCLUDE_DIR=C:\Miniconda\envs\test\Library\include -DMPPP_GMP_LIBRARY=C:\Miniconda\envs\test\Library\lib\mpir.lib - cmake --build . --config Release + cmake --build . 
--config Release -j2 copy Release\heyoka.dll test\Release\ ctest -j4 -V -C Release conda_release_static: diff --git a/README.md b/README.md index 44a9b2d96..6db5534e2 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ heyoka is a C++ library for the integration of ordinary differential equations (ODEs) via Taylor's method, based on automatic differentiation techniques and aggressive just-in-time compilation via [LLVM](https://llvm.org/). Notable features include: -* support for double-precision, extended-precision (80-bit and 128-bit), +* support for single-precision, double-precision, extended-precision (80-bit and 128-bit), and arbitrary-precision floating-point types, * the ability to maintain machine precision accuracy over tens of billions of timesteps, diff --git a/doc/changelog.rst b/doc/changelog.rst index 22d8e63e8..afb92a7bb 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -7,6 +7,8 @@ Changelog New ~~~ +- Add support for single-precision computations + (`#363 `__). - Add model implementing the ELP2000 analytical lunar theory (`#362 `__). diff --git a/doc/index.rst b/doc/index.rst index 520be023f..ed11ce020 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -17,7 +17,7 @@ heyoka is a C++ library for the integration of ordinary differential equations on automatic differentiation techniques and aggressive just-in-time compilation via `LLVM `__. Notable features include: -* support for double-precision, extended-precision (80-bit and 128-bit), +* support for single-precision, double-precision, extended-precision (80-bit and 128-bit), and arbitrary-precision floating-point types, * the ability to maintain machine precision accuracy over tens of billions of timesteps, @@ -110,7 +110,7 @@ license. The authors are Francesco Biscani and Dario Izzo (European Space Agency). .. 
toctree:: - :maxdepth: 2 + :maxdepth: 1 install.rst basic_tutorials.rst diff --git a/doc/install.rst b/doc/install.rst index d69f1ac7c..038480c54 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -64,7 +64,8 @@ installing from source (the minimum required version is 3.18). Support for extended precision `````````````````````````````` -Whereas in heyoka double-precision computations are always supported, support for extended-precision +Whereas in heyoka single-precision and double-precision computations are always supported via the +``float`` and ``double`` types respectively, support for extended-precision computations varies depending on the software/hardware platform. 80-bit precision @@ -81,7 +82,7 @@ heyoka (and all its dependencies) have been compiled with a compiler supporting ^^^^^^^^^^^^^^^^^ On platforms where ``long double`` is a quadruple-precision floating-point datatype (e.g., 64-bit ARM), -quadruple-precision integrations are always supported. Otherwise, +quadruple-precision integrations are always supported via ``long double``. Otherwise, on platforms such as x86-64, quadruple-precision computations are supported if: * the nonstandard ``__float128`` floating-point type is diff --git a/doc/tut_extended_precision.rst b/doc/tut_extended_precision.rst index 4cabc0bb3..00e0cb77c 100644 --- a/doc/tut_extended_precision.rst +++ b/doc/tut_extended_precision.rst @@ -4,7 +4,7 @@ Computations in extended precision ================================== As hinted in the :ref:`installation instructions `, heyoka supports computations -not only in double precision, but also in extended precision. Specifically, heyoka currently supports: +not only in single and double precision, but also in extended precision. Specifically, heyoka currently supports: * the 80-bit IEEE `extended-precision format `__ (~21 decimal digits), * the 128-bit IEEE `quadruple-precision format `__ (~36 decimal digits). 
diff --git a/include/heyoka/detail/event_detection.hpp b/include/heyoka/detail/event_detection.hpp index b93845b89..0f7fe2705 100644 --- a/include/heyoka/detail/event_detection.hpp +++ b/include/heyoka/detail/event_detection.hpp @@ -41,6 +41,9 @@ inline T taylor_deduce_cooldown(T, T) static_assert(always_false_v, "Unhandled type"); } +template <> +float taylor_deduce_cooldown(float, float); + template <> double taylor_deduce_cooldown(double, double); diff --git a/include/heyoka/detail/type_traits.hpp b/include/heyoka/detail/type_traits.hpp index 62bb792e8..34848ecd7 100644 --- a/include/heyoka/detail/type_traits.hpp +++ b/include/heyoka/detail/type_traits.hpp @@ -76,6 +76,10 @@ template struct is_supported_fp : std::false_type { }; +template <> +struct is_supported_fp : std::true_type { +}; + template <> struct is_supported_fp : std::true_type { }; diff --git a/include/heyoka/expression.hpp b/include/heyoka/expression.hpp index f7113e225..704dbffc8 100644 --- a/include/heyoka/expression.hpp +++ b/include/heyoka/expression.hpp @@ -81,6 +81,7 @@ class HEYOKA_DLL_PUBLIC expression public: expression(); + explicit expression(float); explicit expression(double); explicit expression(long double); #if defined(HEYOKA_HAVE_REAL128) @@ -125,6 +126,9 @@ HEYOKA_DLL_PUBLIC bool is_fixed(const expression &); inline namespace literals { +HEYOKA_DLL_PUBLIC expression operator""_flt(long double); +HEYOKA_DLL_PUBLIC expression operator""_flt(unsigned long long); + HEYOKA_DLL_PUBLIC expression operator""_dbl(long double); HEYOKA_DLL_PUBLIC expression operator""_dbl(unsigned long long); @@ -257,6 +261,7 @@ HEYOKA_DLL_PUBLIC expression operator+(expression); HEYOKA_DLL_PUBLIC expression operator-(const expression &); HEYOKA_DLL_PUBLIC expression operator+(const expression &, const expression &); +HEYOKA_DLL_PUBLIC expression operator+(const expression &, float); HEYOKA_DLL_PUBLIC expression operator+(const expression &, double); HEYOKA_DLL_PUBLIC expression operator+(const expression 
&, long double); #if defined(HEYOKA_HAVE_REAL128) @@ -265,6 +270,7 @@ HEYOKA_DLL_PUBLIC expression operator+(const expression &, mppp::real128); #if defined(HEYOKA_HAVE_REAL) HEYOKA_DLL_PUBLIC expression operator+(const expression &, mppp::real); #endif +HEYOKA_DLL_PUBLIC expression operator+(float, const expression &); HEYOKA_DLL_PUBLIC expression operator+(double, const expression &); HEYOKA_DLL_PUBLIC expression operator+(long double, const expression &); #if defined(HEYOKA_HAVE_REAL128) @@ -276,6 +282,7 @@ HEYOKA_DLL_PUBLIC expression operator+(mppp::real, const expression &); HEYOKA_DLL_PUBLIC expression operator-(const expression &, const expression &); HEYOKA_DLL_PUBLIC expression operator-(const expression &, double); +HEYOKA_DLL_PUBLIC expression operator-(const expression &, float); HEYOKA_DLL_PUBLIC expression operator-(const expression &, long double); #if defined(HEYOKA_HAVE_REAL128) HEYOKA_DLL_PUBLIC expression operator-(const expression &, mppp::real128); @@ -283,6 +290,7 @@ HEYOKA_DLL_PUBLIC expression operator-(const expression &, mppp::real128); #if defined(HEYOKA_HAVE_REAL) HEYOKA_DLL_PUBLIC expression operator-(const expression &, mppp::real); #endif +HEYOKA_DLL_PUBLIC expression operator-(float, const expression &); HEYOKA_DLL_PUBLIC expression operator-(double, const expression &); HEYOKA_DLL_PUBLIC expression operator-(long double, const expression &); #if defined(HEYOKA_HAVE_REAL128) @@ -294,6 +302,7 @@ HEYOKA_DLL_PUBLIC expression operator-(mppp::real, const expression &); HEYOKA_DLL_PUBLIC expression operator*(const expression &, const expression &); HEYOKA_DLL_PUBLIC expression operator*(const expression &, double); +HEYOKA_DLL_PUBLIC expression operator*(const expression &, float); HEYOKA_DLL_PUBLIC expression operator*(const expression &, long double); #if defined(HEYOKA_HAVE_REAL128) HEYOKA_DLL_PUBLIC expression operator*(const expression &, mppp::real128); @@ -301,6 +310,7 @@ HEYOKA_DLL_PUBLIC expression operator*(const expression &, 
mppp::real128); #if defined(HEYOKA_HAVE_REAL) HEYOKA_DLL_PUBLIC expression operator*(const expression &, mppp::real); #endif +HEYOKA_DLL_PUBLIC expression operator*(float, const expression &); HEYOKA_DLL_PUBLIC expression operator*(double, const expression &); HEYOKA_DLL_PUBLIC expression operator*(long double, const expression &); #if defined(HEYOKA_HAVE_REAL128) @@ -311,6 +321,7 @@ HEYOKA_DLL_PUBLIC expression operator*(mppp::real, const expression &); #endif HEYOKA_DLL_PUBLIC expression operator/(const expression &, const expression &); +HEYOKA_DLL_PUBLIC expression operator/(const expression &, float); HEYOKA_DLL_PUBLIC expression operator/(const expression &, double); HEYOKA_DLL_PUBLIC expression operator/(const expression &, long double); #if defined(HEYOKA_HAVE_REAL128) @@ -319,6 +330,7 @@ HEYOKA_DLL_PUBLIC expression operator/(const expression &, mppp::real128); #if defined(HEYOKA_HAVE_REAL) HEYOKA_DLL_PUBLIC expression operator/(const expression &, mppp::real); #endif +HEYOKA_DLL_PUBLIC expression operator/(float, const expression &); HEYOKA_DLL_PUBLIC expression operator/(double, const expression &); HEYOKA_DLL_PUBLIC expression operator/(long double, const expression &); #if defined(HEYOKA_HAVE_REAL128) @@ -329,6 +341,7 @@ HEYOKA_DLL_PUBLIC expression operator/(mppp::real, const expression &); #endif HEYOKA_DLL_PUBLIC expression &operator+=(expression &, const expression &); +HEYOKA_DLL_PUBLIC expression &operator+=(expression &, float); HEYOKA_DLL_PUBLIC expression &operator+=(expression &, double); HEYOKA_DLL_PUBLIC expression &operator+=(expression &, long double); #if defined(HEYOKA_HAVE_REAL128) @@ -339,6 +352,7 @@ HEYOKA_DLL_PUBLIC expression &operator+=(expression &, mppp::real); #endif HEYOKA_DLL_PUBLIC expression &operator-=(expression &, const expression &); +HEYOKA_DLL_PUBLIC expression &operator-=(expression &, float); HEYOKA_DLL_PUBLIC expression &operator-=(expression &, double); HEYOKA_DLL_PUBLIC expression &operator-=(expression &, long 
double); #if defined(HEYOKA_HAVE_REAL128) @@ -349,6 +363,7 @@ HEYOKA_DLL_PUBLIC expression &operator-=(expression &, mppp::real); #endif HEYOKA_DLL_PUBLIC expression &operator*=(expression &, const expression &); +HEYOKA_DLL_PUBLIC expression &operator*=(expression &, float); HEYOKA_DLL_PUBLIC expression &operator*=(expression &, double); HEYOKA_DLL_PUBLIC expression &operator*=(expression &, long double); #if defined(HEYOKA_HAVE_REAL128) @@ -359,6 +374,7 @@ HEYOKA_DLL_PUBLIC expression &operator*=(expression &, mppp::real); #endif HEYOKA_DLL_PUBLIC expression &operator/=(expression &, const expression &); +HEYOKA_DLL_PUBLIC expression &operator/=(expression &, float); HEYOKA_DLL_PUBLIC expression &operator/=(expression &, double); HEYOKA_DLL_PUBLIC expression &operator/=(expression &, long double); #if defined(HEYOKA_HAVE_REAL128) diff --git a/include/heyoka/llvm_state.hpp b/include/heyoka/llvm_state.hpp index ad6e08901..ab399fabb 100644 --- a/include/heyoka/llvm_state.hpp +++ b/include/heyoka/llvm_state.hpp @@ -61,6 +61,7 @@ struct target_features { bool vsx = false; bool vsx3 = false; // Recommended SIMD sizes. 
+ std::uint32_t simd_size_flt = 1; std::uint32_t simd_size_dbl = 1; std::uint32_t simd_size_ldbl = 1; #if defined(HEYOKA_HAVE_REAL128) @@ -99,6 +100,9 @@ inline std::uint32_t recommended_simd_size() return 0; } +template <> +HEYOKA_DLL_PUBLIC std::uint32_t recommended_simd_size(); + template <> HEYOKA_DLL_PUBLIC std::uint32_t recommended_simd_size(); diff --git a/include/heyoka/math/atan2.hpp b/include/heyoka/math/atan2.hpp index 48d427c2e..c741b2f6b 100644 --- a/include/heyoka/math/atan2.hpp +++ b/include/heyoka/math/atan2.hpp @@ -75,6 +75,7 @@ class HEYOKA_DLL_PUBLIC atan2_impl : public func_base HEYOKA_DLL_PUBLIC expression atan2(expression, expression); +HEYOKA_DLL_PUBLIC expression atan2(expression, float); HEYOKA_DLL_PUBLIC expression atan2(expression, double); HEYOKA_DLL_PUBLIC expression atan2(expression, long double); @@ -90,6 +91,7 @@ HEYOKA_DLL_PUBLIC expression atan2(expression, mppp::real); #endif +HEYOKA_DLL_PUBLIC expression atan2(float, expression); HEYOKA_DLL_PUBLIC expression atan2(double, expression); HEYOKA_DLL_PUBLIC expression atan2(long double, expression); diff --git a/include/heyoka/math/kepDE.hpp b/include/heyoka/math/kepDE.hpp index 4352cd52a..6a4686e3c 100644 --- a/include/heyoka/math/kepDE.hpp +++ b/include/heyoka/math/kepDE.hpp @@ -72,6 +72,7 @@ HEYOKA_DLL_PUBLIC expression kepDE(expression, expression, expression); HEYOKA_DLL_PUBLIC expression kepDE(expression, type, expression); \ HEYOKA_DLL_PUBLIC expression kepDE(type, expression, expression) +HEYOKA_DECLARE_KEPDE_OVERLOADS(float); HEYOKA_DECLARE_KEPDE_OVERLOADS(double); HEYOKA_DECLARE_KEPDE_OVERLOADS(long double); diff --git a/include/heyoka/math/kepE.hpp b/include/heyoka/math/kepE.hpp index c1e2df224..88aa983fe 100644 --- a/include/heyoka/math/kepE.hpp +++ b/include/heyoka/math/kepE.hpp @@ -77,6 +77,7 @@ HEYOKA_DLL_PUBLIC expression kepE(expression, expression); HEYOKA_DLL_PUBLIC expression kepE(expression, type); \ HEYOKA_DLL_PUBLIC expression kepE(type, expression); 
+HEYOKA_DECLARE_KEPE_OVERLOADS(float); HEYOKA_DECLARE_KEPE_OVERLOADS(double); HEYOKA_DECLARE_KEPE_OVERLOADS(long double); diff --git a/include/heyoka/math/kepF.hpp b/include/heyoka/math/kepF.hpp index b94787aeb..fb2e34387 100644 --- a/include/heyoka/math/kepF.hpp +++ b/include/heyoka/math/kepF.hpp @@ -81,6 +81,7 @@ HEYOKA_DLL_PUBLIC expression kepF(expression, expression, expression); HEYOKA_DLL_PUBLIC expression kepF(expression, type, expression); \ HEYOKA_DLL_PUBLIC expression kepF(type, expression, expression) +HEYOKA_DECLARE_KEPF_OVERLOADS(float); HEYOKA_DECLARE_KEPF_OVERLOADS(double); HEYOKA_DECLARE_KEPF_OVERLOADS(long double); diff --git a/include/heyoka/math/pow.hpp b/include/heyoka/math/pow.hpp index de52a3003..1e0b6d5ac 100644 --- a/include/heyoka/math/pow.hpp +++ b/include/heyoka/math/pow.hpp @@ -114,6 +114,7 @@ pow_eval_algo get_pow_eval_algo(const pow_impl &); } // namespace detail HEYOKA_DLL_PUBLIC expression pow(expression, expression); +HEYOKA_DLL_PUBLIC expression pow(expression, float); HEYOKA_DLL_PUBLIC expression pow(expression, double); HEYOKA_DLL_PUBLIC expression pow(expression, long double); diff --git a/include/heyoka/step_callback.hpp b/include/heyoka/step_callback.hpp index f3465d555..46d58e8c7 100644 --- a/include/heyoka/step_callback.hpp +++ b/include/heyoka/step_callback.hpp @@ -121,7 +121,7 @@ struct HEYOKA_DLL_PUBLIC_INLINE_CLASS step_callback_inner final : step_callback_ void serialize(Archive &ar, unsigned) { ar &boost::serialization::base_object>(*this); - ar &m_value; + ar & m_value; } }; @@ -135,7 +135,7 @@ class HEYOKA_DLL_PUBLIC step_callback_impl template void serialize(Archive &ar, unsigned) { - ar &m_ptr; + ar & m_ptr; } // Meta-programming for the generic ctor. @@ -226,6 +226,9 @@ using step_callback_batch = detail::step_callback_impl> HEYOKA_END_NAMESPACE // Disable Boost.Serialization tracking for the implementation details of step_callback. 
+BOOST_CLASS_TRACKING(heyoka::detail::step_callback_inner_base>, + boost::serialization::track_never) + BOOST_CLASS_TRACKING(heyoka::detail::step_callback_inner_base>, boost::serialization::track_never) @@ -246,6 +249,9 @@ BOOST_CLASS_TRACKING(heyoka::detail::step_callback_inner_base>, + boost::serialization::track_never) + BOOST_CLASS_TRACKING(heyoka::detail::step_callback_inner_base>, boost::serialization::track_never) diff --git a/include/heyoka/taylor.hpp b/include/heyoka/taylor.hpp index 374caa44b..0ccdb56d2 100644 --- a/include/heyoka/taylor.hpp +++ b/include/heyoka/taylor.hpp @@ -414,6 +414,11 @@ inline std::ostream &operator<<(std::ostream &os, const nt_event_impl &) return os; } +template <> +HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const nt_event_impl &); +template <> +HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const nt_event_impl &); + template <> HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const nt_event_impl &); template <> @@ -535,6 +540,11 @@ inline std::ostream &operator<<(std::ostream &os, const t_event_impl &) return os; } +template <> +HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const t_event_impl &); +template <> +HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const t_event_impl &); + template <> HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const t_event_impl &); template <> @@ -641,6 +651,9 @@ inline std::ostream &operator<<(std::ostream &os, const continuous_output &) return os; } +template <> +HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const continuous_output &); + template <> HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const continuous_output &); @@ -735,6 +748,9 @@ inline std::ostream &operator<<(std::ostream &os, const continuous_output_batch< return os; } +template <> +HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const continuous_output_batch &); + template <> HEYOKA_DLL_PUBLIC std::ostream 
&operator<<(std::ostream &, const continuous_output_batch &); @@ -1731,6 +1747,9 @@ inline std::ostream &operator<<(std::ostream &os, const taylor_adaptive &) return os; } +template <> +HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const taylor_adaptive &); + template <> HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const taylor_adaptive &); @@ -1759,6 +1778,9 @@ inline std::ostream &operator<<(std::ostream &os, const taylor_adaptive_batch return os; } +template <> +HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const taylor_adaptive_batch &); + template <> HEYOKA_DLL_PUBLIC std::ostream &operator<<(std::ostream &, const taylor_adaptive_batch &); @@ -1789,9 +1811,11 @@ inline constexpr int taylor_adaptive_batch_s11n_version = 1; HEYOKA_END_NAMESPACE // Set the Boost s11n class version for taylor_adaptive and taylor_adaptive_batch. +BOOST_CLASS_VERSION(heyoka::taylor_adaptive, heyoka::detail::taylor_adaptive_s11n_version); BOOST_CLASS_VERSION(heyoka::taylor_adaptive, heyoka::detail::taylor_adaptive_s11n_version); BOOST_CLASS_VERSION(heyoka::taylor_adaptive, heyoka::detail::taylor_adaptive_s11n_version); +BOOST_CLASS_VERSION(heyoka::taylor_adaptive_batch, heyoka::detail::taylor_adaptive_batch_s11n_version); BOOST_CLASS_VERSION(heyoka::taylor_adaptive_batch, heyoka::detail::taylor_adaptive_batch_s11n_version); BOOST_CLASS_VERSION(heyoka::taylor_adaptive_batch, heyoka::detail::taylor_adaptive_batch_s11n_version); diff --git a/src/detail/event_detection.cpp b/src/detail/event_detection.cpp index 9788553a3..39728d754 100644 --- a/src/detail/event_detection.cpp +++ b/src/detail/event_detection.cpp @@ -19,7 +19,6 @@ #include #include #include -#include #include #include @@ -195,12 +194,12 @@ auto poly_eval_1(InputIt a, T x, std::uint32_t n) assert(n >= 2u); // LCOV_EXCL_LINE // Init the return value. 
- auto ret1 = a[n] * n; + auto ret1 = a[n] * static_cast(n); for (std::uint32_t i = 1; i < n; ++i) { // NOTE: possible optimisation for mppp::real here: // use fmma() directly, once exposed in mp++. - ret1 = a[n - i] * (n - i) + std::move(ret1) * x; + ret1 = a[n - i] * static_cast(n - i) + std::move(ret1) * x; } return ret1; @@ -495,6 +494,12 @@ T taylor_deduce_cooldown_impl(T g_eps, T abs_der) } // namespace +template <> +float taylor_deduce_cooldown(float g_eps, float abs_der) +{ + return taylor_deduce_cooldown_impl(g_eps, abs_der); +} + template <> double taylor_deduce_cooldown(double g_eps, double abs_der) { @@ -1051,8 +1056,7 @@ void taylor_adaptive::ed_data::detect_events(const T &h, std::uint32_t order, // detection altogether without a warning. This is ok, // and non-finite Taylor coefficients will be caught in the // step() implementations anyway. - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - std::uint32_t fex_check_result; + std::uint32_t fex_check_result{}; m_fex_check(ptr, &h, &back_int, &fex_check_result); if (fex_check_result) { continue; @@ -1295,8 +1299,7 @@ void taylor_adaptive::ed_data::detect_events(const T &h, std::uint32_t order, // Reverse tmp into tmp1, translate tmp1 by 1 with output // in tmp2, and count the sign changes in tmp2. - // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - std::uint32_t n_sc; + std::uint32_t n_sc{}; m_rtscc(tmp1.v.data(), tmp2.v.data(), &n_sc, tmp.v.data()); if (n_sc == 1u) { @@ -1436,6 +1439,7 @@ void taylor_adaptive::ed_data::detect_events(const T &h, std::uint32_t order, // Instantiate the book-keeping structure for event detection // in the scalar integrator. +template struct taylor_adaptive::ed_data; template struct taylor_adaptive::ed_data; template struct taylor_adaptive::ed_data; @@ -1985,8 +1989,7 @@ void taylor_adaptive_batch::ed_data::detect_events(const T *h_ptr, std::uint3 // Reverse tmp into tmp1, translate tmp1 by 1 with output // in tmp2, and count the sign changes in tmp2. 
- // NOLINTNEXTLINE(cppcoreguidelines-init-variables) - std::uint32_t n_sc; + std::uint32_t n_sc{}; m_rtscc(tmp1.v.data(), tmp2.v.data(), &n_sc, tmp.v.data()); if (n_sc == 1u) { @@ -2137,6 +2140,7 @@ void taylor_adaptive_batch::ed_data::detect_events(const T *h_ptr, std::uint3 // Instantiate the book-keeping structure for event detection // in the batch integrator. +template struct taylor_adaptive_batch::ed_data; template struct taylor_adaptive_batch::ed_data; template struct taylor_adaptive_batch::ed_data; diff --git a/src/detail/num_utils.cpp b/src/detail/num_utils.cpp index bae928b11..0c876614a 100644 --- a/src/detail/num_utils.cpp +++ b/src/detail/num_utils.cpp @@ -46,6 +46,8 @@ T num_zero_like([[maybe_unused]] const T &x) #endif } +template float num_zero_like(const float &); + template double num_zero_like(const double &); template long double num_zero_like(const long double &); @@ -76,6 +78,8 @@ T num_one_like([[maybe_unused]] const T &x) #endif } +template float num_one_like(const float &); + template double num_one_like(const double &); template long double num_one_like(const long double &); @@ -106,6 +110,8 @@ T num_eps_like([[maybe_unused]] const T &x) #endif } +template float num_eps_like(const float &); + template double num_eps_like(const double &); template long double num_eps_like(const long double &); @@ -136,6 +142,8 @@ T num_inf_like([[maybe_unused]] const T &x) #endif } +template float num_inf_like(const float &); + template double num_inf_like(const double &); template long double num_inf_like(const long double &); diff --git a/src/detail/string_conv.cpp b/src/detail/string_conv.cpp index 608da3936..0f5bdbe26 100644 --- a/src/detail/string_conv.cpp +++ b/src/detail/string_conv.cpp @@ -52,7 +52,7 @@ std::uint32_t uname_to_index(const std::string &s) template std::string fp_to_string(const T &x) { - if constexpr (std::is_same_v || std::is_same_v) { + if constexpr (std::is_same_v || std::is_same_v || std::is_same_v) { return fmt::format("{}", x); 
#if defined(HEYOKA_HAVE_REAL128) } else if constexpr (std::is_same_v) { @@ -68,6 +68,7 @@ std::string fp_to_string(const T &x) } // Explicit instantiations. +template HEYOKA_DLL_PUBLIC std::string fp_to_string(const float &); template HEYOKA_DLL_PUBLIC std::string fp_to_string(const double &); template HEYOKA_DLL_PUBLIC std::string fp_to_string(const long double &); diff --git a/src/detail/vector_math.cpp b/src/detail/vector_math.cpp index bbd1141fe..1f47450a1 100644 --- a/src/detail/vector_math.cpp +++ b/src/detail/vector_math.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -49,9 +50,10 @@ auto make_vfinfo(const char *s_name, std::string v_name, std::uint32_t width, st #if defined(HEYOKA_WITH_SLEEF) // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) -auto add_vfinfo_sleef(vf_map_t &retval, const char *scalar_name, const char *sleef_base_name, const char *sleef_tp, +auto add_vfinfo_sleef(vf_map_t &retval, const char *scalar_name, const char *sleef_base_name, std::string_view sleef_tp, std::uint32_t nargs = 1) { + assert(sleef_tp == "d" || sleef_tp == "f"); assert(retval.find(scalar_name) == retval.end()); assert(nargs > 0u); @@ -62,22 +64,31 @@ auto add_vfinfo_sleef(vf_map_t &retval, const char *scalar_name, const char *sle const auto &features = get_target_features(); + // NOTE: we need to select the SIMD width(s) based on the floating-point type (sleef_tp). + // All supported SIMD extensions start with a minimum width of 2 for double-precision + // and 4 for single-precision, possibly supporting larger widths. So we use these two + // values for the computation. + const std::uint32_t base_simd_width = sleef_tp == "d" ? 
2 : 4; + if (features.avx512f) { retval[scalar_name] - = {make_sleef_vfinfo(2, "avx2128"), make_sleef_vfinfo(4, "avx2"), make_sleef_vfinfo(8, "avx512f")}; + = {make_sleef_vfinfo(base_simd_width, "avx2128"), make_sleef_vfinfo(base_simd_width * 2u, "avx2"), + make_sleef_vfinfo(base_simd_width * 4u, "avx512f")}; } else if (features.avx2) { - retval[scalar_name] = {make_sleef_vfinfo(2, "avx2128"), make_sleef_vfinfo(4, "avx2")}; + retval[scalar_name] + = {make_sleef_vfinfo(base_simd_width, "avx2128"), make_sleef_vfinfo(base_simd_width * 2u, "avx2")}; } else if (features.avx) { - retval[scalar_name] = {make_sleef_vfinfo(2, "sse4"), make_sleef_vfinfo(4, "avx")}; + retval[scalar_name] + = {make_sleef_vfinfo(base_simd_width, "sse4"), make_sleef_vfinfo(base_simd_width * 2u, "avx")}; } else if (features.sse2) { - retval[scalar_name] = {make_sleef_vfinfo(2, "sse2")}; + retval[scalar_name] = {make_sleef_vfinfo(base_simd_width, "sse2")}; } else if (features.aarch64) { - retval[scalar_name] = {make_sleef_vfinfo(2, "advsimd")}; + retval[scalar_name] = {make_sleef_vfinfo(base_simd_width, "advsimd")}; } else if (features.vsx) { // NOTE: at this time the sleef conda package for PPC64 does not seem // to provide VSX3 functions. Thus, for now we use only the // VSX implementations. - retval[scalar_name] = {make_sleef_vfinfo(2, "vsx")}; + retval[scalar_name] = {make_sleef_vfinfo(base_simd_width, "vsx")}; } } @@ -96,6 +107,25 @@ auto make_vf_map() // but I am not 100% sure for the other archs. Let's keep this in mind. // NOTE: the same holds for things like abs() and floor(). + // Single-precision. 
+ add_vfinfo_sleef(retval, "llvm.sin.f32", "sin", "f"); + add_vfinfo_sleef(retval, "llvm.cos.f32", "cos", "f"); + add_vfinfo_sleef(retval, "llvm.log.f32", "log", "f"); + add_vfinfo_sleef(retval, "llvm.exp.f32", "exp", "f"); + add_vfinfo_sleef(retval, "llvm.pow.f32", "pow", "f", 2); + add_vfinfo_sleef(retval, "sinhf", "sinh", "f"); + add_vfinfo_sleef(retval, "coshf", "cosh", "f"); + add_vfinfo_sleef(retval, "asinf", "asin", "f"); + add_vfinfo_sleef(retval, "acosf", "acos", "f"); + add_vfinfo_sleef(retval, "asinhf", "asinh", "f"); + add_vfinfo_sleef(retval, "acoshf", "acosh", "f"); + add_vfinfo_sleef(retval, "tanf", "tan", "f"); + add_vfinfo_sleef(retval, "tanhf", "tanh", "f"); + add_vfinfo_sleef(retval, "atanf", "atan", "f"); + add_vfinfo_sleef(retval, "atanhf", "atanh", "f"); + add_vfinfo_sleef(retval, "atan2f", "atan2", "f", 2); + add_vfinfo_sleef(retval, "erff", "erf", "f"); + // Double-precision. add_vfinfo_sleef(retval, "llvm.sin.f64", "sin", "d"); add_vfinfo_sleef(retval, "llvm.cos.f64", "cos", "d"); diff --git a/src/ensemble_propagate.cpp b/src/ensemble_propagate.cpp index 6497ddfe9..8dffc1d02 100644 --- a/src/ensemble_propagate.cpp +++ b/src/ensemble_propagate.cpp @@ -216,6 +216,7 @@ ensemble_propagate_grid_impl(const taylor_adaptive &ta, std::vector grid, std::size_t, T, step_callback &); // NOLINTEND +HEYOKA_ENSEMBLE_PROPAGATE_SCALAR_INST(float) HEYOKA_ENSEMBLE_PROPAGATE_SCALAR_INST(double) HEYOKA_ENSEMBLE_PROPAGATE_SCALAR_INST(long double) @@ -406,6 +407,7 @@ std::vector, std::vector>> ensemble_propa const std::vector &, step_callback_batch &); // NOLINTEND +HEYOKA_ENSEMBLE_PROPAGATE_BATCH_INST(float) HEYOKA_ENSEMBLE_PROPAGATE_BATCH_INST(double) HEYOKA_ENSEMBLE_PROPAGATE_BATCH_INST(long double) diff --git a/src/expression_basic.cpp b/src/expression_basic.cpp index 8192b5e30..8a6078010 100644 --- a/src/expression_basic.cpp +++ b/src/expression_basic.cpp @@ -66,6 +66,8 @@ HEYOKA_BEGIN_NAMESPACE expression::expression() : expression(number{0.}) {} 
+expression::expression(float x) : expression(number{x}) {} + expression::expression(double x) : expression(number{x}) {} expression::expression(long double x) : expression(number{x}) {} @@ -227,6 +229,16 @@ std::vector copy(const std::vector &v_ex) inline namespace literals { +expression operator""_flt(long double x) +{ + return expression{static_cast(x)}; +} + +expression operator""_flt(unsigned long long n) +{ + return expression{static_cast(n)}; +} + expression operator""_dbl(long double x) { return expression{static_cast(x)}; diff --git a/src/expression_cfunc.cpp b/src/expression_cfunc.cpp index 1214a48c5..c0db04ed0 100644 --- a/src/expression_cfunc.cpp +++ b/src/expression_cfunc.cpp @@ -1749,6 +1749,14 @@ std::vector add_cfunc(llvm_state &s, const std::string &name, const } // Explicit instantiations. +template HEYOKA_DLL_PUBLIC std::vector add_cfunc(llvm_state &, const std::string &, + const std::vector &, std::uint32_t, + bool, bool, bool, long long); +template HEYOKA_DLL_PUBLIC std::vector add_cfunc(llvm_state &, const std::string &, + const std::vector &, + const std::vector &, std::uint32_t, + bool, bool, bool, long long); + template HEYOKA_DLL_PUBLIC std::vector add_cfunc(llvm_state &, const std::string &, const std::vector &, std::uint32_t, bool, bool, bool, long long); diff --git a/src/expression_ops.cpp b/src/expression_ops.cpp index c3c37fe73..714fef3d2 100644 --- a/src/expression_ops.cpp +++ b/src/expression_ops.cpp @@ -37,265 +37,136 @@ expression operator+(expression e) return e; } +// NOTE: in these operators we check for number arguments +// immediately, before forwarding to the underlying implementation. +// We do this in order to avoid accidental promotions and incorrect +// precision propagation due to the use of double-precision constants +// in the implementations of the primitives. 
expression operator-(const expression &e) { - return prod({expression{number{-1.}}, e}); + if (const auto *nptr = std::get_if(&e.value())) { + return expression{-*nptr}; + } else { + return prod({expression{number{-1.}}, e}); + } } // NOLINTNEXTLINE(misc-no-recursion) expression operator+(const expression &e1, const expression &e2) { - return sum({e1, e2}); + if (std::holds_alternative(e1.value()) && std::holds_alternative(e2.value())) { + return expression{std::get(e1.value()) + std::get(e2.value())}; + } else { + return sum({e1, e2}); + } } // NOLINTNEXTLINE(misc-no-recursion) expression operator-(const expression &e1, const expression &e2) { - return e1 + -e2; + if (std::holds_alternative(e1.value()) && std::holds_alternative(e2.value())) { + return expression{std::get(e1.value()) - std::get(e2.value())}; + } else { + return e1 + -e2; + } } // NOLINTNEXTLINE(misc-no-recursion) expression operator*(const expression &e1, const expression &e2) { - return prod({e1, e2}); + if (std::holds_alternative(e1.value()) && std::holds_alternative(e2.value())) { + return expression{std::get(e1.value()) * std::get(e2.value())}; + } else { + return prod({e1, e2}); + } } // NOLINTNEXTLINE(misc-no-recursion) expression operator/(const expression &e1, const expression &e2) { if (std::holds_alternative(e1.value()) && std::holds_alternative(e2.value())) { - // NOTE: if e1 and e2 are numbers, do immediately constant folding. Otherwise, constant folding - // is first done on pow(e2, -1_dbl) and then on the product, which leads to wrong precision - // propagation in case e1 and e2 have different precisions. 
return expression{std::get(e1.value()) / std::get(e2.value())}; } else { return prod({e1, pow(e2, -1_dbl)}); } } -expression operator+(const expression &ex, double x) -{ - return ex + expression{x}; -} - -expression operator+(const expression &ex, long double x) -{ - return ex + expression{x}; -} - -#if defined(HEYOKA_HAVE_REAL128) - -expression operator+(const expression &ex, mppp::real128 x) -{ - return ex + expression{x}; -} - -#endif - -#if defined(HEYOKA_HAVE_REAL) - -expression operator+(const expression &ex, mppp::real x) -{ - return ex + expression{std::move(x)}; -} - -#endif - -expression operator+(double x, const expression &ex) -{ - return expression{x} + ex; -} - -expression operator+(long double x, const expression &ex) -{ - return expression{x} + ex; -} - -#if defined(HEYOKA_HAVE_REAL128) - -expression operator+(mppp::real128 x, const expression &ex) -{ - return expression{x} + ex; -} - -#endif - -#if defined(HEYOKA_HAVE_REAL) - -expression operator+(mppp::real x, const expression &ex) -{ - return expression{std::move(x)} + ex; -} - -#endif - -expression operator-(const expression &ex, double x) -{ - return ex - expression{x}; -} - -expression operator-(const expression &ex, long double x) -{ - return ex - expression{x}; -} - -#if defined(HEYOKA_HAVE_REAL128) - -expression operator-(const expression &ex, mppp::real128 x) -{ - return ex - expression{x}; -} - -#endif - -#if defined(HEYOKA_HAVE_REAL) - -expression operator-(const expression &ex, mppp::real x) -{ - return ex - expression{std::move(x)}; -} - -#endif - -expression operator-(double x, const expression &ex) -{ - return expression{x} - ex; -} - -expression operator-(long double x, const expression &ex) -{ - return expression{x} - ex; -} - -#if defined(HEYOKA_HAVE_REAL128) - -expression operator-(mppp::real128 x, const expression &ex) -{ - return expression{x} - ex; -} - -#endif - -#if defined(HEYOKA_HAVE_REAL) - -expression operator-(mppp::real x, const expression &ex) -{ - return 
expression{std::move(x)} - ex; -} - -#endif - -expression operator*(const expression &ex, double x) -{ - return ex * expression{x}; -} - -expression operator*(const expression &ex, long double x) -{ - return ex * expression{x}; -} - -#if defined(HEYOKA_HAVE_REAL128) - -expression operator*(const expression &ex, mppp::real128 x) -{ - return ex * expression{x}; -} - -#endif - -#if defined(HEYOKA_HAVE_REAL) - -expression operator*(const expression &ex, mppp::real x) -{ - return ex * expression{std::move(x)}; -} - -#endif +#define HEYOKA_EX_BINARY_OP_R(op, type) \ + expression operator op(const expression &ex, type x) \ + { \ + return ex op expression{std::move(x)}; \ + } -expression operator*(double x, const expression &ex) -{ - return expression{x} * ex; -} +#define HEYOKA_EX_BINARY_OP_L(op, type) \ + expression operator op(type x, const expression &ex) \ + { \ + return expression{std::move(x)} op ex; \ + } -expression operator*(long double x, const expression &ex) -{ - return expression{x} * ex; -} +HEYOKA_EX_BINARY_OP_R(+, float) +HEYOKA_EX_BINARY_OP_R(+, double) +HEYOKA_EX_BINARY_OP_R(+, long double) +HEYOKA_EX_BINARY_OP_R(-, float) +HEYOKA_EX_BINARY_OP_R(-, double) +HEYOKA_EX_BINARY_OP_R(-, long double) +HEYOKA_EX_BINARY_OP_R(*, float) +HEYOKA_EX_BINARY_OP_R(*, double) +HEYOKA_EX_BINARY_OP_R(*, long double) +HEYOKA_EX_BINARY_OP_R(/, float) +HEYOKA_EX_BINARY_OP_R(/, double) +HEYOKA_EX_BINARY_OP_R(/, long double) #if defined(HEYOKA_HAVE_REAL128) -expression operator*(mppp::real128 x, const expression &ex) -{ - return expression{x} * ex; -} +HEYOKA_EX_BINARY_OP_R(+, mppp::real128) +HEYOKA_EX_BINARY_OP_R(-, mppp::real128) +HEYOKA_EX_BINARY_OP_R(*, mppp::real128) +HEYOKA_EX_BINARY_OP_R(/, mppp::real128) #endif #if defined(HEYOKA_HAVE_REAL) -expression operator*(mppp::real x, const expression &ex) -{ - return expression{std::move(x)} * ex; -} +HEYOKA_EX_BINARY_OP_R(+, mppp::real) +HEYOKA_EX_BINARY_OP_R(-, mppp::real) +HEYOKA_EX_BINARY_OP_R(*, mppp::real) 
+HEYOKA_EX_BINARY_OP_R(/, mppp::real) #endif -expression operator/(const expression &ex, double x) -{ - return ex / expression{x}; -} - -expression operator/(const expression &ex, long double x) -{ - return ex / expression{x}; -} +HEYOKA_EX_BINARY_OP_L(+, float) +HEYOKA_EX_BINARY_OP_L(+, double) +HEYOKA_EX_BINARY_OP_L(+, long double) +HEYOKA_EX_BINARY_OP_L(-, float) +HEYOKA_EX_BINARY_OP_L(-, double) +HEYOKA_EX_BINARY_OP_L(-, long double) +HEYOKA_EX_BINARY_OP_L(*, float) +HEYOKA_EX_BINARY_OP_L(*, double) +HEYOKA_EX_BINARY_OP_L(*, long double) +HEYOKA_EX_BINARY_OP_L(/, float) +HEYOKA_EX_BINARY_OP_L(/, double) +HEYOKA_EX_BINARY_OP_L(/, long double) #if defined(HEYOKA_HAVE_REAL128) -expression operator/(const expression &ex, mppp::real128 x) -{ - return ex / expression{x}; -} +HEYOKA_EX_BINARY_OP_L(+, mppp::real128) +HEYOKA_EX_BINARY_OP_L(-, mppp::real128) +HEYOKA_EX_BINARY_OP_L(*, mppp::real128) +HEYOKA_EX_BINARY_OP_L(/, mppp::real128) #endif #if defined(HEYOKA_HAVE_REAL) -expression operator/(const expression &ex, mppp::real x) -{ - return ex / expression{std::move(x)}; -} - -#endif - -expression operator/(double x, const expression &ex) -{ - return expression{x} / ex; -} - -expression operator/(long double x, const expression &ex) -{ - return expression{x} / ex; -} - -#if defined(HEYOKA_HAVE_REAL128) - -expression operator/(mppp::real128 x, const expression &ex) -{ - return expression{x} / ex; -} +HEYOKA_EX_BINARY_OP_L(+, mppp::real) +HEYOKA_EX_BINARY_OP_L(-, mppp::real) +HEYOKA_EX_BINARY_OP_L(*, mppp::real) +HEYOKA_EX_BINARY_OP_L(/, mppp::real) #endif -#if defined(HEYOKA_HAVE_REAL) - -expression operator/(mppp::real x, const expression &ex) -{ - return expression{std::move(x)} / ex; -} - -#endif +#undef HEYOKA_EX_BINARY_OP_R +#undef HEYOKA_EX_BINARY_OP_L expression &operator+=(expression &x, const expression &e) { @@ -322,117 +193,46 @@ expression &operator/=(expression &x, const expression &e) return x = x / e; } -expression &operator+=(expression &ex, double x) 
-{ - return ex += expression{x}; -} - -expression &operator+=(expression &ex, long double x) -{ - return ex += expression{x}; -} - -#if defined(HEYOKA_HAVE_REAL128) - -expression &operator+=(expression &ex, mppp::real128 x) -{ - return ex += expression{x}; -} - -#endif - -#if defined(HEYOKA_HAVE_REAL) - -expression &operator+=(expression &ex, mppp::real x) -{ - return ex += expression{std::move(x)}; -} - -#endif - -expression &operator-=(expression &ex, double x) -{ - return ex -= expression{x}; -} - -expression &operator-=(expression &ex, long double x) -{ - return ex -= expression{x}; -} - -#if defined(HEYOKA_HAVE_REAL128) - -expression &operator-=(expression &ex, mppp::real128 x) -{ - return ex -= expression{x}; -} - -#endif - -#if defined(HEYOKA_HAVE_REAL) - -expression &operator-=(expression &ex, mppp::real x) -{ - return ex -= expression{std::move(x)}; -} - -#endif - -expression &operator*=(expression &ex, double x) -{ - return ex *= expression{x}; -} - -expression &operator*=(expression &ex, long double x) -{ - return ex *= expression{x}; -} +// NOLINTBEGIN +#define HEYOKA_EX_COMPOUND_OP(op, type) \ + expression &operator op(expression & ex, type x) \ + { \ + return ex op expression{std::move(x)}; \ + } +// NOLINTEND + +HEYOKA_EX_COMPOUND_OP(+=, float) +HEYOKA_EX_COMPOUND_OP(+=, double) +HEYOKA_EX_COMPOUND_OP(+=, long double) +HEYOKA_EX_COMPOUND_OP(-=, float) +HEYOKA_EX_COMPOUND_OP(-=, double) +HEYOKA_EX_COMPOUND_OP(-=, long double) +HEYOKA_EX_COMPOUND_OP(*=, float) +HEYOKA_EX_COMPOUND_OP(*=, double) +HEYOKA_EX_COMPOUND_OP(*=, long double) +HEYOKA_EX_COMPOUND_OP(/=, float) +HEYOKA_EX_COMPOUND_OP(/=, double) +HEYOKA_EX_COMPOUND_OP(/=, long double) #if defined(HEYOKA_HAVE_REAL128) -expression &operator*=(expression &ex, mppp::real128 x) -{ - return ex *= expression{x}; -} +HEYOKA_EX_COMPOUND_OP(+=, mppp::real128) +HEYOKA_EX_COMPOUND_OP(-=, mppp::real128) +HEYOKA_EX_COMPOUND_OP(*=, mppp::real128) +HEYOKA_EX_COMPOUND_OP(/=, mppp::real128) #endif #if 
defined(HEYOKA_HAVE_REAL) -expression &operator*=(expression &ex, mppp::real x) -{ - return ex *= expression{std::move(x)}; -} +HEYOKA_EX_COMPOUND_OP(+=, mppp::real) +HEYOKA_EX_COMPOUND_OP(-=, mppp::real) +HEYOKA_EX_COMPOUND_OP(*=, mppp::real) +HEYOKA_EX_COMPOUND_OP(/=, mppp::real) #endif -expression &operator/=(expression &ex, double x) -{ - return ex /= expression{x}; -} - -expression &operator/=(expression &ex, long double x) -{ - return ex /= expression{x}; -} - -#if defined(HEYOKA_HAVE_REAL128) - -expression &operator/=(expression &ex, mppp::real128 x) -{ - return ex /= expression{x}; -} - -#endif - -#if defined(HEYOKA_HAVE_REAL) - -expression &operator/=(expression &ex, mppp::real x) -{ - return ex /= expression{std::move(x)}; -} - -#endif +#undef HEYOKA_EX_COMPOUND_OP bool operator==(const expression &e1, const expression &e2) { diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp index e48739be0..2d755158a 100644 --- a/src/llvm_state.cpp +++ b/src/llvm_state.cpp @@ -253,12 +253,14 @@ target_features get_target_features_impl() // Compute the recommended SIMD sizes. if (retval.avx512f || retval.avx2 || retval.avx) { // NOTE: keep the recommended SIMD size to - // 4 also for AVX512 due to perf issues in early + // 4/8 also for AVX512 due to perf issues in early // implementations. Revisit this in the future, possibly // making it conditional on the specific CPU model // in use. 
+ retval.simd_size_flt = 8; retval.simd_size_dbl = 4; } else if (retval.sse2 || retval.aarch64 || retval.vsx || retval.vsx3) { + retval.simd_size_flt = 4; retval.simd_size_dbl = 2; } @@ -298,6 +300,12 @@ const target_features &get_target_features() } // namespace detail +template <> +std::uint32_t recommended_simd_size() +{ + return detail::get_target_features().simd_size_flt; +} + template <> std::uint32_t recommended_simd_size() { diff --git a/src/math/atan2.cpp b/src/math/atan2.cpp index b9407d606..c2ef4a1a5 100644 --- a/src/math/atan2.cpp +++ b/src/math/atan2.cpp @@ -833,6 +833,11 @@ expression atan2(expression y, expression x) } } +expression atan2(expression y, float x) +{ + return atan2(std::move(y), expression(x)); +} + expression atan2(expression y, double x) { return atan2(std::move(y), expression(x)); @@ -861,6 +866,11 @@ expression atan2(expression y, mppp::real x) #endif +expression atan2(float y, expression x) +{ + return atan2(expression(y), std::move(x)); +} + expression atan2(double y, expression x) { return atan2(expression(y), std::move(x)); diff --git a/src/math/kepDE.cpp b/src/math/kepDE.cpp index 49540c1bb..48769a751 100644 --- a/src/math/kepDE.cpp +++ b/src/math/kepDE.cpp @@ -186,6 +186,7 @@ expression kepDE(expression s0, expression c0, expression DM) return kepDE(expression{std::move(s0)}, std::move(c0), std::move(DM)); \ } +HEYOKA_DEFINE_KEPDE_OVERLOADS(float) HEYOKA_DEFINE_KEPDE_OVERLOADS(double) HEYOKA_DEFINE_KEPDE_OVERLOADS(long double) diff --git a/src/math/kepE.cpp b/src/math/kepE.cpp index 37cf42b27..41d6de353 100644 --- a/src/math/kepE.cpp +++ b/src/math/kepE.cpp @@ -868,6 +868,7 @@ expression kepE(expression e, expression M) return kepE(expression{std::move(e)}, std::move(M)); \ } +HEYOKA_DEFINE_KEPE_OVERLOADS(float) HEYOKA_DEFINE_KEPE_OVERLOADS(double) HEYOKA_DEFINE_KEPE_OVERLOADS(long double) diff --git a/src/math/kepF.cpp b/src/math/kepF.cpp index b5f8ad225..47f0ce0c6 100644 --- a/src/math/kepF.cpp +++ b/src/math/kepF.cpp @@ 
-1785,6 +1785,7 @@ expression kepF(expression h, expression k, expression lam) return kepF(expression{std::move(h)}, std::move(k), std::move(lam)); \ } +HEYOKA_DEFINE_KEPF_OVERLOADS(float) HEYOKA_DEFINE_KEPF_OVERLOADS(double) HEYOKA_DEFINE_KEPF_OVERLOADS(long double) diff --git a/src/math/pow.cpp b/src/math/pow.cpp index f8b6706ba..a285a9e00 100644 --- a/src/math/pow.cpp +++ b/src/math/pow.cpp @@ -1180,6 +1180,11 @@ expression pow(expression b, expression e) return detail::pow_wrapper_impl(std::move(b), std::move(e)); } +expression pow(expression b, float e) +{ + return pow(std::move(b), expression{e}); +} + expression pow(expression b, double e) { return pow(std::move(b), expression{e}); diff --git a/src/step_callback.cpp b/src/step_callback.cpp index c0fad1462..b3a4a9f6e 100644 --- a/src/step_callback.cpp +++ b/src/step_callback.cpp @@ -110,6 +110,10 @@ void swap(step_callback_impl &a, step_callback_impl &b) noexcept } // Explicit instantiations. +template class step_callback_impl>; +template HEYOKA_DLL_PUBLIC void swap(step_callback_impl> &, + step_callback_impl> &); + template class step_callback_impl>; template HEYOKA_DLL_PUBLIC void swap(step_callback_impl> &, step_callback_impl> &); @@ -134,6 +138,10 @@ template HEYOKA_DLL_PUBLIC void swap(step_callback_impl>; +template HEYOKA_DLL_PUBLIC void swap(step_callback_impl> &, + step_callback_impl> &); + template class step_callback_impl>; template HEYOKA_DLL_PUBLIC void swap(step_callback_impl> &, step_callback_impl> &); diff --git a/src/taylor_00.cpp b/src/taylor_00.cpp index c1f013f88..c99cb6ded 100644 --- a/src/taylor_00.cpp +++ b/src/taylor_00.cpp @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -2177,6 +2176,8 @@ const std::vector &taylor_adaptive::update_d_output(T time, bool rel_time) namespace detail { +template class taylor_adaptive_base>; + template class taylor_adaptive_base>; template class taylor_adaptive_base>; @@ -2195,6 +2196,19 @@ template class taylor_adaptive_base>; } // 
namespace detail +template class taylor_adaptive; + +template HEYOKA_DLL_PUBLIC void +taylor_adaptive::finalise_ctor_impl(const std::vector &, std::vector, std::optional, + std::optional, bool, bool, std::vector, std::vector, + std::vector, bool, std::optional); + +template HEYOKA_DLL_PUBLIC void +taylor_adaptive::finalise_ctor_impl(const std::vector> &, std::vector, + std::optional, std::optional, bool, bool, std::vector, + std::vector, std::vector, bool, + std::optional); + template class taylor_adaptive; template HEYOKA_DLL_PUBLIC void taylor_adaptive::finalise_ctor_impl( @@ -4274,6 +4288,16 @@ void taylor_adaptive_batch::reset_cooldowns(std::uint32_t i) } // Explicit instantiation of the batch implementation classes. +template class taylor_adaptive_batch; + +template HEYOKA_DLL_PUBLIC void taylor_adaptive_batch::finalise_ctor_impl( + const std::vector &, std::vector, std::uint32_t, std::vector, std::optional, bool, + bool, std::vector, std::vector, std::vector, bool); + +template HEYOKA_DLL_PUBLIC void taylor_adaptive_batch::finalise_ctor_impl( + const std::vector> &, std::vector, std::uint32_t, std::vector, + std::optional, bool, bool, std::vector, std::vector, std::vector, bool); + template class taylor_adaptive_batch; template HEYOKA_DLL_PUBLIC void taylor_adaptive_batch::finalise_ctor_impl( diff --git a/src/taylor_01.cpp b/src/taylor_01.cpp index 6eb23ca0b..e957532f5 100644 --- a/src/taylor_01.cpp +++ b/src/taylor_01.cpp @@ -1348,6 +1348,12 @@ std::ostream &taylor_adaptive_batch_stream_impl(std::ostream &os, const taylor_a } // namespace detail +template <> +std::ostream &operator<<(std::ostream &os, const taylor_adaptive &ta) +{ + return detail::taylor_adaptive_stream_impl(os, ta); +} + template <> std::ostream &operator<<(std::ostream &os, const taylor_adaptive &ta) { @@ -1380,6 +1386,12 @@ std::ostream &operator<<(std::ostream &os, const taylor_adaptive &ta #endif +template <> +std::ostream &operator<<(std::ostream &os, const taylor_adaptive_batch &ta) 
+{ + return detail::taylor_adaptive_batch_stream_impl(os, ta); +} + template <> std::ostream &operator<<(std::ostream &os, const taylor_adaptive_batch &ta) { @@ -1623,6 +1635,18 @@ std::ostream &t_event_impl_stream_impl(std::ostream &os, const expression &eq, e } // namespace +template <> +std::ostream &operator<<(std::ostream &os, const nt_event_impl &e) +{ + return nt_event_impl_stream_impl(os, e.get_expression(), e.get_direction()); +} + +template <> +std::ostream &operator<<(std::ostream &os, const nt_event_impl &e) +{ + return nt_event_impl_stream_impl(os, e.get_expression(), e.get_direction()); +} + template <> std::ostream &operator<<(std::ostream &os, const nt_event_impl &e) { @@ -1673,6 +1697,18 @@ std::ostream &operator<<(std::ostream &os, const nt_event_impl +std::ostream &operator<<(std::ostream &os, const t_event_impl &e) +{ + return t_event_impl_stream_impl(os, e.get_expression(), e.get_direction(), e.get_callback(), e.get_cooldown()); +} + +template <> +std::ostream &operator<<(std::ostream &os, const t_event_impl &e) +{ + return t_event_impl_stream_impl(os, e.get_expression(), e.get_direction(), e.get_callback(), e.get_cooldown()); +} + template <> std::ostream &operator<<(std::ostream &os, const t_event_impl &e) { @@ -1724,6 +1760,12 @@ std::ostream &operator<<(std::ostream &os, const t_event_impl #endif // Explicit instantiation of the implementation classes/functions. +template class nt_event_impl; +template class t_event_impl; + +template class nt_event_impl; +template class t_event_impl; + template class nt_event_impl; template class t_event_impl; @@ -2350,6 +2392,7 @@ std::size_t continuous_output::get_n_steps() const } // Explicit instantiations. 
+template class continuous_output; template class continuous_output; template class continuous_output; @@ -2397,6 +2440,12 @@ std::ostream &c_out_stream_impl(std::ostream &os, const continuous_output &co } // namespace detail +template <> +std::ostream &operator<<(std::ostream &os, const continuous_output &co) +{ + return detail::c_out_stream_impl(os, co); +} + template <> std::ostream &operator<<(std::ostream &os, const continuous_output &co) { @@ -3096,6 +3145,7 @@ std::size_t continuous_output_batch::get_n_steps() const } // Explicit instantiations. +template class continuous_output_batch; template class continuous_output_batch; template class continuous_output_batch; @@ -3160,6 +3210,12 @@ std::ostream &c_out_batch_stream_impl(std::ostream &os, const continuous_output_ } // namespace detail +template <> +std::ostream &operator<<(std::ostream &os, const continuous_output_batch &co) +{ + return detail::c_out_batch_stream_impl(os, co); +} + template <> std::ostream &operator<<(std::ostream &os, const continuous_output_batch &co) { diff --git a/src/taylor_02.cpp b/src/taylor_02.cpp index b2b7df9e4..9f01c4b34 100644 --- a/src/taylor_02.cpp +++ b/src/taylor_02.cpp @@ -2053,6 +2053,16 @@ taylor_dc_t taylor_add_jet(llvm_state &s, const std::string &name, } // Explicit instantiations. +template HEYOKA_DLL_PUBLIC taylor_dc_t taylor_add_jet(llvm_state &, const std::string &, + const std::vector &, std::uint32_t, + std::uint32_t, bool, bool, const std::vector &, + bool, long long); + +template HEYOKA_DLL_PUBLIC taylor_dc_t taylor_add_jet(llvm_state &, const std::string &, + const std::vector> &, + std::uint32_t, std::uint32_t, bool, bool, + const std::vector &, bool, long long); + template HEYOKA_DLL_PUBLIC taylor_dc_t taylor_add_jet(llvm_state &, const std::string &, const std::vector &, std::uint32_t, std::uint32_t, bool, bool, @@ -2121,6 +2131,8 @@ T taylor_default_max_delta_t() } // Explicit instantiations. 
+template HEYOKA_DLL_PUBLIC float taylor_default_max_delta_t(); + template HEYOKA_DLL_PUBLIC double taylor_default_max_delta_t(); template HEYOKA_DLL_PUBLIC long double taylor_default_max_delta_t(); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c63522e77..c3e4d0232 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -34,6 +34,7 @@ endfunction() ADD_HEYOKA_TESTCASE(readme_test) ADD_HEYOKA_TESTCASE(llvm_state) ADD_HEYOKA_TESTCASE(expression) +ADD_HEYOKA_TESTCASE(expression_basic_api) ADD_HEYOKA_TESTCASE(expression_diff_tensors) ADD_HEYOKA_TESTCASE(expression_folding) ADD_HEYOKA_TESTCASE(expression_fix) diff --git a/test/acos.cpp b/test/acos.cpp index 3c997b7ca..241fd9167 100644 --- a/test/acos.cpp +++ b/test/acos.cpp @@ -50,7 +50,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {acos(a), acos(b), acos(c), acos(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{.1f, .2f, .3f, .4f}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::acos(.1f))); + REQUIRE(outs[1] == approximately(std::acos(.2f))); + REQUIRE(outs[2] == approximately(std::acos(.3f))); + REQUIRE(outs[3] == approximately(std::acos(.4f))); + + // NOTE: autovec with external scalar functions seems to work + // only since LLVM 16. +#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16 + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@acosf", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. 
+ if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + + if (tf.aarch64) { + REQUIRE(count == 5u); + } + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. + + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/acosh.cpp b/test/acosh.cpp index 0d6f53d4e..d70ec1bb8 100644 --- a/test/acosh.cpp +++ b/test/acosh.cpp @@ -50,7 +50,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {acosh(a), acosh(b), acosh(c), acosh(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{1.1f, 1.2f, 1.3f, 1.4f}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::acosh(1.1f))); + REQUIRE(outs[1] == approximately(std::acosh(1.2f))); + REQUIRE(outs[2] == approximately(std::acosh(1.3f))); + REQUIRE(outs[3] == approximately(std::acosh(1.4f))); + + // NOTE: autovec with external scalar functions seems to work + // only since LLVM 16. +#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16 + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@acoshf", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. 
+ REQUIRE(count == 5u); + } + + // NOTE: LLVM16 is currently the version tested in the CI on arm64. + if (tf.aarch64) { + REQUIRE(count == 5u); + } + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. + + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/asin.cpp b/test/asin.cpp index 805b1d2b5..713df6695 100644 --- a/test/asin.cpp +++ b/test/asin.cpp @@ -50,7 +50,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {asin(a), asin(b), asin(c), asin(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{.1f, .2f, .3f, .4f}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::asin(.1f))); + REQUIRE(outs[1] == approximately(std::asin(.2f))); + REQUIRE(outs[2] == approximately(std::asin(.3f))); + REQUIRE(outs[3] == approximately(std::asin(.4f))); + + // NOTE: autovec with external scalar functions seems to work + // only since LLVM 16. +#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16 + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@asinf", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. 
+ REQUIRE(count == 5u); + } + + if (tf.aarch64) { + REQUIRE(count == 5u); + } + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. + + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/asinh.cpp b/test/asinh.cpp index ec361e960..f44bfe307 100644 --- a/test/asinh.cpp +++ b/test/asinh.cpp @@ -50,7 +50,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {asinh(a), asinh(b), asinh(c), asinh(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{1.1f, 1.2f, 1.3f, 1.4f}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::asinh(1.1f))); + REQUIRE(outs[1] == approximately(std::asinh(1.2f))); + REQUIRE(outs[2] == approximately(std::asinh(1.3f))); + REQUIRE(outs[3] == approximately(std::asinh(1.4f))); + + // NOTE: autovec with external scalar functions seems to work + // only since LLVM 16. +#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16 + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@asinhf", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + + // NOTE: LLVM16 is currently the version tested in the CI on arm64. 
+ if (tf.aarch64) { + REQUIRE(count == 5u); + } + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. + + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/atan.cpp b/test/atan.cpp index 391f62389..6d69c7b20 100644 --- a/test/atan.cpp +++ b/test/atan.cpp @@ -49,7 +49,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {atan(a), atan(b), atan(c), atan(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{.1f, .2f, .3f, .4f}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::atan(.1f))); + REQUIRE(outs[1] == approximately(std::atan(.2f))); + REQUIRE(outs[2] == approximately(std::atan(.3f))); + REQUIRE(outs[3] == approximately(std::atan(.4f))); + + // NOTE: autovec with external scalar functions seems to work + // only since LLVM 16. +#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16 + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@atanf", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + + // NOTE: LLVM16 is currently the version tested in the CI on arm64. 
+ if (tf.aarch64) { + REQUIRE(count == 5u); + } + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. + + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/atan2.cpp b/test/atan2.cpp index bf512833a..e64618142 100644 --- a/test/atan2.cpp +++ b/test/atan2.cpp @@ -10,7 +10,6 @@ #include #include -#include #include #include #include @@ -62,7 +61,7 @@ using namespace mppp::literals; #endif -const auto fp_types = std::tuple(k.value()).args()[0] == "x"_var); + REQUIRE(std::get(std::get(k.value()).args()[1].value()) == number{1.1f}); + + k = atan2("x"_var, 1.1); REQUIRE(std::get(k.value()).args()[0] == "x"_var); REQUIRE(std::get(std::get(k.value()).args()[1].value()) == number{1.1}); @@ -130,6 +133,10 @@ TEST_CASE("atan2 overloads") REQUIRE(std::get(std::get(k.value()).args()[1].value()) == number{1.1_r256}); #endif + k = atan2(1.1f, "x"_var); + REQUIRE(std::get(k.value()).args()[1] == "x"_var); + REQUIRE(std::get(std::get(k.value()).args()[0].value()) == number{1.1f}); + k = atan2(1.1, "x"_var); REQUIRE(std::get(k.value()).args()[1] == "x"_var); REQUIRE(std::get(std::get(k.value()).args()[0].value()) == number{1.1}); @@ -324,8 +331,8 @@ TEST_CASE("normalise") REQUIRE(normalise(subs(atan2(x, y), {{x, .1_dbl}, {y, .2_dbl}})) == atan2(.1_dbl, .2_dbl)); } -// Test to check vectorisation via the vector-function-abi-variant machinery. -TEST_CASE("vfabi") +// Tests to check vectorisation via the vector-function-abi-variant machinery. 
+TEST_CASE("vfabi double") { llvm_state s{kw::slp_vectorize = true}; @@ -385,3 +392,66 @@ TEST_CASE("vfabi") #endif } + +TEST_CASE("vfabi float") +{ + llvm_state s{kw::slp_vectorize = true}; + + auto [a, b, c, d] = make_vars("a", "b", "c", "d"); + + add_cfunc(s, "cfunc", {atan2(a, .5f), atan2(b, .6f), atan2(c, .7f), atan2(d, .8f)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{.1f, .2f, .3f, .4f}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::atan2(.1f, .5f))); + REQUIRE(outs[1] == approximately(std::atan2(.2f, .6f))); + REQUIRE(outs[2] == approximately(std::atan2(.3f, .7f))); + REQUIRE(outs[3] == approximately(std::atan2(.4f, .8f))); + + // NOTE: autovec with external scalar functions seems to work + // only since LLVM 16. +#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16 + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@atan2f", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + + if (tf.aarch64) { + REQUIRE(count == 5u); + } + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. 
+ + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/atanh.cpp b/test/atanh.cpp index 191582bfb..98efb30b0 100644 --- a/test/atanh.cpp +++ b/test/atanh.cpp @@ -49,7 +49,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {atanh(a), atanh(b), atanh(c), atanh(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{.1f, .2f, .3f, .4f}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::atanh(.1f))); + REQUIRE(outs[1] == approximately(std::atanh(.2f))); + REQUIRE(outs[2] == approximately(std::atanh(.3f))); + REQUIRE(outs[3] == approximately(std::atanh(.4f))); + + // NOTE: autovec with external scalar functions seems to work + // only since LLVM 16. +#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16 + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@atanhf", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + + if (tf.aarch64) { + REQUIRE(count == 5u); + } + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. 
+ + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/c_output.cpp b/test/c_output.cpp index b8554f0ce..c282a0da3 100644 --- a/test/c_output.cpp +++ b/test/c_output.cpp @@ -38,7 +38,7 @@ using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tupleget_n_steps() > 0u); // Try slightly outside the bounds. - (*d_out)(-.01); + (*d_out)(fp_t(-.01)); REQUIRE(d_out->get_output()[0] == approximately(sin(fp_t(-0.01)))); REQUIRE(d_out->get_output()[1] == approximately(cos(fp_t(-0.01)))); - (*d_out)(10.01); + (*d_out)(fp_t(10.01)); REQUIRE(d_out->get_output()[0] == approximately(sin(fp_t(10.01)))); REQUIRE(d_out->get_output()[1] == approximately(cos(fp_t(10.01)))); @@ -200,10 +200,10 @@ TEST_CASE("scalar") REQUIRE(d_out->get_n_steps() > 0u); // Try slightly outside the bounds. - (*d_out)(.01); + (*d_out)(fp_t(.01)); REQUIRE(d_out->get_output()[0] == approximately(sin(fp_t(0.01)))); REQUIRE(d_out->get_output()[1] == approximately(cos(fp_t(0.01)))); - (*d_out)(-10.01); + (*d_out)(fp_t(-10.01)); REQUIRE(d_out->get_output()[0] == approximately(sin(fp_t(-10.01)))); REQUIRE(d_out->get_output()[1] == approximately(cos(fp_t(-10.01)))); @@ -345,7 +345,7 @@ TEST_CASE("batch") // The vector of final times. std::vector final_tm; for (auto i = 0u; i < batch_size; ++i) { - final_tm.push_back(10. + fp_t(i) / 100); + final_tm.push_back(fp_t(10.) + fp_t(i) / 100); } // Create a random batch grid. @@ -357,7 +357,7 @@ TEST_CASE("batch") std::uniform_real_distribution rdist(1e-6, 10. + i / 100. - 1e-6); for (auto j = 0u; j < n_points - 2u; ++j) { - tmp[j] = rdist(rng); + tmp[j] = static_cast(rdist(rng)); } std::sort(tmp.begin(), tmp.end()); @@ -424,7 +424,7 @@ TEST_CASE("batch") // Try slightly outside the bounds. 
for (auto j = 0u; j < batch_size; ++j) { - loc_time[j] = -0.01; + loc_time[j] = fp_t(-0.01); } (*d_out)(loc_time); for (auto j = 0u; j < batch_size; ++j) { @@ -434,7 +434,7 @@ TEST_CASE("batch") == approximately(-ic[j] * sin(loc_time[j]) + ic[batch_size + j] * cos(loc_time[j]))); } for (auto j = 0u; j < batch_size; ++j) { - loc_time[j] = final_tm[j] + 0.01; + loc_time[j] = final_tm[j] + fp_t(0.01); } (*d_out)(loc_time); for (auto j = 0u; j < batch_size; ++j) { @@ -560,7 +560,7 @@ TEST_CASE("batch") // Try slightly outside the bounds. for (auto j = 0u; j < batch_size; ++j) { - loc_time[j] = 0.01; + loc_time[j] = fp_t(0.01); } (*d_out)(loc_time); for (auto j = 0u; j < batch_size; ++j) { @@ -570,7 +570,7 @@ TEST_CASE("batch") == approximately(-ic[j] * sin(loc_time[j]) + ic[batch_size + j] * cos(loc_time[j]))); } for (auto j = 0u; j < batch_size; ++j) { - loc_time[j] = final_tm[j] - 0.01; + loc_time[j] = final_tm[j] - fp_t(0.01); } (*d_out)(loc_time); for (auto j = 0u; j < batch_size; ++j) { diff --git a/test/constants.cpp b/test/constants.cpp index c2aee8f14..4c8916d6c 100644 --- a/test/constants.cpp +++ b/test/constants.cpp @@ -50,7 +50,7 @@ using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {cos(a), cos(b), cos(c), cos(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{1., 2., 3., 4.}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::cos(1.f))); + REQUIRE(outs[1] == approximately(std::cos(2.f))); + REQUIRE(outs[2] == approximately(std::cos(3.f))); + REQUIRE(outs[3] == approximately(std::cos(4.f))); + +#if defined(HEYOKA_WITH_SLEEF) + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@llvm.cos.f32", 
boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + +#if LLVM_VERSION_MAJOR >= 16 + + // NOTE: LLVM16 is currently the version tested in the CI on arm64. + if (tf.aarch64) { + REQUIRE(count == 5u); + } + +#endif + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. + + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/cosh.cpp b/test/cosh.cpp index 31b58c9c8..c6ca5c8b9 100644 --- a/test/cosh.cpp +++ b/test/cosh.cpp @@ -50,7 +50,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {cosh(a), cosh(b), cosh(c), cosh(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{1., 2., 3., 4.}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::cosh(1.f))); + REQUIRE(outs[1] == approximately(std::cosh(2.f))); + REQUIRE(outs[2] == approximately(std::cosh(3.f))); + REQUIRE(outs[3] == approximately(std::cosh(4.f))); + + // NOTE: autovec with external scalar functions seems to work + // only since LLVM 16. 
+#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16 + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@coshf", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + + if (tf.aarch64) { + REQUIRE(count == 5u); + } + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. + + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/dfloat_time.cpp b/test/dfloat_time.cpp index 06f5ed57f..beff5ab4c 100644 --- a/test/dfloat_time.cpp +++ b/test/dfloat_time.cpp @@ -37,7 +37,7 @@ const int ntrials = 100; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(rdist(rng)); ta.set_time(0); ta.get_state_data()[0] = 0; @@ -93,7 +93,7 @@ TEST_CASE("scalar test") err = 0; for (auto i = 0; i < ntrials; ++i) { - const auto v0 = fp_t(1) + rdist(rng); + const auto v0 = fp_t(1) + static_cast(rdist(rng)); ta.set_time(0); ta.get_state_data()[0] = 0; @@ -115,7 +115,7 @@ TEST_CASE("scalar test") err = 0; for (auto i = 0; i < ntrials; ++i) { - const auto v0 = fp_t(1) + rdist(rng); + const auto v0 = fp_t(1) + static_cast(rdist(rng)); ta.set_time(0); ta.get_state_data()[0] = 0; @@ -139,7 +139,7 @@ TEST_CASE("scalar test") err = 0; for (auto i = 0; i < ntrials; ++i) { - const auto v0 = fp_t(1) + rdist(rng); + const auto v0 = fp_t(1) + static_cast(rdist(rng)); ta.set_time(0); ta.get_state_data()[0] = 0; @@ -182,8 
+182,8 @@ TEST_CASE("batch test") std::uniform_real_distribution rdist(-1e-9, 1e-9); for (auto i = 0; i < ntrials; ++i) { - const auto v0 = fp_t(1) + rdist(rng); - const auto v1 = fp_t(1) + rdist(rng); + const auto v0 = fp_t(1) + static_cast(rdist(rng)); + const auto v1 = fp_t(1) + static_cast(rdist(rng)); ta.set_time({fp_t(0), 0}); ta.get_state_data()[0] = 0; @@ -212,8 +212,8 @@ TEST_CASE("batch test") final_time = std::vector{fp_t(-10000.), fp_t(-11000.)}; for (auto i = 0; i < ntrials; ++i) { - const auto v0 = fp_t(1) + rdist(rng); - const auto v1 = fp_t(1) + rdist(rng); + const auto v0 = fp_t(1) + static_cast(rdist(rng)); + const auto v1 = fp_t(1) + static_cast(rdist(rng)); ta.set_time({fp_t(0), 0}); ta.get_state_data()[0] = 0; @@ -240,8 +240,8 @@ TEST_CASE("batch test") err = 0; for (auto i = 0; i < ntrials; ++i) { - const auto v0 = fp_t(1) + rdist(rng); - const auto v1 = fp_t(1) + rdist(rng); + const auto v0 = fp_t(1) + static_cast(rdist(rng)); + const auto v1 = fp_t(1) + static_cast(rdist(rng)); ta.set_time({fp_t(0), 0}); ta.get_state_data()[0] = 0; @@ -270,8 +270,8 @@ TEST_CASE("batch test") err = 0; for (auto i = 0; i < ntrials; ++i) { - const auto v0 = fp_t(1) + rdist(rng); - const auto v1 = fp_t(1) + rdist(rng); + const auto v0 = fp_t(1) + static_cast(rdist(rng)); + const auto v1 = fp_t(1) + static_cast(rdist(rng)); ta.set_time({fp_t(0), 0}); ta.get_state_data()[0] = 0; diff --git a/test/div.cpp b/test/div.cpp index 9334edea4..99180f668 100644 --- a/test/div.cpp +++ b/test/div.cpp @@ -46,7 +46,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {erf(a), erf(b), erf(c), erf(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{.1f, .2f, .3f, .4f}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::erf(.1f))); + REQUIRE(outs[1] == 
approximately(std::erf(.2f))); + REQUIRE(outs[2] == approximately(std::erf(.3f))); + REQUIRE(outs[3] == approximately(std::erf(.4f))); + + // NOTE: autovec with external scalar functions seems to work + // only since LLVM 16. +#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16 + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@erff", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + + if (tf.aarch64) { + REQUIRE(count == 5u); + } + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. 
+ + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/exp.cpp b/test/exp.cpp index 1aec93baf..768234042 100644 --- a/test/exp.cpp +++ b/test/exp.cpp @@ -50,7 +50,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {exp(a), exp(b), exp(c), exp(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{1., 2., 3., 4.}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::exp(1.f))); + REQUIRE(outs[1] == approximately(std::exp(2.f))); + REQUIRE(outs[2] == approximately(std::exp(3.f))); + REQUIRE(outs[3] == approximately(std::exp(4.f))); + +#if defined(HEYOKA_WITH_SLEEF) + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@llvm.exp.f32", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + +#if LLVM_VERSION_MAJOR >= 16 + + // NOTE: LLVM16 is currently the version tested in the CI on arm64. + if (tf.aarch64) { + REQUIRE(count == 5u); + } + +#endif + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. 
+ + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/expression_basic_api.cpp b/test/expression_basic_api.cpp new file mode 100644 index 000000000..4fc75f88b --- /dev/null +++ b/test/expression_basic_api.cpp @@ -0,0 +1,331 @@ +// Copyright 2020, 2021, 2022, 2023 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com) +// +// This file is part of the heyoka library. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include + +#include + +#if defined(HEYOKA_HAVE_REAL128) + +#include + +#endif + +#if defined(HEYOKA_HAVE_REAL) + +#include + +#endif + +#include +#include + +#include "catch.hpp" + +using namespace heyoka; + +TEST_CASE("number ctors") +{ + REQUIRE(std::get(expression{1.1f}.value()) == number{1.1f}); + REQUIRE(std::get(expression{1.1}.value()) == number{1.1}); + REQUIRE(std::get(expression{1.1l}.value()) == number{1.1l}); + +#if defined(HEYOKA_HAVE_REAL128) + + REQUIRE(std::get(expression{mppp::real128{"1.1"}}.value()) == number{mppp::real128{"1.1"}}); + +#endif + +#if defined(HEYOKA_HAVE_REAL) + + REQUIRE(std::get(expression{mppp::real{"1.1", 123}}.value()) == number{mppp::real{"1.1", 123}}); + +#endif +} + +TEST_CASE("literals") +{ + REQUIRE(1.1_flt == expression{1.1f}); + REQUIRE(1111111111111111111_flt == expression{1111111111111111111.f}); + + REQUIRE(1.1_dbl == expression{1.1}); + REQUIRE(1111111111111111111_dbl == expression{1111111111111111111.}); + + REQUIRE(1.1_ldbl == expression{1.1l}); + REQUIRE(1111111111111111111_ldbl == expression{1111111111111111111.l}); + +#if defined(HEYOKA_HAVE_REAL128) + + using namespace mppp::literals; + + REQUIRE(1.1_f128 == expression{1.1_rq}); + REQUIRE(1111111111111111111_f128 == expression{1111111111111111111._rq}); + +#endif +} + +TEST_CASE("number binary ops") +{ + + REQUIRE(1_flt + 1.1f == 
expression{1.f + 1.1f}); + REQUIRE(1.1f + 1_flt == expression{1.f + 1.1f}); + + REQUIRE(1_dbl + 1.1 == expression{1. + 1.1}); + REQUIRE(1.1 + 1_dbl == expression{1. + 1.1}); + + REQUIRE(1_ldbl + 1.1l == expression{1.l + 1.1l}); + REQUIRE(1.1l + 1_ldbl == expression{1.l + 1.1l}); + +#if defined(HEYOKA_HAVE_REAL128) + + using namespace mppp::literals; + + REQUIRE(1_f128 + 1.1_rq == expression{1._rq + 1.1_rq}); + REQUIRE(1.1_rq + 1_f128 == expression{1._rq + 1.1_rq}); + +#endif + +#if defined(HEYOKA_HAVE_REAL) + + REQUIRE(1_dbl + mppp::real{"1.1", 345} == expression{1. + mppp::real{"1.1", 345}}); + REQUIRE(mppp::real{"1.1", 345} + 1_dbl == expression{1. + mppp::real{"1.1", 345}}); + +#endif + + REQUIRE(1_flt - 1.1f == expression{1.f - 1.1f}); + REQUIRE(1.1f - 1_flt == expression{1.1f - 1.f}); + + REQUIRE(1_dbl - 1.1 == expression{1. - 1.1}); + REQUIRE(1.1 - 1_dbl == expression{1.1 - 1.}); + + REQUIRE(1_ldbl - 1.1l == expression{1.l - 1.1l}); + REQUIRE(1.1l - 1_ldbl == expression{1.1l - 1.l}); + +#if defined(HEYOKA_HAVE_REAL128) + + REQUIRE(1_f128 - 1.1_rq == expression{1._rq - 1.1_rq}); + REQUIRE(1.1_rq - 1_f128 == expression{1.1_rq - 1._rq}); + +#endif + +#if defined(HEYOKA_HAVE_REAL) + + REQUIRE(1_dbl - mppp::real{"1.1", 345} == expression{1. - mppp::real{"1.1", 345}}); + REQUIRE(mppp::real{"1.1", 345} - 1_dbl == expression{mppp::real{"1.1", 345} - 1.}); + +#endif + + REQUIRE(1_flt * 1.1f == expression{1.f * 1.1f}); + REQUIRE(1.1f * 1_flt == expression{1.1f * 1.f}); + + REQUIRE(1_dbl * 1.1 == expression{1. * 1.1}); + REQUIRE(1.1 * 1_dbl == expression{1.1 * 1.}); + + REQUIRE(1_ldbl * 1.1l == expression{1.l * 1.1l}); + REQUIRE(1.1l * 1_ldbl == expression{1.1l * 1.l}); + +#if defined(HEYOKA_HAVE_REAL128) + + REQUIRE(1_f128 * 1.1_rq == expression{1._rq * 1.1_rq}); + REQUIRE(1.1_rq * 1_f128 == expression{1.1_rq * 1._rq}); + +#endif + +#if defined(HEYOKA_HAVE_REAL) + + REQUIRE(1_dbl * mppp::real{"1.1", 345} == expression{1. 
* mppp::real{"1.1", 345}}); + REQUIRE(mppp::real{"1.1", 345} * 1_dbl == expression{mppp::real{"1.1", 345} * 1.}); + +#endif + + REQUIRE(1_flt / 1.1f == expression{1.f / 1.1f}); + REQUIRE(1.1f / 1_flt == expression{1.1f / 1.f}); + + REQUIRE(1_dbl / 1.1 == expression{1. / 1.1}); + REQUIRE(1.1 / 1_dbl == expression{1.1 / 1.}); + + REQUIRE(1_ldbl / 1.1l == expression{1.l / 1.1l}); + REQUIRE(1.1l / 1_ldbl == expression{1.1l / 1.l}); + +#if defined(HEYOKA_HAVE_REAL128) + + REQUIRE(1_f128 / 1.1_rq == expression{1._rq / 1.1_rq}); + REQUIRE(1.1_rq / 1_f128 == expression{1.1_rq / 1._rq}); + +#endif + +#if defined(HEYOKA_HAVE_REAL) + + REQUIRE(1_dbl / mppp::real{"1.1", 345} == expression{1. / mppp::real{"1.1", 345}}); + REQUIRE(mppp::real{"1.1", 345} / 1_dbl == expression{mppp::real{"1.1", 345} / 1.}); + +#endif +} + +TEST_CASE("number compound ops") +{ + { + auto ex = 1_flt; + ex += 1.1f; + REQUIRE(ex == 1_flt + 1.1f); + } + + { + auto ex = 1_dbl; + ex += 1.1; + REQUIRE(ex == 1_dbl + 1.1); + } + + { + auto ex = 1_ldbl; + ex += 1.1l; + REQUIRE(ex == 1_ldbl + 1.1l); + } + +#if defined(HEYOKA_HAVE_REAL128) + + { + using namespace mppp::literals; + + auto ex = 1_f128; + ex += 1.1_rq; + REQUIRE(ex == 1_f128 + 1.1_rq); + } + +#endif + +#if defined(HEYOKA_HAVE_REAL) + + { + auto ex = 1_dbl; + ex += mppp::real{"1.1", 345}; + REQUIRE(ex == 1_dbl + mppp::real{"1.1", 345}); + } + +#endif + + { + auto ex = 1_flt; + ex -= 1.1f; + REQUIRE(ex == 1_flt - 1.1f); + } + + { + auto ex = 1_dbl; + ex -= 1.1; + REQUIRE(ex == 1_dbl - 1.1); + } + + { + auto ex = 1_ldbl; + ex -= 1.1l; + REQUIRE(ex == 1_ldbl - 1.1l); + } + +#if defined(HEYOKA_HAVE_REAL128) + + { + using namespace mppp::literals; + + auto ex = 1_f128; + ex -= 1.1_rq; + REQUIRE(ex == 1_f128 - 1.1_rq); + } + +#endif + +#if defined(HEYOKA_HAVE_REAL) + + { + auto ex = 1_dbl; + ex -= mppp::real{"1.1", 345}; + REQUIRE(ex == 1_dbl - mppp::real{"1.1", 345}); + } + +#endif + + { + auto ex = 1_flt; + ex *= 1.1f; + REQUIRE(ex == 1_flt * 1.1f); + 
} + + { + auto ex = 1_dbl; + ex *= 1.1; + REQUIRE(ex == 1_dbl * 1.1); + } + + { + auto ex = 1_ldbl; + ex *= 1.1l; + REQUIRE(ex == 1_ldbl * 1.1l); + } + +#if defined(HEYOKA_HAVE_REAL128) + + { + using namespace mppp::literals; + + auto ex = 1_f128; + ex *= 1.1_rq; + REQUIRE(ex == 1_f128 * 1.1_rq); + } + +#endif + +#if defined(HEYOKA_HAVE_REAL) + + { + auto ex = 1_dbl; + ex *= mppp::real{"1.1", 345}; + REQUIRE(ex == 1_dbl * mppp::real{"1.1", 345}); + } + +#endif + + { + auto ex = 1_flt; + ex /= 1.1f; + REQUIRE(ex == 1_flt / 1.1f); + } + + { + auto ex = 1_dbl; + ex /= 1.1; + REQUIRE(ex == 1_dbl / 1.1); + } + + { + auto ex = 1_ldbl; + ex /= 1.1l; + REQUIRE(ex == 1_ldbl / 1.1l); + } + +#if defined(HEYOKA_HAVE_REAL128) + + { + using namespace mppp::literals; + + auto ex = 1_f128; + ex /= 1.1_rq; + REQUIRE(ex == 1_f128 / 1.1_rq); + } + +#endif + +#if defined(HEYOKA_HAVE_REAL) + + { + auto ex = 1_dbl; + ex /= mppp::real{"1.1", 345}; + REQUIRE(ex == 1_dbl / mppp::real{"1.1", 345}); + } + +#endif +} diff --git a/test/kepDE.cpp b/test/kepDE.cpp index 5ba8f0d17..b1b8d5b13 100644 --- a/test/kepDE.cpp +++ b/test/kepDE.cpp @@ -48,7 +48,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(h_dist(rng)); // Generate a k such that h**2+k**2<1. - const auto max_abs_k = std::sqrt(1. - h_val * h_val); - std::uniform_real_distribution k_dist(std::nextafter(-max_abs_k, 0.), max_abs_k); + const auto max_abs_k = sqrt(fp_t(1) - h_val * h_val); + std::uniform_real_distribution k_dist(static_cast(nextafter(-max_abs_k, fp_t(0))), + static_cast(max_abs_k)); auto k_val = static_cast(k_dist(rng)); return std::make_pair(static_cast(h_val), std::move(k_val)); @@ -335,7 +339,7 @@ TEST_CASE("cfunc") // Generate the hs and ks. auto [hval, kval] = generate_hk(); // Generate the lam. 
- auto lamval = lam_dist(rng); + auto lamval = static_cast(lam_dist(rng)); ins[i] = hval; ins[i + batch_size] = kval; @@ -359,8 +363,8 @@ TEST_CASE("cfunc") auto hval = ins[i]; auto kval = ins[i + batch_size]; auto lamval = ins[i + 2u * batch_size]; - REQUIRE(eps_close(cos(lamval), cos(Fval + hval * (1. - cos(Fval)) - kval * sin(Fval)))); - REQUIRE(eps_close(sin(lamval), sin(Fval + hval * (1. - cos(Fval)) - kval * sin(Fval)))); + REQUIRE(eps_close(cos(lamval), cos(Fval + hval * (fp_t(1) - cos(Fval)) - kval * sin(Fval)))); + REQUIRE(eps_close(sin(lamval), sin(Fval + hval * (fp_t(1) - cos(Fval)) - kval * sin(Fval)))); // Second output. REQUIRE(!isnan(outs[i + batch_size])); @@ -368,17 +372,17 @@ TEST_CASE("cfunc") hval = pars[i]; kval = pars[i + batch_size]; lamval = ins[i + 2u * batch_size]; - REQUIRE(eps_close(cos(lamval), cos(Fval + hval * (1. - cos(Fval)) - kval * sin(Fval)))); - REQUIRE(eps_close(sin(lamval), sin(Fval + hval * (1. - cos(Fval)) - kval * sin(Fval)))); + REQUIRE(eps_close(cos(lamval), cos(Fval + hval * (fp_t(1) - cos(Fval)) - kval * sin(Fval)))); + REQUIRE(eps_close(sin(lamval), sin(Fval + hval * (fp_t(1) - cos(Fval)) - kval * sin(Fval)))); // Third output. REQUIRE(!isnan(outs[i + batch_size * 2u])); Fval = outs[i + batch_size * 2u]; - hval = .5; - kval = .3; + hval = fp_t(.5); + kval = fp_t(.3); lamval = ins[i + 2u * batch_size]; - REQUIRE(eps_close(cos(lamval), cos(Fval + hval * (1. - cos(Fval)) - kval * sin(Fval)))); - REQUIRE(eps_close(sin(lamval), sin(Fval + hval * (1. 
- cos(Fval)) - kval * sin(Fval)))); + REQUIRE(eps_close(cos(lamval), cos(Fval + hval * (fp_t(1) - cos(Fval)) - kval * sin(Fval)))); + REQUIRE(eps_close(sin(lamval), sin(Fval + hval * (fp_t(1) - cos(Fval)) - kval * sin(Fval)))); } } } diff --git a/test/kepE.cpp b/test/kepE.cpp index 0156a221c..e041668fc 100644 --- a/test/kepE.cpp +++ b/test/kepE.cpp @@ -67,7 +67,7 @@ using namespace mppp::literals; #endif -const auto fp_types = std::tuple(k.value()).args()[0] == "x"_var); + REQUIRE(std::get(std::get(k.value()).args()[1].value()) == number{1.1f}); + + k = kepE("x"_var, 1.1); REQUIRE(std::get(k.value()).args()[0] == "x"_var); REQUIRE(std::get(std::get(k.value()).args()[1].value()) == number{1.1}); @@ -150,12 +154,16 @@ TEST_CASE("kepE overloads") REQUIRE(std::get(std::get(k.value()).args()[1].value()) == number{mppp::real128{"1.1"}}); #endif -#if defined(HEYOKA_HAVE_REAL128) +#if defined(HEYOKA_HAVE_REAL) k = kepE("x"_var, 1.1_r256); REQUIRE(std::get(k.value()).args()[0] == "x"_var); REQUIRE(std::get(std::get(k.value()).args()[1].value()) == number{1.1_r256}); #endif + k = kepE(1.1f, "x"_var); + REQUIRE(std::get(k.value()).args()[1] == "x"_var); + REQUIRE(std::get(std::get(k.value()).args()[0].value()) == number{1.1f}); + k = kepE(1.1, "x"_var); REQUIRE(std::get(k.value()).args()[1] == "x"_var); REQUIRE(std::get(std::get(k.value()).args()[0].value()) == number{1.1}); diff --git a/test/kepF.cpp b/test/kepF.cpp index 668faba42..f6a1fbe6a 100644 --- a/test/kepF.cpp +++ b/test/kepF.cpp @@ -49,7 +49,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(lamval); // Generate another pair of hs and ks for the pars. std::tie(hval, kval) = generate_hk(); @@ -383,8 +384,8 @@ TEST_CASE("cfunc") // Third output. 
REQUIRE(!isnan(outs[i + batch_size * 2u])); Fval = outs[i + batch_size * 2u]; - hval = .5; - kval = .3; + hval = fp_t(.5); + kval = fp_t(.3); lamval = ins[i + 2u * batch_size]; REQUIRE(eps_close(cos(lamval), cos(Fval + hval * cos(Fval) - kval * sin(Fval)))); REQUIRE(eps_close(sin(lamval), sin(Fval + hval * cos(Fval) - kval * sin(Fval)))); diff --git a/test/llvm_helpers.cpp b/test/llvm_helpers.cpp index f46b6f0c4..69bab4988 100644 --- a/test/llvm_helpers.cpp +++ b/test/llvm_helpers.cpp @@ -54,7 +54,7 @@ using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple rdist(-10., 10.); std::vector values(batch_size); - std::generate(values.begin(), values.end(), [&rdist]() { return rdist(rng); }); + std::generate(values.begin(), values.end(), [&rdist]() { return static_cast(rdist(rng)); }); std::vector signs(batch_size); f_ptr(signs.data(), values.data()); @@ -360,7 +360,7 @@ TEST_CASE("sincos batch") // Setup the argument and the output values. std::vector x_vec(batch_size), s_vec(x_vec), c_vec(x_vec); for (auto i = 0u; i < batch_size; ++i) { - x_vec[i] = i + 1u; + x_vec[i] = static_cast(i + 1u); } f_ptr(x_vec.data(), s_vec.data(), c_vec.data()); @@ -437,7 +437,6 @@ TEST_CASE("sincos mp") TEST_CASE("inv_kep_E_scalar") { using detail::llvm_add_inv_kep_E_wrapper; - namespace bmt = boost::math::tools; using std::cos; using std::isnan; using std::sin; @@ -461,7 +460,7 @@ TEST_CASE("inv_kep_E_scalar") // First set of tests with zero eccentricity. for (auto i = 0; i < ntrials; ++i) { - const fp_t M = M_dist(rng); + const fp_t M = static_cast(M_dist(rng)); const fp_t e = 0; fp_t E; @@ -472,8 +471,8 @@ TEST_CASE("inv_kep_E_scalar") // Non-zero eccentricities. for (auto i = 0; i < ntrials * 10; ++i) { - const fp_t M = M_dist(rng); - const fp_t e = e_dist(rng); + const fp_t M = static_cast(M_dist(rng)); + const fp_t e = static_cast(e_dist(rng)); fp_t E; f_ptr(&E, &e, &M); @@ -494,8 +493,8 @@ TEST_CASE("inv_kep_E_scalar") // Test invalid inputs. 
{ - fp_t M = 1.23; - fp_t e = -.1; + fp_t M = static_cast(1.23); + fp_t e = static_cast(-.1); fp_t E; f_ptr(&E, &e, &M); @@ -504,7 +503,7 @@ TEST_CASE("inv_kep_E_scalar") } { - fp_t M = 1.23; + fp_t M = static_cast(1.23); fp_t e = 1.; fp_t E; @@ -514,7 +513,7 @@ TEST_CASE("inv_kep_E_scalar") } { - fp_t M = 1.23; + fp_t M = static_cast(1.23); fp_t e = std::numeric_limits::infinity(); fp_t E; @@ -524,7 +523,7 @@ TEST_CASE("inv_kep_E_scalar") } { - fp_t M = 1.23; + fp_t M = static_cast(1.23); fp_t e = -std::numeric_limits::infinity(); fp_t E; @@ -534,7 +533,7 @@ TEST_CASE("inv_kep_E_scalar") } { - fp_t M = 1.23; + fp_t M = static_cast(1.23); fp_t e = std::numeric_limits::quiet_NaN(); fp_t E; @@ -545,7 +544,7 @@ TEST_CASE("inv_kep_E_scalar") { fp_t M = std::numeric_limits::infinity(); - fp_t e = .1; + fp_t e = static_cast(.1); fp_t E; f_ptr(&E, &e, &M); @@ -555,7 +554,7 @@ TEST_CASE("inv_kep_E_scalar") { fp_t M = -std::numeric_limits::infinity(); - fp_t e = .2; + fp_t e = static_cast(.2); fp_t E; f_ptr(&E, &e, &M); @@ -565,7 +564,7 @@ TEST_CASE("inv_kep_E_scalar") { fp_t M = std::numeric_limits::quiet_NaN(); - fp_t e = .1; + fp_t e = static_cast(.1); fp_t E; f_ptr(&E, &e, &M); @@ -581,7 +580,6 @@ TEST_CASE("inv_kep_E_scalar") TEST_CASE("inv_kep_E_batch") { using detail::llvm_add_inv_kep_E_wrapper; - namespace bmt = boost::math::tools; using std::cos; using std::isnan; using std::sin; @@ -610,7 +608,7 @@ TEST_CASE("inv_kep_E_batch") // First set of tests with zero eccentricity. for (auto i = 0; i < ntrials; ++i) { for (auto j = 0u; j < batch_size; ++j) { - M_vec[j] = M_dist(rng); + M_vec[j] = static_cast(M_dist(rng)); } f_ptr(ret_vec.data(), e_vec.data(), M_vec.data()); @@ -622,8 +620,8 @@ TEST_CASE("inv_kep_E_batch") // Non-zero eccentricities. 
for (auto i = 0; i < ntrials * 10; ++i) { for (auto j = 0u; j < batch_size; ++j) { - M_vec[j] = M_dist(rng); - e_vec[j] = e_dist(rng); + M_vec[j] = static_cast(M_dist(rng)); + e_vec[j] = static_cast(e_dist(rng)); } f_ptr(ret_vec.data(), e_vec.data(), M_vec.data()); @@ -635,12 +633,12 @@ TEST_CASE("inv_kep_E_batch") // Test invalid inputs. { for (auto j = 0u; j < batch_size; ++j) { - M_vec[j] = M_dist(rng); + M_vec[j] = static_cast(M_dist(rng)); if (j == 1u) { - e_vec[j] = -.1; + e_vec[j] = static_cast(-.1); } else { - e_vec[j] = e_dist(rng); + e_vec[j] = static_cast(e_dist(rng)); } } @@ -657,12 +655,12 @@ TEST_CASE("inv_kep_E_batch") { for (auto j = 0u; j < batch_size; ++j) { - M_vec[j] = M_dist(rng); + M_vec[j] = static_cast(M_dist(rng)); if (j == 1u) { e_vec[j] = 1; } else { - e_vec[j] = e_dist(rng); + e_vec[j] = static_cast(e_dist(rng)); } } @@ -679,12 +677,12 @@ TEST_CASE("inv_kep_E_batch") { for (auto j = 0u; j < batch_size; ++j) { - M_vec[j] = M_dist(rng); + M_vec[j] = static_cast(M_dist(rng)); if (j == 1u) { e_vec[j] = std::numeric_limits::infinity(); } else { - e_vec[j] = e_dist(rng); + e_vec[j] = static_cast(e_dist(rng)); } } @@ -701,12 +699,12 @@ TEST_CASE("inv_kep_E_batch") { for (auto j = 0u; j < batch_size; ++j) { - M_vec[j] = M_dist(rng); + M_vec[j] = static_cast(M_dist(rng)); if (j == 1u) { e_vec[j] = -std::numeric_limits::infinity(); } else { - e_vec[j] = e_dist(rng); + e_vec[j] = static_cast(e_dist(rng)); } } @@ -723,12 +721,12 @@ TEST_CASE("inv_kep_E_batch") { for (auto j = 0u; j < batch_size; ++j) { - M_vec[j] = M_dist(rng); + M_vec[j] = static_cast(M_dist(rng)); if (j == 1u) { e_vec[j] = std::numeric_limits::quiet_NaN(); } else { - e_vec[j] = e_dist(rng); + e_vec[j] = static_cast(e_dist(rng)); } } @@ -745,12 +743,12 @@ TEST_CASE("inv_kep_E_batch") { for (auto j = 0u; j < batch_size; ++j) { - e_vec[j] = e_dist(rng); + e_vec[j] = static_cast(e_dist(rng)); if (j == 1u) { M_vec[j] = std::numeric_limits::infinity(); } else { - M_vec[j] = M_dist(rng); 
+ M_vec[j] = static_cast(M_dist(rng)); } } @@ -767,12 +765,12 @@ TEST_CASE("inv_kep_E_batch") { for (auto j = 0u; j < batch_size; ++j) { - e_vec[j] = e_dist(rng); + e_vec[j] = static_cast(e_dist(rng)); if (j == 1u) { M_vec[j] = -std::numeric_limits::infinity(); } else { - M_vec[j] = M_dist(rng); + M_vec[j] = static_cast(M_dist(rng)); } } @@ -789,12 +787,12 @@ TEST_CASE("inv_kep_E_batch") { for (auto j = 0u; j < batch_size; ++j) { - e_vec[j] = e_dist(rng); + e_vec[j] = static_cast(e_dist(rng)); if (j == 1u) { M_vec[j] = std::numeric_limits::quiet_NaN(); } else { - M_vec[j] = M_dist(rng); + M_vec[j] = static_cast(M_dist(rng)); } } @@ -1408,13 +1406,13 @@ TEST_CASE("minmax") if (idist(rng) && idist(rng) && idist(rng)) { av[j] = std::numeric_limits::quiet_NaN(); } else { - av[j] = rdist(rng); + av[j] = static_cast(rdist(rng)); } if (idist(rng) && idist(rng) && idist(rng)) { bv[j] = std::numeric_limits::quiet_NaN(); } else { - bv[j] = rdist(rng); + bv[j] = static_cast(rdist(rng)); } } @@ -1569,9 +1567,9 @@ TEST_CASE("fma batch") // Setup the arguments and the output value. 
std::vector ret_vec(batch_size), a_vec(ret_vec), b_vec(ret_vec), c_vec(ret_vec); for (auto i = 0u; i < batch_size; ++i) { - a_vec[i] = i + 1u; - b_vec[i] = a_vec[i] * 10 * (i + 1u); - c_vec[i] = b_vec[i] * 10 * (i + 1u); + a_vec[i] = static_cast(i + 1u); + b_vec[i] = a_vec[i] * 10 * static_cast(i + 1u); + c_vec[i] = b_vec[i] * 10 * static_cast(i + 1u); } f_ptr(ret_vec.data(), a_vec.data(), b_vec.data(), c_vec.data()); @@ -2610,7 +2608,7 @@ TEST_CASE("dl modulus scalar") using mp_fp_t = bmp::number::digits * 2, bmp::digit_base_2>>; - std::uniform_real_distribution op_dist(-1e6, 1e6), quo_dist(.1, 10.); + std::uniform_real_distribution op_dist(fp_t(-1e6), fp_t(1e6)), quo_dist(fp_t(.1), fp_t(10.)); for (auto i = 0; i < ntrials; ++i) { auto x = fp_t(op_dist(rng)), y = fp_t(quo_dist(rng)); @@ -2695,7 +2693,7 @@ TEST_CASE("dl modulus batch") using mp_fp_t = bmp::number::digits * 2, bmp::digit_base_2>>; - std::uniform_real_distribution op_dist(-1e6, 1e6), quo_dist(.1, 10.); + std::uniform_real_distribution op_dist(fp_t(-1e6), fp_t(1e6)), quo_dist(fp_t(.1), fp_t(10.)); std::vector x_vec(batch_size), y_vec(x_vec), a_hi_vec(x_vec), a_lo_vec(x_vec), b_hi_vec(x_vec), b_lo_vec(x_vec); @@ -2739,7 +2737,10 @@ TEST_CASE("get_alignment") auto &context = s.context(); auto &builder = s.builder(); - auto *tp = detail::to_llvm_type(context); + auto *tp = detail::to_llvm_type(context); + REQUIRE(detail::get_alignment(md, tp) == alignof(float)); + + tp = detail::to_llvm_type(context); REQUIRE(detail::get_alignment(md, tp) == alignof(double)); #if !defined(HEYOKA_ARCH_PPC) diff --git a/test/llvm_state.cpp b/test/llvm_state.cpp index e3ebbbdbe..1292af5db 100644 --- a/test/llvm_state.cpp +++ b/test/llvm_state.cpp @@ -49,6 +49,7 @@ using namespace heyoka_test; TEST_CASE("simd size") { + REQUIRE(recommended_simd_size() > 0u); REQUIRE(recommended_simd_size() > 0u); REQUIRE(recommended_simd_size() > 0u); @@ -63,6 +64,7 @@ TEST_CASE("simd size") #if defined(__GNUC__) #if defined(__amd64__) || 
defined(__aarch64__) + REQUIRE(recommended_simd_size() >= 4u); REQUIRE(recommended_simd_size() >= 2u); #endif diff --git a/test/log.cpp b/test/log.cpp index 136110ac8..4d5a88626 100644 --- a/test/log.cpp +++ b/test/log.cpp @@ -49,7 +49,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {log(a), log(b), log(c), log(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{1., 2., 3., 4.}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::log(1.f))); + REQUIRE(outs[1] == approximately(std::log(2.f))); + REQUIRE(outs[2] == approximately(std::log(3.f))); + REQUIRE(outs[3] == approximately(std::log(4.f))); + +#if defined(HEYOKA_WITH_SLEEF) + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@llvm.log.f32", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + +#if LLVM_VERSION_MAJOR >= 16 + + // NOTE: LLVM16 is currently the version tested in the CI on arm64. + if (tf.aarch64) { + REQUIRE(count == 5u); + } + +#endif + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. 
+ + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/neg.cpp b/test/neg.cpp index 558d6dce1..d724d413f 100644 --- a/test/neg.cpp +++ b/test/neg.cpp @@ -46,7 +46,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(k.value()).args()[0] == "x"_var); + REQUIRE(std::get(std::get(k.value()).args()[1].value()) == number{1.1f}); + + k = pow("x"_var, 1.1); + REQUIRE(std::get(k.value()).args()[0] == "x"_var); + REQUIRE(std::get(std::get(k.value()).args()[1].value()) == number{1.1}); + + k = pow("x"_var, 1.1l); + REQUIRE(std::get(k.value()).args()[0] == "x"_var); + REQUIRE(std::get(std::get(k.value()).args()[1].value()) == number{1.1l}); + +#if defined(HEYOKA_HAVE_REAL128) + k = pow("x"_var, mppp::real128{"1.1"}); + REQUIRE(std::get(k.value()).args()[0] == "x"_var); + REQUIRE(std::get(std::get(k.value()).args()[1].value()) == number{mppp::real128{"1.1"}}); +#endif + +#if defined(HEYOKA_HAVE_REAL) + k = pow("x"_var, 1.1_r256); + REQUIRE(std::get(k.value()).args()[0] == "x"_var); + REQUIRE(std::get(std::get(k.value()).args()[1].value()) == number{1.1_r256}); +#endif +} + +// Tests to check vectorisation via the vector-function-abi-variant machinery. 
+TEST_CASE("vfabi double") { llvm_state s{kw::slp_vectorize = true}; @@ -482,3 +509,68 @@ TEST_CASE("vfabi") #endif } + +TEST_CASE("vfabi float") +{ + llvm_state s{kw::slp_vectorize = true}; + + auto [a, b, c, d] = make_vars("a", "b", "c", "d"); + + add_cfunc(s, "cfunc", {pow(a, .6f), pow(b, .7f), pow(c, .8f), pow(d, .9f)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{.1f, .2f, .3f, .4f}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::pow(.1f, .6f))); + REQUIRE(outs[1] == approximately(std::pow(.2f, .7f))); + REQUIRE(outs[2] == approximately(std::pow(.3f, .8f))); + REQUIRE(outs[3] == approximately(std::pow(.4f, .9f))); + +#if defined(HEYOKA_WITH_SLEEF) + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@llvm.pow.f32", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + +#if LLVM_VERSION_MAJOR >= 16 + + if (tf.aarch64) { + REQUIRE(count == 5u); + } + +#endif + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. 
+ + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/prod.cpp b/test/prod.cpp index 43ae82b97..b6b93b615 100644 --- a/test/prod.cpp +++ b/test/prod.cpp @@ -54,7 +54,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(x_dist(rng)); // Generate the pars. - pars[i] = x_dist(rng); - pars[i + batch_size] = x_dist(rng); + pars[i] = static_cast(x_dist(rng)); + pars[i + batch_size] = static_cast(x_dist(rng)); } cf_ptr(outs.data(), ins.data(), pars.data(), nullptr); @@ -480,11 +480,11 @@ TEST_CASE("cfunc leaky") for (auto niter = 0; niter < 100; ++niter) { for (auto i = 0u; i < batch_size; ++i) { // Generate the xs. - ins[i] = x_dist(rng); + ins[i] = static_cast(x_dist(rng)); // Generate the pars. - pars[i] = x_dist(rng); - pars[i + batch_size] = x_dist(rng); + pars[i] = static_cast(x_dist(rng)); + pars[i + batch_size] = static_cast(x_dist(rng)); } cf_ptr(outs.data(), ins.data(), pars.data(), nullptr); diff --git a/test/sigmoid.cpp b/test/sigmoid.cpp index c65d31a2e..b4e05d865 100644 --- a/test/sigmoid.cpp +++ b/test/sigmoid.cpp @@ -47,7 +47,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {sin(a), sin(b), sin(c), sin(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{1., 2., 3., 4.}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::sin(1.f))); + REQUIRE(outs[1] == approximately(std::sin(2.f))); + REQUIRE(outs[2] == approximately(std::sin(3.f))); + REQUIRE(outs[3] == approximately(std::sin(4.f))); + +#if defined(HEYOKA_WITH_SLEEF) + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@llvm.sin.f32", 
boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + +#if LLVM_VERSION_MAJOR >= 16 + + // NOTE: LLVM16 is currently the version tested in the CI on arm64. + if (tf.aarch64) { + REQUIRE(count == 5u); + } + +#endif + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. + + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + + // Some more extensive testing specific to x86, only for this function. + auto [e, f, g, h, i] = make_vars("e", "f", "g", "h", "i"); + + llvm_state s2{kw::slp_vectorize = true}; + + add_cfunc(s2, "cfunc1", {sin(a), sin(b), sin(c), sin(d), sin(e), sin(f), sin(g), sin(h)}); + add_cfunc(s2, "cfunc2", {sin(a), sin(b), sin(c), sin(d), sin(e), sin(f), sin(g), sin(h), sin(i)}); + + s2.compile(); + + auto *cf1_ptr + = reinterpret_cast(s2.jit_lookup("cfunc1")); + auto *cf2_ptr + = reinterpret_cast(s2.jit_lookup("cfunc2")); + + const std::vector ins2{1., 2., 3., 4., 5., 6., 7., 8., 9.}; + std::vector outs2(9u, 0.); + + cf1_ptr(outs2.data(), ins2.data(), nullptr, nullptr); + + REQUIRE(outs2[0] == approximately(std::sin(1.f))); + REQUIRE(outs2[1] == approximately(std::sin(2.f))); + REQUIRE(outs2[2] == approximately(std::sin(3.f))); + REQUIRE(outs2[3] == approximately(std::sin(4.f))); + REQUIRE(outs2[4] == approximately(std::sin(5.f))); + REQUIRE(outs2[5] == approximately(std::sin(6.f))); + REQUIRE(outs2[6] == approximately(std::sin(7.f))); + REQUIRE(outs2[7] == approximately(std::sin(8.f))); + + cf2_ptr(outs2.data(), ins2.data(), nullptr, nullptr); + + REQUIRE(outs2[0] == approximately(std::sin(1.f))); + 
REQUIRE(outs2[1] == approximately(std::sin(2.f))); + REQUIRE(outs2[2] == approximately(std::sin(3.f))); + REQUIRE(outs2[3] == approximately(std::sin(4.f))); + REQUIRE(outs2[4] == approximately(std::sin(5.f))); + REQUIRE(outs2[5] == approximately(std::sin(6.f))); + REQUIRE(outs2[6] == approximately(std::sin(7.f))); + REQUIRE(outs2[7] == approximately(std::sin(8.f))); + REQUIRE(outs2[8] == approximately(std::sin(9.f))); + + ir = s2.get_ir(); + + count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@llvm.sin.f32", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + if (tf.avx) { + // NOTE: occurrences of the scalar version: + // - 8 + 9 calls in the strided cfuncs, + // - 1 declaration, + // - 1 call to deal with the remainder in the + // 9-argument version. + REQUIRE(count == 19u); + } + + // NOTE: this next test seems to work properly starting + // from LLVM 13. +#if LLVM_VERSION_MAJOR >= 13 + + // Check that the autovec works also on batch sizes which do not correspond + // exactly to an available vector width. 
+ llvm_state s3{kw::slp_vectorize = true}; + + add_cfunc(s3, "cfunc", {sin(a)}, kw::batch_size = 5u); + + s3.compile(); + + auto *cf3_ptr + = reinterpret_cast(s3.jit_lookup("cfunc")); + + std::vector ins3 = {1., 2., 3., 4., 5.}, outs3 = {0., 0., 0., 0., 0.}; + + cf3_ptr(outs3.data(), ins3.data(), nullptr, nullptr); + + REQUIRE(outs3[0] == approximately(std::sin(1.f))); + REQUIRE(outs3[1] == approximately(std::sin(2.f))); + REQUIRE(outs3[2] == approximately(std::sin(3.f))); + REQUIRE(outs3[3] == approximately(std::sin(4.f))); + REQUIRE(outs3[4] == approximately(std::sin(5.f))); + + ir = s3.get_ir(); + + count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@llvm.sin.f32", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 1 call in the remainder of the unstrided cfunc, + // - 1 call in the remainder of the strided cfunc, + // - 1 declaration. + REQUIRE(count == 3u); + } + +#if LLVM_VERSION_MAJOR >= 16 + + if (tf.aarch64) { + REQUIRE(count == 3u); + } + +#endif + +#endif + +#endif +} diff --git a/test/sinh.cpp b/test/sinh.cpp index adaee85d4..a0a218c82 100644 --- a/test/sinh.cpp +++ b/test/sinh.cpp @@ -50,7 +50,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {sinh(a), sinh(b), sinh(c), sinh(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{1., 2., 3., 4.}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::sinh(1.f))); + REQUIRE(outs[1] == approximately(std::sinh(2.f))); + REQUIRE(outs[2] == approximately(std::sinh(3.f))); + REQUIRE(outs[3] == approximately(std::sinh(4.f))); + + // NOTE: autovec with external scalar functions seems to work + // only since LLVM 16. 
+#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16 + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@sinhf", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + + if (tf.aarch64) { + REQUIRE(count == 5u); + } + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. + + // if (tf.vsx) { + // REQUIRE(count == 3u); + // } + + // Some more extensive testing specific to x86, only for this function. 
+ auto [e, f, g, h, i] = make_vars("e", "f", "g", "h", "i"); + + llvm_state s2{kw::slp_vectorize = true}; + + add_cfunc(s2, "cfunc1", {sinh(a), sinh(b), sinh(c), sinh(d), sinh(e), sinh(f), sinh(g), sinh(h)}); + add_cfunc(s2, "cfunc2", {sinh(a), sinh(b), sinh(c), sinh(d), sinh(e), sinh(f), sinh(g), sinh(h), sinh(i)}); + + s2.compile(); + + auto *cf1_ptr + = reinterpret_cast(s2.jit_lookup("cfunc1")); + auto *cf2_ptr + = reinterpret_cast(s2.jit_lookup("cfunc2")); + + const std::vector ins2{1., 2., 3., 4., 5., 6., 7., 8., 9.}; + std::vector outs2(9u, 0.); + + cf1_ptr(outs2.data(), ins2.data(), nullptr, nullptr); + + REQUIRE(outs2[0] == approximately(std::sinh(1.f))); + REQUIRE(outs2[1] == approximately(std::sinh(2.f))); + REQUIRE(outs2[2] == approximately(std::sinh(3.f))); + REQUIRE(outs2[3] == approximately(std::sinh(4.f))); + REQUIRE(outs2[4] == approximately(std::sinh(5.f))); + REQUIRE(outs2[5] == approximately(std::sinh(6.f))); + REQUIRE(outs2[6] == approximately(std::sinh(7.f))); + REQUIRE(outs2[7] == approximately(std::sinh(8.f))); + + cf2_ptr(outs2.data(), ins2.data(), nullptr, nullptr); + + REQUIRE(outs2[0] == approximately(std::sinh(1.f))); + REQUIRE(outs2[1] == approximately(std::sinh(2.f))); + REQUIRE(outs2[2] == approximately(std::sinh(3.f))); + REQUIRE(outs2[3] == approximately(std::sinh(4.f))); + REQUIRE(outs2[4] == approximately(std::sinh(5.f))); + REQUIRE(outs2[5] == approximately(std::sinh(6.f))); + REQUIRE(outs2[6] == approximately(std::sinh(7.f))); + REQUIRE(outs2[7] == approximately(std::sinh(8.f))); + REQUIRE(outs2[8] == approximately(std::sinh(9.f))); + + ir = s2.get_ir(); + + count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@sinhf", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + if (tf.avx) { + // NOTE: occurrences of the scalar version: + // - 8 + 9 calls in the strided cfuncs, + // - 1 declaration, + // - 1 call to deal with the remainder in the + // 9-argument version. 
+ REQUIRE(count == 19u); + } + + // Check that the autovec works also on batch sizes which do not correspond + // exactly to an available vector width. + llvm_state s3{kw::slp_vectorize = true}; + + add_cfunc(s3, "cfunc", {sinh(a)}, kw::batch_size = 5u); + + s3.compile(); + + auto *cf3_ptr + = reinterpret_cast(s3.jit_lookup("cfunc")); + + std::vector ins3 = {1., 2., 3., 4., 5.}, outs3 = {0., 0., 0., 0., 0.}; + + cf3_ptr(outs3.data(), ins3.data(), nullptr, nullptr); + + REQUIRE(outs3[0] == approximately(std::sinh(1.f))); + REQUIRE(outs3[1] == approximately(std::sinh(2.f))); + REQUIRE(outs3[2] == approximately(std::sinh(3.f))); + REQUIRE(outs3[3] == approximately(std::sinh(4.f))); + REQUIRE(outs3[4] == approximately(std::sinh(5.f))); + + ir = s3.get_ir(); + + count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@sinhf", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 1 call in the remainder of the unstrided cfunc, + // - 1 call in the remainder of the strided cfunc, + // - 1 declaration. 
+ REQUIRE(count == 3u); + } + + if (tf.aarch64) { + REQUIRE(count == 3u); + } + +#endif +} diff --git a/test/sqrt.cpp b/test/sqrt.cpp index b4e28b000..daa6a55f3 100644 --- a/test/sqrt.cpp +++ b/test/sqrt.cpp @@ -50,7 +50,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {sqrt(a), sqrt(b), sqrt(c), sqrt(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{1., 2., 3., 4.}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::sqrt(1.f))); + REQUIRE(outs[1] == approximately(std::sqrt(2.f))); + REQUIRE(outs[2] == approximately(std::sqrt(3.f))); + REQUIRE(outs[3] == approximately(std::sqrt(4.f))); + +#if defined(HEYOKA_WITH_SLEEF) + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@llvm.sqrt.f32", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + +#if LLVM_VERSION_MAJOR >= 16 + + // NOTE: LLVM16 is currently the version tested in the CI on arm64. + if (tf.aarch64) { + REQUIRE(count == 5u); + } + +#endif + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. 
+ + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/square.cpp b/test/square.cpp index f9fcd8db2..cb71f3e26 100644 --- a/test/square.cpp +++ b/test/square.cpp @@ -55,7 +55,7 @@ auto square_wrapper(const heyoka::expression &x) return pow(x, 2.); } -const auto fp_types = std::tuple(.5) * .5, fp_t(100))); + REQUIRE(outs[i + batch_size] == approximately(static_cast(.5) * fp_t(.5), fp_t(100))); REQUIRE(outs[i + 2u * batch_size] == approximately(pars[i] * pars[i], fp_t(100))); } } diff --git a/test/sub.cpp b/test/sub.cpp index 3affb2a0b..6897268cd 100644 --- a/test/sub.cpp +++ b/test/sub.cpp @@ -46,7 +46,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple &args) return sum(new_args); } -const auto fp_types = std::tuple(.5) * 0.5, fp_t(100))); + == approximately(ins[i] * ins[i] + static_cast(.5) * fp_t(0.5), fp_t(100))); REQUIRE(outs[i + 2u * batch_size] == approximately(pars[i] * pars[i] + ins[i + batch_size] * ins[i + batch_size], fp_t(100))); } diff --git a/test/tan.cpp b/test/tan.cpp index 64bfc6750..66788af21 100644 --- a/test/tan.cpp +++ b/test/tan.cpp @@ -49,7 +49,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {tan(a), tan(b), tan(c), tan(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{1., 2., 3., 4.}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::tan(1.f))); + REQUIRE(outs[1] == approximately(std::tan(2.f))); + REQUIRE(outs[2] == approximately(std::tan(3.f))); + REQUIRE(outs[3] == approximately(std::tan(4.f))); + + // NOTE: autovec with external scalar functions seems to work + // only since LLVM 16. 
+#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16 + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@tanf", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + + if (tf.aarch64) { + REQUIRE(count == 5u); + } + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. + + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/tanh.cpp b/test/tanh.cpp index 52c9ca51b..43b6f6b2b 100644 --- a/test/tanh.cpp +++ b/test/tanh.cpp @@ -49,7 +49,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s, "cfunc", {tanh(a), tanh(b), tanh(c), tanh(d)}); + + s.compile(); + + auto *cf_ptr + = reinterpret_cast(s.jit_lookup("cfunc")); + + const std::vector ins{1., 2., 3., 4.}; + std::vector outs(4u, 0.); + + cf_ptr(outs.data(), ins.data(), nullptr, nullptr); + + REQUIRE(outs[0] == approximately(std::tanh(1.f))); + REQUIRE(outs[1] == approximately(std::tanh(2.f))); + REQUIRE(outs[2] == approximately(std::tanh(3.f))); + REQUIRE(outs[3] == approximately(std::tanh(4.f))); + + // NOTE: autovec with external scalar functions seems to work + // only since LLVM 16. 
+#if defined(HEYOKA_WITH_SLEEF) && LLVM_VERSION_MAJOR >= 16 + + const auto &tf = detail::get_target_features(); + + auto ir = s.get_ir(); + + using string_find_iterator = boost::find_iterator; + + auto count = 0u; + for (auto it = boost::make_find_iterator(ir, boost::first_finder("@tanhf", boost::is_iequal())); + it != string_find_iterator(); ++it) { + ++count; + } + + // NOTE: at the moment we have comprehensive coverage of LLVM versions + // in the CI only for x86_64. + if (tf.sse2) { + // NOTE: occurrences of the scalar version: + // - 4 calls in the strided cfunc, + // - 1 declaration. + REQUIRE(count == 5u); + } + + if (tf.aarch64) { + REQUIRE(count == 5u); + } + + // NOTE: currently no auto-vectorization happens on ppc64 due apparently + // to the way the target machine is being set up by orc/lljit (it works + // fine with the opt tool). When this is resolved, we can test ppc64 too. + + // if (tf.vsx) { + // REQUIRE(count == 5u); + // } + +#endif +} diff --git a/test/taylor_acos.cpp b/test/taylor_acos.cpp index e1c25fb60..05905e285 100644 --- a/test/taylor_acos.cpp +++ b/test/taylor_acos.cpp @@ -40,7 +40,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple jet{fp_t{2}, fp_t{-4}, fp_t{3}, fp_t{5}}; jet.resize(8); - std::vector pars{fp_t{.5}, fp_t{.5}, fp_t{.3}, fp_t{.3}}; + std::vector pars{fp_t{.5}, fp_t{.5}, fp_t(.3), fp_t(.3)}; jptr(jet.data(), pars.data(), nullptr); @@ -247,8 +247,8 @@ TEST_CASE("taylor acos") REQUIRE(jet[2] == 3); REQUIRE(jet[3] == 5); - REQUIRE(jet[4] == approximately(acos(fp_t{.3}))); - REQUIRE(jet[5] == approximately(acos(fp_t{.3}))); + REQUIRE(jet[4] == approximately(acos(fp_t(.3)))); + REQUIRE(jet[5] == approximately(acos(fp_t(.3)))); REQUIRE(jet[6] == approximately(jet[0] + jet[2])); REQUIRE(jet[7] == approximately(jet[1] + jet[3])); @@ -424,13 +424,13 @@ TEST_CASE("taylor acos") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + 
std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(acos(jet[1]))); REQUIRE(jet[3] == approximately(acos(jet[0]))); } @@ -444,16 +444,16 @@ TEST_CASE("taylor acos") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{.3}, fp_t{-.4}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t(.3), fp_t(-.4)}; jet.resize(8); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == -.4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(-.4)); REQUIRE(jet[4] == approximately(acos(jet[2]))); REQUIRE(jet[5] == approximately(acos(jet[3]))); @@ -471,13 +471,13 @@ TEST_CASE("taylor acos") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(6); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(acos(jet[1]))); REQUIRE(jet[3] == approximately(acos(jet[0]))); REQUIRE(jet[4] == approximately(fp_t{1} / 2 * (-1 / sqrt(1 - jet[1] * jet[1]) * jet[3]))); @@ -493,16 +493,16 @@ TEST_CASE("taylor acos") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{.3}, fp_t{-.4}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t(.3), fp_t(-.4)}; jet.resize(12); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == -.4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(-.4)); REQUIRE(jet[4] == approximately(acos(jet[2]))); REQUIRE(jet[5] == 
approximately(acos(jet[3]))); @@ -526,18 +526,18 @@ TEST_CASE("taylor acos") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{-.5}, fp_t{.3}, fp_t{-.4}, fp_t{.6}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t{-.5}, fp_t(.3), fp_t(-.4), fp_t(.6)}; jet.resize(24); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); - REQUIRE(jet[2] == -.5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); + REQUIRE(jet[2] == fp_t(-.5)); - REQUIRE(jet[3] == .3); - REQUIRE(jet[4] == -.4); - REQUIRE(jet[5] == .6); + REQUIRE(jet[3] == fp_t(.3)); + REQUIRE(jet[4] == fp_t(-.4)); + REQUIRE(jet[5] == fp_t(.6)); REQUIRE(jet[6] == approximately(acos(jet[3]))); REQUIRE(jet[7] == approximately(acos(jet[4]))); diff --git a/test/taylor_acosh.cpp b/test/taylor_acosh.cpp index 1fc5fcf63..12e3dbb04 100644 --- a/test/taylor_acosh.cpp +++ b/test/taylor_acosh.cpp @@ -40,7 +40,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple jet{fp_t{2}, fp_t{-4}, fp_t{3}, fp_t{5}}; jet.resize(8); - std::vector pars{fp_t{1.5}, fp_t{1.5}, fp_t{1.3}, fp_t{1.3}}; + std::vector pars{fp_t{1.5}, fp_t{1.5}, fp_t(1.3), fp_t(1.3)}; jptr(jet.data(), pars.data(), nullptr); @@ -277,8 +277,8 @@ TEST_CASE("taylor acosh") REQUIRE(jet[2] == 3); REQUIRE(jet[3] == 5); - REQUIRE(jet[4] == approximately(acosh(fp_t{1.3}))); - REQUIRE(jet[5] == approximately(acosh(fp_t{1.3}))); + REQUIRE(jet[4] == approximately(acosh(fp_t(1.3)))); + REQUIRE(jet[5] == approximately(acosh(fp_t(1.3)))); REQUIRE(jet[6] == approximately(jet[0] + jet[2])); REQUIRE(jet[7] == approximately(jet[1] + jet[3])); @@ -454,13 +454,13 @@ TEST_CASE("taylor acosh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{1.2}, fp_t{1.3}}; + std::vector jet{fp_t(1.2), fp_t(1.3)}; jet.resize(4); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == 1.2); - REQUIRE(jet[1] == 1.3); + REQUIRE(jet[0] == 
fp_t(1.2)); + REQUIRE(jet[1] == fp_t(1.3)); REQUIRE(jet[2] == approximately(acosh(jet[1]))); REQUIRE(jet[3] == approximately(acosh(jet[0]))); } @@ -474,16 +474,16 @@ TEST_CASE("taylor acosh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{1.2}, fp_t{1.1}, fp_t{1.3}, fp_t{1.4}}; + std::vector jet{fp_t(1.2), fp_t(1.1), fp_t(1.3), fp_t(1.4)}; jet.resize(8); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == 1.2); - REQUIRE(jet[1] == 1.1); + REQUIRE(jet[0] == fp_t(1.2)); + REQUIRE(jet[1] == fp_t(1.1)); - REQUIRE(jet[2] == 1.3); - REQUIRE(jet[3] == 1.4); + REQUIRE(jet[2] == fp_t(1.3)); + REQUIRE(jet[3] == fp_t(1.4)); REQUIRE(jet[4] == approximately(acosh(jet[2]))); REQUIRE(jet[5] == approximately(acosh(jet[3]))); @@ -501,13 +501,13 @@ TEST_CASE("taylor acosh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{1.2}, fp_t{1.3}}; + std::vector jet{fp_t(1.2), fp_t(1.3)}; jet.resize(6); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == 1.2); - REQUIRE(jet[1] == 1.3); + REQUIRE(jet[0] == fp_t(1.2)); + REQUIRE(jet[1] == fp_t(1.3)); REQUIRE(jet[2] == approximately(acosh(jet[1]))); REQUIRE(jet[3] == approximately(acosh(jet[0]))); REQUIRE(jet[4] == approximately(fp_t{1} / 2 * (1 / sqrt(-1 + jet[1] * jet[1]) * jet[3]))); @@ -523,16 +523,16 @@ TEST_CASE("taylor acosh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{1.2}, fp_t{1.1}, fp_t{1.3}, fp_t{1.4}}; + std::vector jet{fp_t(1.2), fp_t(1.1), fp_t(1.3), fp_t(1.4)}; jet.resize(12); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == 1.2); - REQUIRE(jet[1] == 1.1); + REQUIRE(jet[0] == fp_t(1.2)); + REQUIRE(jet[1] == fp_t(1.1)); - REQUIRE(jet[2] == 1.3); - REQUIRE(jet[3] == 1.4); + REQUIRE(jet[2] == fp_t(1.3)); + REQUIRE(jet[3] == fp_t(1.4)); REQUIRE(jet[4] == approximately(acosh(jet[2]))); REQUIRE(jet[5] == approximately(acosh(jet[3]))); @@ -556,18 +556,18 @@ TEST_CASE("taylor acosh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - 
std::vector jet{fp_t{1.2}, fp_t{1.1}, fp_t{1.5}, fp_t{1.3}, fp_t{1.4}, fp_t{1.6}}; + std::vector jet{fp_t(1.2), fp_t(1.1), fp_t{1.5}, fp_t(1.3), fp_t(1.4), fp_t(1.6)}; jet.resize(24); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == 1.2); - REQUIRE(jet[1] == 1.1); - REQUIRE(jet[2] == 1.5); + REQUIRE(jet[0] == fp_t(1.2)); + REQUIRE(jet[1] == fp_t(1.1)); + REQUIRE(jet[2] == fp_t(1.5)); - REQUIRE(jet[3] == 1.3); - REQUIRE(jet[4] == 1.4); - REQUIRE(jet[5] == 1.6); + REQUIRE(jet[3] == fp_t(1.3)); + REQUIRE(jet[4] == fp_t(1.4)); + REQUIRE(jet[5] == fp_t(1.6)); REQUIRE(jet[6] == approximately(acosh(jet[3]))); REQUIRE(jet[7] == approximately(acosh(jet[4]))); diff --git a/test/taylor_adaptive.cpp b/test/taylor_adaptive.cpp index fcf151f5f..70ff84201 100644 --- a/test/taylor_adaptive.cpp +++ b/test/taylor_adaptive.cpp @@ -72,7 +72,7 @@ auto &horner_eval(Out &ret, const P &p, int order, const T &eval) return ret; } -const auto fp_types = std::tuple::t_event_t; auto ta = taylor_adaptive{ - {prime(x) = v, prime(v) = -9.8 * sin(x)}, {-0.0001, 0.025}, kw::t_events = {ev_t(x)}}; + {prime(x) = v, prime(v) = -9.8 * sin(x)}, {fp_t(-0.0001), fp_t(0.025)}, kw::t_events = {ev_t(x)}}; int n_invoked = 0; auto pcb = [&n_invoked](auto &) { diff --git a/test/taylor_adaptive_batch.cpp b/test/taylor_adaptive_batch.cpp index 1af4bf3b4..94233b0a8 100644 --- a/test/taylor_adaptive_batch.cpp +++ b/test/taylor_adaptive_batch.cpp @@ -55,7 +55,7 @@ using namespace heyoka; namespace hy = heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple{{prime(x) = v - par[1], prime(v) = -9.8 * sin(x + par[0])}, - {0., 0.01, 0.5, 0.51}, + {fp_t(0.), fp_t(0.01), fp_t(0.5), fp_t(0.51)}, 2u, - kw::pars = std::vector{-1e-4, -1.1e-4}}; + kw::pars = std::vector{fp_t(-1e-4), fp_t(-1.1e-4)}}; std::ostringstream oss; @@ -1188,7 +1188,7 @@ TEST_CASE("stream output") { auto tad = taylor_adaptive_batch{{prime(x) = v - par[1], prime(v) = -9.8 * sin(x + par[0])}, - {0., 0.01, 0.5, 0.51}, + {fp_t(0.), 
fp_t(0.01), fp_t(0.5), fp_t(0.51)}, 2u, kw::t_events = {t_ev_t(x)}}; @@ -1205,7 +1205,7 @@ TEST_CASE("stream output") { auto tad = taylor_adaptive_batch{{prime(x) = v - par[1], prime(v) = -9.8 * sin(x + par[0])}, - {0., 0.01, 0.5, 0.51}, + {fp_t(0.), fp_t(0.01), fp_t(0.5), fp_t(0.51)}, 2u, kw::nt_events = {nt_ev_t(x, [](auto &, fp_t, int, std::uint32_t) {})}}; @@ -1222,7 +1222,7 @@ TEST_CASE("stream output") { auto tad = taylor_adaptive_batch{{prime(x) = v - par[1], prime(v) = -9.8 * sin(x + par[0])}, - {0., 0.01, 0.5, 0.51}, + {fp_t(0.), fp_t(0.01), fp_t(0.5), fp_t(0.51)}, 2u, kw::t_events = {t_ev_t(x)}, kw::nt_events = {nt_ev_t(x, [](auto &, fp_t, int, std::uint32_t) {})}}; @@ -1816,10 +1816,11 @@ TEST_CASE("callback ste") auto [x, v] = make_vars("x", "v"); using ev_t = typename taylor_adaptive_batch::t_event_t; - auto ta = taylor_adaptive_batch{{prime(x) = v, prime(v) = -9.8 * sin(x)}, - {-1, -0.0001, -1, -1, 0.025, 0.026, 0.027, 0.028}, - 4, - kw::t_events = {ev_t(x)}}; + auto ta = taylor_adaptive_batch{ + {prime(x) = v, prime(v) = -9.8 * sin(x)}, + {fp_t(-1), fp_t(-0.0001), fp_t(-1), fp_t(-1), fp_t(0.025), fp_t(0.026), fp_t(0.027), fp_t(0.028)}, + 4, + kw::t_events = {ev_t(x)}}; int n_invoked = 0; auto pcb = [&n_invoked](auto &) { diff --git a/test/taylor_asin.cpp b/test/taylor_asin.cpp index 62dd2ab08..f538cca97 100644 --- a/test/taylor_asin.cpp +++ b/test/taylor_asin.cpp @@ -40,7 +40,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple jet{fp_t{2}, fp_t{-4}, fp_t{3}, fp_t{5}}; jet.resize(8); - std::vector pars{fp_t{.5}, fp_t{.5}, fp_t{.3}, fp_t{.3}}; + std::vector pars{fp_t{.5}, fp_t{.5}, fp_t(.3), fp_t(.3)}; jptr(jet.data(), pars.data(), nullptr); @@ -245,8 +245,8 @@ TEST_CASE("taylor asin") REQUIRE(jet[2] == 3); REQUIRE(jet[3] == 5); - REQUIRE(jet[4] == approximately(asin(fp_t{.3}))); - REQUIRE(jet[5] == approximately(asin(fp_t{.3}))); + REQUIRE(jet[4] == approximately(asin(fp_t(.3)))); + 
REQUIRE(jet[5] == approximately(asin(fp_t(.3)))); REQUIRE(jet[6] == approximately(jet[0] + jet[2])); REQUIRE(jet[7] == approximately(jet[1] + jet[3])); @@ -422,13 +422,13 @@ TEST_CASE("taylor asin") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(asin(jet[1]))); REQUIRE(jet[3] == approximately(asin(jet[0]))); } @@ -442,16 +442,16 @@ TEST_CASE("taylor asin") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{.3}, fp_t{-.4}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t(.3), fp_t(-.4)}; jet.resize(8); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == -.4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(-.4)); REQUIRE(jet[4] == approximately(asin(jet[2]))); REQUIRE(jet[5] == approximately(asin(jet[3]))); @@ -469,13 +469,13 @@ TEST_CASE("taylor asin") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(6); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(asin(jet[1]))); REQUIRE(jet[3] == approximately(asin(jet[0]))); REQUIRE(jet[4] == approximately(fp_t{1} / 2 * (1 / sqrt(1 - jet[1] * jet[1]) * jet[3]))); @@ -491,16 +491,16 @@ TEST_CASE("taylor asin") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{.3}, fp_t{-.4}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t(.3), fp_t(-.4)}; jet.resize(12); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] 
== .2); - REQUIRE(jet[1] == -.1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == -.4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(-.4)); REQUIRE(jet[4] == approximately(asin(jet[2]))); REQUIRE(jet[5] == approximately(asin(jet[3]))); @@ -524,18 +524,18 @@ TEST_CASE("taylor asin") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{-.5}, fp_t{.3}, fp_t{-.4}, fp_t{.6}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t{-.5}, fp_t(.3), fp_t(-.4), fp_t(.6)}; jet.resize(24); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); - REQUIRE(jet[2] == -.5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); + REQUIRE(jet[2] == fp_t(-.5)); - REQUIRE(jet[3] == .3); - REQUIRE(jet[4] == -.4); - REQUIRE(jet[5] == .6); + REQUIRE(jet[3] == fp_t(.3)); + REQUIRE(jet[4] == fp_t(-.4)); + REQUIRE(jet[5] == fp_t(.6)); REQUIRE(jet[6] == approximately(asin(jet[3]))); REQUIRE(jet[7] == approximately(asin(jet[4]))); diff --git a/test/taylor_asinh.cpp b/test/taylor_asinh.cpp index dbff36720..18b6b2bd0 100644 --- a/test/taylor_asinh.cpp +++ b/test/taylor_asinh.cpp @@ -40,7 +40,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple jet{fp_t{2}, fp_t{-4}, fp_t{3}, fp_t{5}}; jet.resize(8); - std::vector pars{fp_t{.5}, fp_t{.5}, fp_t{.3}, fp_t{.3}}; + std::vector pars{fp_t{.5}, fp_t{.5}, fp_t(.3), fp_t(.3)}; jptr(jet.data(), pars.data(), nullptr); @@ -275,8 +275,8 @@ TEST_CASE("taylor asinh") REQUIRE(jet[2] == 3); REQUIRE(jet[3] == 5); - REQUIRE(jet[4] == approximately(asinh(fp_t{.3}))); - REQUIRE(jet[5] == approximately(asinh(fp_t{.3}))); + REQUIRE(jet[4] == approximately(asinh(fp_t(.3)))); + REQUIRE(jet[5] == approximately(asinh(fp_t(.3)))); REQUIRE(jet[6] == approximately(jet[0] + jet[2])); REQUIRE(jet[7] == approximately(jet[1] + jet[3])); @@ -452,13 +452,13 @@ TEST_CASE("taylor 
asinh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(asinh(jet[1]))); REQUIRE(jet[3] == approximately(asinh(jet[0]))); } @@ -472,16 +472,16 @@ TEST_CASE("taylor asinh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{.3}, fp_t{-.4}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t(.3), fp_t(-.4)}; jet.resize(8); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == -.4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(-.4)); REQUIRE(jet[4] == approximately(asinh(jet[2]))); REQUIRE(jet[5] == approximately(asinh(jet[3]))); @@ -499,13 +499,13 @@ TEST_CASE("taylor asinh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(6); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(asinh(jet[1]))); REQUIRE(jet[3] == approximately(asinh(jet[0]))); REQUIRE(jet[4] == approximately(fp_t{1} / 2 * (1 / sqrt(1 + jet[1] * jet[1]) * jet[3]))); @@ -521,16 +521,16 @@ TEST_CASE("taylor asinh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{.3}, fp_t{-.4}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t(.3), fp_t(-.4)}; jet.resize(12); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == -.4); + REQUIRE(jet[2] == fp_t(.3)); + 
REQUIRE(jet[3] == fp_t(-.4)); REQUIRE(jet[4] == approximately(asinh(jet[2]))); REQUIRE(jet[5] == approximately(asinh(jet[3]))); @@ -554,18 +554,18 @@ TEST_CASE("taylor asinh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{-.5}, fp_t{.3}, fp_t{-.4}, fp_t{.6}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t{-.5}, fp_t(.3), fp_t(-.4), fp_t(.6)}; jet.resize(24); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); - REQUIRE(jet[2] == -.5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); + REQUIRE(jet[2] == fp_t(-.5)); - REQUIRE(jet[3] == .3); - REQUIRE(jet[4] == -.4); - REQUIRE(jet[5] == .6); + REQUIRE(jet[3] == fp_t(.3)); + REQUIRE(jet[4] == fp_t(-.4)); + REQUIRE(jet[5] == fp_t(.6)); REQUIRE(jet[6] == approximately(asinh(jet[3]))); REQUIRE(jet[7] == approximately(asinh(jet[4]))); diff --git a/test/taylor_atan.cpp b/test/taylor_atan.cpp index 013d9d577..39597b03d 100644 --- a/test/taylor_atan.cpp +++ b/test/taylor_atan.cpp @@ -39,7 +39,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple jet{fp_t{2}, fp_t{-4}, fp_t{3}, fp_t{5}}; jet.resize(8); - std::vector pars{fp_t{.5}, fp_t{.5}, fp_t{.3}, fp_t{.3}}; + std::vector pars{fp_t{.5}, fp_t{.5}, fp_t(.3), fp_t(.3)}; jptr(jet.data(), pars.data(), nullptr); @@ -242,8 +242,8 @@ TEST_CASE("taylor atan") REQUIRE(jet[2] == 3); REQUIRE(jet[3] == 5); - REQUIRE(jet[4] == approximately(atan(fp_t{.3}))); - REQUIRE(jet[5] == approximately(atan(fp_t{.3}))); + REQUIRE(jet[4] == approximately(atan(fp_t(.3)))); + REQUIRE(jet[5] == approximately(atan(fp_t(.3)))); REQUIRE(jet[6] == approximately(jet[0] + jet[2])); REQUIRE(jet[7] == approximately(jet[1] + jet[3])); @@ -419,13 +419,13 @@ TEST_CASE("taylor atan") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); jptr(jet.data(), nullptr, 
nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(atan(jet[1]))); REQUIRE(jet[3] == approximately(atan(jet[0]))); } @@ -439,16 +439,16 @@ TEST_CASE("taylor atan") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{.3}, fp_t{-.4}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t(.3), fp_t(-.4)}; jet.resize(8); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == -.4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(-.4)); REQUIRE(jet[4] == approximately(atan(jet[2]))); REQUIRE(jet[5] == approximately(atan(jet[3]))); @@ -466,13 +466,13 @@ TEST_CASE("taylor atan") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(6); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(atan(jet[1]))); REQUIRE(jet[3] == approximately(atan(jet[0]))); REQUIRE(jet[4] == approximately(fp_t{1} / 2 * (1 / (1 + jet[1] * jet[1]) * jet[3]))); @@ -488,16 +488,16 @@ TEST_CASE("taylor atan") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{.3}, fp_t{-.4}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t(.3), fp_t(-.4)}; jet.resize(12); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == -.4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(-.4)); REQUIRE(jet[4] == approximately(atan(jet[2]))); REQUIRE(jet[5] == approximately(atan(jet[3]))); @@ -521,18 +521,18 @@ TEST_CASE("taylor atan") auto jptr = 
reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{-.5}, fp_t{.3}, fp_t{-.4}, fp_t{.6}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t{-.5}, fp_t(.3), fp_t(-.4), fp_t(.6)}; jet.resize(24); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); - REQUIRE(jet[2] == -.5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); + REQUIRE(jet[2] == fp_t(-.5)); - REQUIRE(jet[3] == .3); - REQUIRE(jet[4] == -.4); - REQUIRE(jet[5] == .6); + REQUIRE(jet[3] == fp_t(.3)); + REQUIRE(jet[4] == fp_t(-.4)); + REQUIRE(jet[5] == fp_t(.6)); REQUIRE(jet[6] == approximately(atan(jet[3]))); REQUIRE(jet[7] == approximately(atan(jet[4]))); diff --git a/test/taylor_atan2.cpp b/test/taylor_atan2.cpp index 9e8677912..980107ad3 100644 --- a/test/taylor_atan2.cpp +++ b/test/taylor_atan2.cpp @@ -41,7 +41,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == fp_t{.2}); - REQUIRE(jet[1] == fp_t{.3}); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(atan2(jet[1], a))); REQUIRE(jet[3] == approximately(atan2(jet[0], b))); } @@ -406,15 +406,15 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); std::vector pars{a}; jptr(jet.data(), pars.data(), nullptr); - REQUIRE(jet[0] == fp_t{.2}); - REQUIRE(jet[1] == fp_t{.3}); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(atan2(jet[1], a))); REQUIRE(jet[3] == approximately(atan2(jet[0], b))); } @@ -429,16 +429,16 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + 
std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(8); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(atan2(jet[2], b))); REQUIRE(jet[5] == approximately(atan2(jet[3], b))); @@ -457,18 +457,18 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(8); std::vector pars{fp_t{0}, fp_t{0}, b, b}; jptr(jet.data(), pars.data(), nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(atan2(jet[2], b))); REQUIRE(jet[5] == approximately(atan2(jet[3], b))); @@ -487,13 +487,13 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(6); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(atan2(jet[1], b))); REQUIRE(jet[3] == approximately(atan2(jet[0], b))); REQUIRE(jet[4] == approximately(fp_t{1} / 2 * b * jet[3] / (jet[1] * jet[1] + b * b))); @@ -510,16 +510,16 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(12); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - 
REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(atan2(jet[2], b))); REQUIRE(jet[5] == approximately(atan2(jet[3], b))); @@ -544,18 +544,18 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.1}, fp_t{.3}, fp_t{.4}, fp_t{.6}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.1), fp_t(.3), fp_t(.4), fp_t(.6)}; jet.resize(24); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); - REQUIRE(jet[2] == .1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); + REQUIRE(jet[2] == fp_t(.1)); - REQUIRE(jet[3] == .3); - REQUIRE(jet[4] == .4); - REQUIRE(jet[5] == .6); + REQUIRE(jet[3] == fp_t(.3)); + REQUIRE(jet[4] == fp_t(.4)); + REQUIRE(jet[5] == fp_t(.6)); REQUIRE(jet[6] == approximately(atan2(jet[3], b))); REQUIRE(jet[7] == approximately(atan2(jet[4], b))); @@ -621,13 +621,13 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == fp_t{.2}); - REQUIRE(jet[1] == fp_t{.3}); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(atan2(a, jet[1]))); REQUIRE(jet[3] == approximately(atan2(c, jet[0]))); } @@ -642,15 +642,15 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); std::vector pars{a}; jptr(jet.data(), pars.data(), nullptr); - REQUIRE(jet[0] == fp_t{.2}); - REQUIRE(jet[1] == fp_t{.3}); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(atan2(a, jet[1]))); REQUIRE(jet[3] == approximately(atan2(c, jet[0]))); } @@ -665,16 +665,16 @@ TEST_CASE("taylor atan2") auto jptr = 
reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(8); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(atan2(a, jet[2]))); REQUIRE(jet[5] == approximately(atan2(a, jet[3]))); @@ -693,18 +693,18 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(8); std::vector pars{fp_t{0}, fp_t{0}, c, c}; jptr(jet.data(), pars.data(), nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(atan2(a, jet[2]))); REQUIRE(jet[5] == approximately(atan2(a, jet[3]))); @@ -723,13 +723,13 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(6); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(atan2(a, jet[1]))); REQUIRE(jet[3] == approximately(atan2(c, jet[0]))); REQUIRE(jet[4] == approximately(-fp_t{1} / 2 * a * jet[3] / (jet[1] * jet[1] + a * a))); @@ -746,16 +746,16 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(12); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == 
.2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(atan2(a, jet[2]))); REQUIRE(jet[5] == approximately(atan2(a, jet[3]))); @@ -780,18 +780,18 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.1}, fp_t{.3}, fp_t{.4}, fp_t{.6}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.1), fp_t(.3), fp_t(.4), fp_t(.6)}; jet.resize(24); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); - REQUIRE(jet[2] == .1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); + REQUIRE(jet[2] == fp_t(.1)); - REQUIRE(jet[3] == .3); - REQUIRE(jet[4] == .4); - REQUIRE(jet[5] == .6); + REQUIRE(jet[3] == fp_t(.3)); + REQUIRE(jet[4] == fp_t(.4)); + REQUIRE(jet[5] == fp_t(.6)); REQUIRE(jet[6] == approximately(atan2(a, jet[3]))); REQUIRE(jet[7] == approximately(atan2(a, jet[4]))); @@ -858,13 +858,13 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == fp_t{.2}); - REQUIRE(jet[1] == fp_t{.3}); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(atan2(jet[0], jet[1]))); REQUIRE(jet[3] == approximately(atan2(jet[1], jet[0]))); } @@ -878,16 +878,16 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(8); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + 
REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(atan2(jet[0], jet[2]))); REQUIRE(jet[5] == approximately(atan2(jet[1], jet[3]))); @@ -905,13 +905,13 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(6); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(atan2(jet[0], jet[1]))); REQUIRE(jet[3] == approximately(atan2(jet[1], jet[0]))); REQUIRE(jet[4] @@ -931,16 +931,16 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(12); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(atan2(jet[0], jet[2]))); REQUIRE(jet[5] == approximately(atan2(jet[1], jet[3]))); @@ -972,18 +972,18 @@ TEST_CASE("taylor atan2") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.1}, fp_t{.3}, fp_t{.4}, fp_t{.6}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.1), fp_t(.3), fp_t(.4), fp_t(.6)}; jet.resize(24); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); - REQUIRE(jet[2] == .1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); + REQUIRE(jet[2] == fp_t(.1)); - REQUIRE(jet[3] == .3); - REQUIRE(jet[4] == .4); - REQUIRE(jet[5] == .6); + REQUIRE(jet[3] == fp_t(.3)); + REQUIRE(jet[4] == fp_t(.4)); + REQUIRE(jet[5] == fp_t(.6)); REQUIRE(jet[6] == approximately(atan2(jet[0], jet[3]))); REQUIRE(jet[7] == approximately(atan2(jet[1], jet[4]))); diff --git 
a/test/taylor_atanh.cpp b/test/taylor_atanh.cpp index d407b4276..405e6747d 100644 --- a/test/taylor_atanh.cpp +++ b/test/taylor_atanh.cpp @@ -39,7 +39,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple jet{fp_t{2}, fp_t{-4}, fp_t{3}, fp_t{5}}; jet.resize(8); - std::vector pars{fp_t{.5}, fp_t{.5}, fp_t{.3}, fp_t{.3}}; + std::vector pars{fp_t{.5}, fp_t{.5}, fp_t(.3), fp_t(.3)}; jptr(jet.data(), pars.data(), nullptr); @@ -271,8 +271,8 @@ TEST_CASE("taylor atanh") REQUIRE(jet[2] == 3); REQUIRE(jet[3] == 5); - REQUIRE(jet[4] == approximately(atanh(fp_t{.3}))); - REQUIRE(jet[5] == approximately(atanh(fp_t{.3}))); + REQUIRE(jet[4] == approximately(atanh(fp_t(.3)))); + REQUIRE(jet[5] == approximately(atanh(fp_t(.3)))); REQUIRE(jet[6] == approximately(jet[0] + jet[2])); REQUIRE(jet[7] == approximately(jet[1] + jet[3])); @@ -448,13 +448,13 @@ TEST_CASE("taylor atanh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(atanh(jet[1]))); REQUIRE(jet[3] == approximately(atanh(jet[0]))); } @@ -468,16 +468,16 @@ TEST_CASE("taylor atanh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{.3}, fp_t{-.4}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t(.3), fp_t(-.4)}; jet.resize(8); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == -.4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(-.4)); REQUIRE(jet[4] == approximately(atanh(jet[2]))); REQUIRE(jet[5] == approximately(atanh(jet[3]))); @@ -495,13 +495,13 @@ TEST_CASE("taylor atanh") auto jptr = 
reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(6); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(atanh(jet[1]))); REQUIRE(jet[3] == approximately(atanh(jet[0]))); REQUIRE(jet[4] == approximately(fp_t{1} / 2 * (1 / (1 - jet[1] * jet[1]) * jet[3]))); @@ -517,16 +517,16 @@ TEST_CASE("taylor atanh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{.3}, fp_t{-.4}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t(.3), fp_t(-.4)}; jet.resize(12); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == -.4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(-.4)); REQUIRE(jet[4] == approximately(atanh(jet[2]))); REQUIRE(jet[5] == approximately(atanh(jet[3]))); @@ -550,18 +550,18 @@ TEST_CASE("taylor atanh") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{-.1}, fp_t{-.5}, fp_t{.3}, fp_t{-.4}, fp_t{.6}}; + std::vector jet{fp_t(.2), fp_t(-.1), fp_t{-.5}, fp_t(.3), fp_t(-.4), fp_t(.6)}; jet.resize(24); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == -.1); - REQUIRE(jet[2] == -.5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(-.1)); + REQUIRE(jet[2] == fp_t(-.5)); - REQUIRE(jet[3] == .3); - REQUIRE(jet[4] == -.4); - REQUIRE(jet[5] == .6); + REQUIRE(jet[3] == fp_t(.3)); + REQUIRE(jet[4] == fp_t(-.4)); + REQUIRE(jet[5] == fp_t(.6)); REQUIRE(jet[6] == approximately(atanh(jet[3]))); REQUIRE(jet[7] == approximately(atanh(jet[4]))); diff --git a/test/taylor_const_sys.cpp b/test/taylor_const_sys.cpp index e89e65392..6a44a658d 100644 --- a/test/taylor_const_sys.cpp +++ b/test/taylor_const_sys.cpp @@ -37,7 +37,7 
@@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple || !skip_batch_ld) { @@ -547,11 +547,11 @@ TEST_CASE("taylor erf") REQUIRE(jet[6] == approximately(erf(jet[0]))); REQUIRE(jet[7] == approximately(erf(jet[1]))); - REQUIRE(jet[8] == approximately(fp_t{1} / 2 * ((2. / sqrt(pi) * exp(-jet[2] * jet[2])) * jet[6]))); - REQUIRE(jet[9] == approximately(fp_t{1} / 2 * ((2. / sqrt(pi) * exp(-jet[3] * jet[3])) * jet[7]))); + REQUIRE(jet[8] == approximately(fp_t{1} / 2 * ((2 / sqrt(pi) * exp(-jet[2] * jet[2])) * jet[6]))); + REQUIRE(jet[9] == approximately(fp_t{1} / 2 * ((2 / sqrt(pi) * exp(-jet[3] * jet[3])) * jet[7]))); - REQUIRE(jet[10] == approximately(fp_t{1} / 2 * ((2. / sqrt(pi) * exp(-jet[0] * jet[0])) * jet[4]))); - REQUIRE(jet[11] == approximately(fp_t{1} / 2 * ((2. / sqrt(pi) * exp(-jet[1] * jet[1])) * jet[5]))); + REQUIRE(jet[10] == approximately(fp_t{1} / 2 * ((2 / sqrt(pi) * exp(-jet[0] * jet[0])) * jet[4]))); + REQUIRE(jet[11] == approximately(fp_t{1} / 2 * ((2 / sqrt(pi) * exp(-jet[1] * jet[1])) * jet[5]))); } if constexpr (!std::is_same_v || !skip_batch_ld) { @@ -584,39 +584,39 @@ TEST_CASE("taylor erf") REQUIRE(jet[10] == approximately(erf(jet[1]))); REQUIRE(jet[11] == approximately(erf(jet[2]))); - REQUIRE(jet[12] == approximately(fp_t{1} / 2 * (2. / sqrt(pi) * exp(-jet[3] * jet[3]) * jet[9]))); - REQUIRE(jet[13] == approximately(fp_t{1} / 2 * (2. / sqrt(pi) * exp(-jet[4] * jet[4]) * jet[10]))); - REQUIRE(jet[14] == approximately(fp_t{1} / 2 * (2. / sqrt(pi) * exp(-jet[5] * jet[5]) * jet[11]))); + REQUIRE(jet[12] == approximately(fp_t{1} / 2 * (2 / sqrt(pi) * exp(-jet[3] * jet[3]) * jet[9]))); + REQUIRE(jet[13] == approximately(fp_t{1} / 2 * (2 / sqrt(pi) * exp(-jet[4] * jet[4]) * jet[10]))); + REQUIRE(jet[14] == approximately(fp_t{1} / 2 * (2 / sqrt(pi) * exp(-jet[5] * jet[5]) * jet[11]))); - REQUIRE(jet[15] == approximately(fp_t{1} / 2 * (2. 
/ sqrt(pi) * exp(-jet[0] * jet[0]) * jet[6]))); - REQUIRE(jet[16] == approximately(fp_t{1} / 2 * (2. / sqrt(pi) * exp(-jet[1] * jet[1]) * jet[7]))); - REQUIRE(jet[17] == approximately(fp_t{1} / 2 * (2. / sqrt(pi) * exp(-jet[2] * jet[2]) * jet[8]))); + REQUIRE(jet[15] == approximately(fp_t{1} / 2 * (2 / sqrt(pi) * exp(-jet[0] * jet[0]) * jet[6]))); + REQUIRE(jet[16] == approximately(fp_t{1} / 2 * (2 / sqrt(pi) * exp(-jet[1] * jet[1]) * jet[7]))); + REQUIRE(jet[17] == approximately(fp_t{1} / 2 * (2 / sqrt(pi) * exp(-jet[2] * jet[2]) * jet[8]))); REQUIRE(jet[18] - == approximately(fp_t{1} / 6 * 2. / sqrt(pi) - * (-2. * exp(-jet[3] * jet[3]) * jet[3] * jet[9] * jet[9] - + exp(-jet[3] * jet[3]) * 2. / sqrt(pi) * exp(-jet[0] * jet[0]) * jet[6]))); + == approximately(fp_t{1} / 6 * 2 / sqrt(pi) + * (-2 * exp(-jet[3] * jet[3]) * jet[3] * jet[9] * jet[9] + + exp(-jet[3] * jet[3]) * 2 / sqrt(pi) * exp(-jet[0] * jet[0]) * jet[6]))); REQUIRE(jet[19] - == approximately(fp_t{1} / 6 * 2. / sqrt(pi) - * (-2. * exp(-jet[4] * jet[4]) * jet[4] * jet[10] * jet[10] - + exp(-jet[4] * jet[4]) * 2. / sqrt(pi) * exp(-jet[1] * jet[1]) * jet[7]))); + == approximately(fp_t{1} / 6 * 2 / sqrt(pi) + * (-2 * exp(-jet[4] * jet[4]) * jet[4] * jet[10] * jet[10] + + exp(-jet[4] * jet[4]) * 2 / sqrt(pi) * exp(-jet[1] * jet[1]) * jet[7]))); REQUIRE(jet[20] - == approximately(fp_t{1} / 6 * 2. / sqrt(pi) - * (-2. * exp(-jet[5] * jet[5]) * jet[5] * jet[11] * jet[11] - + exp(-jet[5] * jet[5]) * 2. / sqrt(pi) * exp(-jet[2] * jet[2]) * jet[8]))); + == approximately(fp_t{1} / 6 * 2 / sqrt(pi) + * (-2 * exp(-jet[5] * jet[5]) * jet[5] * jet[11] * jet[11] + + exp(-jet[5] * jet[5]) * 2 / sqrt(pi) * exp(-jet[2] * jet[2]) * jet[8]))); REQUIRE(jet[21] - == approximately(fp_t{1} / 6 * 2. / sqrt(pi) - * (-2. * exp(-jet[0] * jet[0]) * jet[0] * jet[6] * jet[6] - + exp(-jet[0] * jet[0]) * 2. 
/ sqrt(pi) * exp(-jet[3] * jet[3]) * jet[9]))); + == approximately(fp_t{1} / 6 * 2 / sqrt(pi) + * (-2 * exp(-jet[0] * jet[0]) * jet[0] * jet[6] * jet[6] + + exp(-jet[0] * jet[0]) * 2 / sqrt(pi) * exp(-jet[3] * jet[3]) * jet[9]))); REQUIRE(jet[22] - == approximately(fp_t{1} / 6 * 2. / sqrt(pi) - * (-2. * exp(-jet[1] * jet[1]) * jet[1] * jet[7] * jet[7] - + exp(-jet[1] * jet[1]) * 2. / sqrt(pi) * exp(-jet[4] * jet[4]) * jet[10]))); + == approximately(fp_t{1} / 6 * 2 / sqrt(pi) + * (-2 * exp(-jet[1] * jet[1]) * jet[1] * jet[7] * jet[7] + + exp(-jet[1] * jet[1]) * 2 / sqrt(pi) * exp(-jet[4] * jet[4]) * jet[10]))); REQUIRE(jet[23] - == approximately(fp_t{1} / 6 * 2. / sqrt(pi) - * (-2. * exp(-jet[2] * jet[2]) * jet[2] * jet[8] * jet[8] - + exp(-jet[2] * jet[2]) * 2. / sqrt(pi) * exp(-jet[5] * jet[5]) * jet[11]))); + == approximately(fp_t{1} / 6 * 2 / sqrt(pi) + * (-2 * exp(-jet[2] * jet[2]) * jet[2] * jet[8] * jet[8] + + exp(-jet[2] * jet[2]) * 2 / sqrt(pi) * exp(-jet[5] * jet[5]) * jet[11]))); } if constexpr (!std::is_same_v || !skip_batch_ld) { diff --git a/test/taylor_exp.cpp b/test/taylor_exp.cpp index 8619d7648..3820174e8 100644 --- a/test/taylor_exp.cpp +++ b/test/taylor_exp.cpp @@ -35,7 +35,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == fp_t{.2}); - REQUIRE(jet[1] == fp_t{.3}); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(bmt_inv_kep_E(jet[1], a))); REQUIRE(jet[3] == approximately(bmt_inv_kep_E(jet[0], b))); } @@ -389,15 +389,15 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); std::vector pars{a}; jptr(jet.data(), pars.data(), nullptr); - REQUIRE(jet[0] == 
fp_t{.2}); - REQUIRE(jet[1] == fp_t{.3}); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(bmt_inv_kep_E(jet[1], a))); REQUIRE(jet[3] == approximately(bmt_inv_kep_E(jet[0], b))); } @@ -412,16 +412,16 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(8); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(bmt_inv_kep_E(jet[2], b))); REQUIRE(jet[5] == approximately(bmt_inv_kep_E(jet[3], b))); @@ -440,18 +440,18 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(8); std::vector pars{fp_t{0}, fp_t{0}, b, b}; jptr(jet.data(), pars.data(), nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(bmt_inv_kep_E(jet[2], b))); REQUIRE(jet[5] == approximately(bmt_inv_kep_E(jet[3], b))); @@ -470,13 +470,13 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(6); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(bmt_inv_kep_E(jet[1], b))); REQUIRE(jet[3] == approximately(bmt_inv_kep_E(jet[0], b))); REQUIRE(jet[4] == approximately(fp_t{1} / 2 * 
jet[3] * sin(jet[2]) / (1 - jet[1] * cos(jet[2])))); @@ -493,16 +493,16 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(12); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(bmt_inv_kep_E(jet[2], b))); REQUIRE(jet[5] == approximately(bmt_inv_kep_E(jet[3], b))); @@ -527,18 +527,18 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.1}, fp_t{.3}, fp_t{.4}, fp_t{.6}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.1), fp_t(.3), fp_t(.4), fp_t(.6)}; jet.resize(24); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); - REQUIRE(jet[2] == .1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); + REQUIRE(jet[2] == fp_t(.1)); - REQUIRE(jet[3] == .3); - REQUIRE(jet[4] == .4); - REQUIRE(jet[5] == .6); + REQUIRE(jet[3] == fp_t(.3)); + REQUIRE(jet[4] == fp_t(.4)); + REQUIRE(jet[5] == fp_t(.6)); REQUIRE(jet[6] == approximately(bmt_inv_kep_E(jet[3], b))); REQUIRE(jet[7] == approximately(bmt_inv_kep_E(jet[4], b))); @@ -626,13 +626,13 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == fp_t{.2}); - REQUIRE(jet[1] == fp_t{.3}); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(bmt_inv_kep_E(a, jet[1]))); REQUIRE(jet[3] == approximately(bmt_inv_kep_E(c, jet[0]))); } @@ -647,15 +647,15 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); 
- std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); std::vector pars{a}; jptr(jet.data(), pars.data(), nullptr); - REQUIRE(jet[0] == fp_t{.2}); - REQUIRE(jet[1] == fp_t{.3}); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(bmt_inv_kep_E(a, jet[1]))); REQUIRE(jet[3] == approximately(bmt_inv_kep_E(c, jet[0]))); } @@ -670,16 +670,16 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(8); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(bmt_inv_kep_E(a, jet[2]))); REQUIRE(jet[5] == approximately(bmt_inv_kep_E(a, jet[3]))); @@ -698,18 +698,18 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(8); std::vector pars{fp_t{0}, fp_t{0}, c, c}; jptr(jet.data(), pars.data(), nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(bmt_inv_kep_E(a, jet[2]))); REQUIRE(jet[5] == approximately(bmt_inv_kep_E(a, jet[3]))); @@ -728,13 +728,13 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(6); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == 
fp_t(.3)); REQUIRE(jet[2] == approximately(bmt_inv_kep_E(a, jet[1]))); REQUIRE(jet[3] == approximately(bmt_inv_kep_E(c, jet[0]))); REQUIRE(jet[4] == approximately(fp_t{1} / 2 * jet[3] / (1 - a * cos(jet[2])))); @@ -751,16 +751,16 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(12); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(bmt_inv_kep_E(a, jet[2]))); REQUIRE(jet[5] == approximately(bmt_inv_kep_E(a, jet[3]))); @@ -785,18 +785,18 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.1}, fp_t{.3}, fp_t{.4}, fp_t{.6}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.1), fp_t(.3), fp_t(.4), fp_t(.6)}; jet.resize(24); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); - REQUIRE(jet[2] == .1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); + REQUIRE(jet[2] == fp_t(.1)); - REQUIRE(jet[3] == .3); - REQUIRE(jet[4] == .4); - REQUIRE(jet[5] == .6); + REQUIRE(jet[3] == fp_t(.3)); + REQUIRE(jet[4] == fp_t(.4)); + REQUIRE(jet[5] == fp_t(.6)); REQUIRE(jet[6] == approximately(bmt_inv_kep_E(a, jet[3]))); REQUIRE(jet[7] == approximately(bmt_inv_kep_E(a, jet[4]))); @@ -859,13 +859,13 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(4); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == fp_t{.2}); - REQUIRE(jet[1] == fp_t{.3}); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(bmt_inv_kep_E(jet[0], 
jet[1]))); REQUIRE(jet[3] == approximately(bmt_inv_kep_E(jet[1], jet[0]))); } @@ -879,16 +879,16 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(8); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(bmt_inv_kep_E(jet[0], jet[2]))); REQUIRE(jet[5] == approximately(bmt_inv_kep_E(jet[1], jet[3]))); @@ -906,13 +906,13 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.3}}; + std::vector jet{fp_t(.2), fp_t(.3)}; jet.resize(6); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .3); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.3)); REQUIRE(jet[2] == approximately(bmt_inv_kep_E(jet[0], jet[1]))); REQUIRE(jet[3] == approximately(bmt_inv_kep_E(jet[1], jet[0]))); REQUIRE(jet[4] @@ -930,16 +930,16 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.3}, fp_t{.4}}; + std::vector jet{fp_t(.2), fp_t{.5}, fp_t(.3), fp_t(.4)}; jet.resize(12); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); - REQUIRE(jet[2] == .3); - REQUIRE(jet[3] == .4); + REQUIRE(jet[2] == fp_t(.3)); + REQUIRE(jet[3] == fp_t(.4)); REQUIRE(jet[4] == approximately(bmt_inv_kep_E(jet[0], jet[2]))); REQUIRE(jet[5] == approximately(bmt_inv_kep_E(jet[1], jet[3]))); @@ -967,18 +967,18 @@ TEST_CASE("taylor kepE") auto jptr = reinterpret_cast(s.jit_lookup("jet")); - std::vector jet{fp_t{.2}, fp_t{.5}, fp_t{.1}, fp_t{.3}, fp_t{.4}, fp_t{.6}}; + std::vector 
jet{fp_t(.2), fp_t{.5}, fp_t(.1), fp_t(.3), fp_t(.4), fp_t(.6)}; jet.resize(24); jptr(jet.data(), nullptr, nullptr); - REQUIRE(jet[0] == .2); - REQUIRE(jet[1] == .5); - REQUIRE(jet[2] == .1); + REQUIRE(jet[0] == fp_t(.2)); + REQUIRE(jet[1] == fp_t(.5)); + REQUIRE(jet[2] == fp_t(.1)); - REQUIRE(jet[3] == .3); - REQUIRE(jet[4] == .4); - REQUIRE(jet[5] == .6); + REQUIRE(jet[3] == fp_t(.3)); + REQUIRE(jet[4] == fp_t(.4)); + REQUIRE(jet[5] == fp_t(.6)); REQUIRE(jet[6] == approximately(bmt_inv_kep_E(jet[0], jet[3]))); REQUIRE(jet[7] == approximately(bmt_inv_kep_E(jet[1], jet[4]))); diff --git a/test/taylor_kepF.cpp b/test/taylor_kepF.cpp index 8377426de..e9fe741fd 100644 --- a/test/taylor_kepF.cpp +++ b/test/taylor_kepF.cpp @@ -31,7 +31,7 @@ using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple #include -#include #include #include #include @@ -42,7 +41,7 @@ using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple ? 1e-6 : 1e-10; + auto ta = taylor_adaptive{ {prime(x) = v, prime(v) = -9.8 * sin(x)}, {fp_t(0), fp_t(.25)}, kw::opt_level = opt_level, kw::high_accuracy = high_accuracy, kw::compact_mode = compact_mode, - kw::nt_events = {ev_t(v * v - 1e-10, - [&counter, &cur_time](taylor_adaptive &ta_, fp_t t, int) { + kw::nt_events = {ev_t(v * v - small_delta, + [&counter, &cur_time, small_delta](taylor_adaptive &ta_, fp_t t, int) { using std::abs; // Make sure the callbacks are called in order. @@ -343,7 +345,7 @@ TEST_CASE("taylor nte multizero") ta_.update_d_output(t); const auto vel = ta_.get_d_output()[1]; - REQUIRE(abs(vel * vel - 1e-10) < std::numeric_limits::epsilon()); + REQUIRE(abs(vel * vel - small_delta) < std::numeric_limits::epsilon()); ++counter; @@ -380,58 +382,58 @@ TEST_CASE("taylor nte multizero") cur_time = 0; // Run the same test with sub-eps tolerance too. 
- ta = taylor_adaptive{{prime(x) = v, prime(v) = -9.8 * sin(x)}, - {fp_t(0), fp_t(.25)}, - kw::tol = std::numeric_limits::epsilon() / 100, - kw::opt_level = opt_level, - kw::high_accuracy = high_accuracy, - kw::compact_mode = compact_mode, - kw::nt_events - = {ev_t(v * v - 1e-10, - [&counter, &cur_time](taylor_adaptive &ta_, fp_t t, int) { - using std::abs; + ta = taylor_adaptive{ + {prime(x) = v, prime(v) = -9.8 * sin(x)}, + {fp_t(0), fp_t(.25)}, + kw::tol = std::numeric_limits::epsilon() / 100, + kw::opt_level = opt_level, + kw::high_accuracy = high_accuracy, + kw::compact_mode = compact_mode, + kw::nt_events = {ev_t(v * v - small_delta, + [&counter, &cur_time, small_delta](taylor_adaptive &ta_, fp_t t, int) { + using std::abs; - // Make sure the callbacks are called in order. - REQUIRE(t > cur_time); + // Make sure the callbacks are called in order. + REQUIRE(t > cur_time); - // Ensure the state of ta has - // been propagated until after the - // event. - REQUIRE(ta_.get_time() > t); + // Ensure the state of ta has + // been propagated until after the + // event. + REQUIRE(ta_.get_time() > t); - REQUIRE((counter % 3u == 0u || counter % 3u == 2u)); + REQUIRE((counter % 3u == 0u || counter % 3u == 2u)); - ta_.update_d_output(t); + ta_.update_d_output(t); - const auto vel = ta_.get_d_output()[1]; - REQUIRE(abs(vel * vel - 1e-10) < std::numeric_limits::epsilon()); + const auto vel = ta_.get_d_output()[1]; + REQUIRE(abs(vel * vel - small_delta) < std::numeric_limits::epsilon()); - ++counter; + ++counter; - cur_time = t; - }), - ev_t(v, [&counter, &cur_time](taylor_adaptive &ta_, fp_t t, int) { - using std::abs; + cur_time = t; + }), + ev_t(v, [&counter, &cur_time](taylor_adaptive &ta_, fp_t t, int) { + using std::abs; - // Make sure the callbacks are called in order. - REQUIRE(t > cur_time); + // Make sure the callbacks are called in order. + REQUIRE(t > cur_time); - // Ensure the state of ta has - // been propagated until after the - // event. 
- REQUIRE(ta_.get_time() > t); + // Ensure the state of ta has + // been propagated until after the + // event. + REQUIRE(ta_.get_time() > t); - REQUIRE((counter % 3u == 1u)); + REQUIRE((counter % 3u == 1u)); - ta_.update_d_output(t); + ta_.update_d_output(t); - const auto vel = ta_.get_d_output()[1]; - REQUIRE(abs(vel) <= std::numeric_limits::epsilon() * 100); + const auto vel = ta_.get_d_output()[1]; + REQUIRE(abs(vel) <= std::numeric_limits::epsilon() * 100); - ++counter; + ++counter; - cur_time = t; - })}}; + cur_time = t; + })}}; REQUIRE(std::get<0>(ta.propagate_until(fp_t(4))) == taylor_outcome::time_limit); @@ -447,64 +449,64 @@ TEST_CASE("taylor nte multizero") // - 0 0 // - 0 1 0 // - 0 0 - ta = taylor_adaptive{{prime(x) = v, prime(v) = -9.8 * sin(x)}, - {fp_t(0), fp_t(.25)}, - kw::opt_level = opt_level, - kw::high_accuracy = high_accuracy, - kw::compact_mode = compact_mode, - kw::nt_events - = {ev_t(v * v - 1e-10, - [&counter, &cur_time](taylor_adaptive &ta_, fp_t t, int) { - using std::abs; + ta = taylor_adaptive{ + {prime(x) = v, prime(v) = -9.8 * sin(x)}, + {fp_t(0), fp_t(.25)}, + kw::opt_level = opt_level, + kw::high_accuracy = high_accuracy, + kw::compact_mode = compact_mode, + kw::nt_events = {ev_t(v * v - small_delta, + [&counter, &cur_time, small_delta](taylor_adaptive &ta_, fp_t t, int) { + using std::abs; - // Make sure the callbacks are called in order. - REQUIRE(t > cur_time); + // Make sure the callbacks are called in order. + REQUIRE(t > cur_time); - // Ensure the state of ta has - // been propagated until after the - // event. - REQUIRE(ta_.get_time() > t); + // Ensure the state of ta has + // been propagated until after the + // event. 
+ REQUIRE(ta_.get_time() > t); - REQUIRE((counter == 0u || (counter >= 2u && counter <= 6u) - || (counter >= 7u && counter <= 9u))); + REQUIRE((counter == 0u || (counter >= 2u && counter <= 6u) + || (counter >= 7u && counter <= 9u))); - ta_.update_d_output(t); + ta_.update_d_output(t); - const auto vel = ta_.get_d_output()[1]; - REQUIRE(abs(vel * vel - 1e-10) < std::numeric_limits::epsilon()); + const auto vel = ta_.get_d_output()[1]; + REQUIRE(abs(vel * vel - small_delta) < std::numeric_limits::epsilon()); - ++counter; + ++counter; - cur_time = t; - }), - ev_t( - v, + cur_time = t; + }), + ev_t( + v, - [&counter, &cur_time](taylor_adaptive &ta_, fp_t t, int d_sgn) { - using std::abs; + [&counter, &cur_time](taylor_adaptive &ta_, fp_t t, int d_sgn) { + using std::abs; - REQUIRE(d_sgn == -1); + REQUIRE(d_sgn == -1); - // Make sure the callbacks are called in order. - REQUIRE(t > cur_time); + // Make sure the callbacks are called in order. + REQUIRE(t > cur_time); - // Ensure the state of ta has - // been propagated until after the - // event. - REQUIRE(ta_.get_time() > t); + // Ensure the state of ta has + // been propagated until after the + // event. + REQUIRE(ta_.get_time() > t); - REQUIRE((counter == 1u || counter == 6u)); + REQUIRE((counter == 1u || counter == 6u)); - ta_.update_d_output(t); + ta_.update_d_output(t); - const auto vel = ta_.get_d_output()[1]; - REQUIRE(abs(vel) <= std::numeric_limits::epsilon() * 100); + const auto vel = ta_.get_d_output()[1]; + REQUIRE(abs(vel) <= std::numeric_limits::epsilon() * 100); - ++counter; + ++counter; - cur_time = t; - }, - kw::direction = event_direction::negative)}}; + cur_time = t; + }, + kw::direction = event_direction::negative)}}; REQUIRE(std::get<0>(ta.propagate_until(fp_t(4))) == taylor_outcome::time_limit); @@ -514,65 +516,65 @@ TEST_CASE("taylor nte multizero") cur_time = 0; // Sub-eps tolerance too. 
- ta = taylor_adaptive{{prime(x) = v, prime(v) = -9.8 * sin(x)}, - {fp_t(0), fp_t(.25)}, - kw::tol = std::numeric_limits::epsilon() / 100, - kw::opt_level = opt_level, - kw::high_accuracy = high_accuracy, - kw::compact_mode = compact_mode, - kw::nt_events - = {ev_t(v * v - 1e-10, - [&counter, &cur_time](taylor_adaptive &ta_, fp_t t, int) { - using std::abs; + ta = taylor_adaptive{ + {prime(x) = v, prime(v) = -9.8 * sin(x)}, + {fp_t(0), fp_t(.25)}, + kw::tol = std::numeric_limits::epsilon() / 100, + kw::opt_level = opt_level, + kw::high_accuracy = high_accuracy, + kw::compact_mode = compact_mode, + kw::nt_events = {ev_t(v * v - small_delta, + [&counter, &cur_time, small_delta](taylor_adaptive &ta_, fp_t t, int) { + using std::abs; - // Make sure the callbacks are called in order. - REQUIRE(t > cur_time); + // Make sure the callbacks are called in order. + REQUIRE(t > cur_time); - // Ensure the state of ta has - // been propagated until after the - // event. - REQUIRE(ta_.get_time() > t); + // Ensure the state of ta has + // been propagated until after the + // event. + REQUIRE(ta_.get_time() > t); - REQUIRE((counter == 0u || (counter >= 2u && counter <= 6u) - || (counter >= 7u && counter <= 9u))); + REQUIRE((counter == 0u || (counter >= 2u && counter <= 6u) + || (counter >= 7u && counter <= 9u))); - ta_.update_d_output(t); + ta_.update_d_output(t); - const auto vel = ta_.get_d_output()[1]; - REQUIRE(abs(vel * vel - 1e-10) < std::numeric_limits::epsilon()); + const auto vel = ta_.get_d_output()[1]; + REQUIRE(abs(vel * vel - small_delta) < std::numeric_limits::epsilon()); - ++counter; + ++counter; - cur_time = t; - }), - ev_t( - v, + cur_time = t; + }), + ev_t( + v, - [&counter, &cur_time](taylor_adaptive &ta_, fp_t t, int d_sgn) { - using std::abs; + [&counter, &cur_time](taylor_adaptive &ta_, fp_t t, int d_sgn) { + using std::abs; - REQUIRE(d_sgn == -1); + REQUIRE(d_sgn == -1); - // Make sure the callbacks are called in order. 
- REQUIRE(t > cur_time); + // Make sure the callbacks are called in order. + REQUIRE(t > cur_time); - // Ensure the state of ta has - // been propagated until after the - // event. - REQUIRE(ta_.get_time() > t); + // Ensure the state of ta has + // been propagated until after the + // event. + REQUIRE(ta_.get_time() > t); - REQUIRE((counter == 1u || counter == 6u)); + REQUIRE((counter == 1u || counter == 6u)); - ta_.update_d_output(t); + ta_.update_d_output(t); - const auto vel = ta_.get_d_output()[1]; - REQUIRE(abs(vel) <= std::numeric_limits::epsilon() * 100); + const auto vel = ta_.get_d_output()[1]; + REQUIRE(abs(vel) <= std::numeric_limits::epsilon() * 100); - ++counter; + ++counter; - cur_time = t; - }, - kw::direction = event_direction::negative)}}; + cur_time = t; + }, + kw::direction = event_direction::negative)}}; REQUIRE(std::get<0>(ta.propagate_until(fp_t(4))) == taylor_outcome::time_limit); @@ -602,6 +604,9 @@ TEST_CASE("taylor nte multizero negative timestep") fp_t cur_time(0); + // NOTE: don't make the small delta too small in single-precision. + const auto small_delta = std::is_same_v ? 1e-6 : 1e-10; + // In this test, we define two events: // - the velocity is smaller in absolute // value than a small limit, @@ -616,8 +621,8 @@ TEST_CASE("taylor nte multizero negative timestep") kw::opt_level = opt_level, kw::high_accuracy = high_accuracy, kw::compact_mode = compact_mode, - kw::nt_events = {ev_t(v * v - 1e-10, - [&counter, &cur_time](taylor_adaptive &ta_, fp_t t, int) { + kw::nt_events = {ev_t(v * v - small_delta, + [&counter, &cur_time, small_delta](taylor_adaptive &ta_, fp_t t, int) { using std::abs; // Make sure the callbacks are called in order.
@@ -633,7 +638,7 @@ TEST_CASE("taylor nte multizero negative timestep") ta_.update_d_output(t); const auto vel = ta_.get_d_output()[1]; - REQUIRE(abs(vel * vel - 1e-10) < std::numeric_limits::epsilon()); + REQUIRE(abs(vel * vel - small_delta) < std::numeric_limits::epsilon()); ++counter; @@ -788,6 +793,7 @@ struct s11n_callback { } }; +HEYOKA_S11N_CALLABLE_EXPORT(s11n_callback, void, taylor_adaptive &, float, int) HEYOKA_S11N_CALLABLE_EXPORT(s11n_callback, void, taylor_adaptive &, double, int) HEYOKA_S11N_CALLABLE_EXPORT(s11n_callback, void, taylor_adaptive &, long double, int) diff --git a/test/taylor_pow.cpp b/test/taylor_pow.cpp index 3344b04c3..eded5ea72 100644 --- a/test/taylor_pow.cpp +++ b/test/taylor_pow.cpp @@ -40,7 +40,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple sys, unsigned opt_level, bool } } +float sigmoid(float x) +{ + return 1 / (1 + std::exp(-x)); +} + double sigmoid(double x) { return 1. / (1. + std::exp(-x)); diff --git a/test/taylor_sincos.cpp b/test/taylor_sincos.cpp index f9a59cd97..8efebd5fd 100644 --- a/test/taylor_sincos.cpp +++ b/test/taylor_sincos.cpp @@ -35,7 +35,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple sys, unsigned opt_level, bool jptr_scalar(jet_scalar.data(), nullptr, nullptr); for (auto i = 2u; i < 8u; ++i) { - REQUIRE(jet_scalar[i] == approximately(jet_batch[i * batch_size + batch_idx])); + REQUIRE(jet_scalar[i] == approximately(jet_batch[i * batch_size + batch_idx], T(1000))); } } } diff --git a/test/taylor_sqrt.cpp b/test/taylor_sqrt.cpp index 08f8022c1..7bd95285c 100644 --- a/test/taylor_sqrt.cpp +++ b/test/taylor_sqrt.cpp @@ -35,7 +35,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple &args) return sum(new_args); } -const auto fp_types = std::tuple::t_event_t; using nt_ev_t = typename taylor_adaptive::nt_event_t; + 
// NOTE: don't make the small delta too small in single-precision. + const auto small_delta = std::is_same_v ? 1e-6 : 1e-10; + // NOTE: test also sub-eps tolerance. for (auto cur_tol : {std::numeric_limits::epsilon(), std::numeric_limits::epsilon() / 100}) { auto counter_nt = 0u, counter_t = 0u; @@ -248,24 +251,25 @@ TEST_CASE("taylor te basic") kw::opt_level = opt_level, kw::high_accuracy = high_accuracy, kw::compact_mode = compact_mode, - kw::nt_events = {nt_ev_t(v * v - 1e-10, - [&counter_nt, &cur_time, &direction](taylor_adaptive &ta_, fp_t t, int) { - // Make sure the callbacks are called in order. - if (direction) { - REQUIRE(t > cur_time); - } else { - REQUIRE(t < cur_time); - } + kw::nt_events + = {nt_ev_t(v * v - small_delta, + [&counter_nt, &cur_time, &direction, small_delta](taylor_adaptive &ta_, fp_t t, int) { + // Make sure the callbacks are called in order. + if (direction) { + REQUIRE(t > cur_time); + } else { + REQUIRE(t < cur_time); + } - ta_.update_d_output(t); + ta_.update_d_output(t); - const auto vel = ta_.get_d_output()[1]; - REQUIRE(abs(vel * vel - 1e-10) < std::numeric_limits::epsilon()); + const auto vel = ta_.get_d_output()[1]; + REQUIRE(abs(vel * vel - small_delta) < std::numeric_limits::epsilon()); - ++counter_nt; + ++counter_nt; - cur_time = t; - })}, + cur_time = t; + })}, kw::t_events = {t_ev_t( v, kw::callback = [&counter_t, &cur_time, &direction](taylor_adaptive &ta_, bool mr, int) { const auto t = ta_.get_time(); @@ -994,7 +998,7 @@ TEST_CASE("taylor te boolean callback") // Some testing for propagate_grid() too.
ta.reset_cooldowns(); ta.set_time(fp_t{0}); - ta.get_state_data()[0] = -0.1; + ta.get_state_data()[0] = fp_t(-0.1); ta.get_state_data()[1] = 0; cur_time = -1; direction = true; @@ -1092,6 +1096,7 @@ struct s11n_callback { } }; +HEYOKA_S11N_CALLABLE_EXPORT(s11n_callback, bool, taylor_adaptive &, bool, int) HEYOKA_S11N_CALLABLE_EXPORT(s11n_callback, bool, taylor_adaptive &, bool, int) HEYOKA_S11N_CALLABLE_EXPORT(s11n_callback, bool, taylor_adaptive &, bool, int) diff --git a/test/taylor_tan.cpp b/test/taylor_tan.cpp index 98f33d434..a443dae39 100644 --- a/test/taylor_tan.cpp +++ b/test/taylor_tan.cpp @@ -39,7 +39,7 @@ static std::mt19937 rng; using namespace heyoka; using namespace heyoka_test; -const auto fp_types = std::tuple=0.27' sleef 'fmt=9.*' spdlog ninja +conda create -y -q -p $deps_dir cmake c-compiler cxx-compiler clang clangxx clang-tools llvmdev tbb-devel tbb boost-cpp 'mppp>=0.27' sleef fmt spdlog ninja source activate $deps_dir # Create the build dir and cd into it. diff --git a/tools/gha_conda_coverage.sh b/tools/gha_conda_coverage.sh index 235149d08..34c7f4d0a 100644 --- a/tools/gha_conda_coverage.sh +++ b/tools/gha_conda_coverage.sh @@ -16,7 +16,7 @@ export PATH="$HOME/miniconda/bin:$PATH" bash miniconda.sh -b -p $HOME/miniconda conda config --add channels conda-forge conda config --set channel_priority strict -conda create -y -q -p $deps_dir c-compiler cxx-compiler cmake llvmdev tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel 'fmt=9.*' spdlog lcov +conda create -y -q -p $deps_dir c-compiler cxx-compiler cmake llvmdev tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel fmt spdlog lcov source activate $deps_dir # Create the build dir and cd into it. 
diff --git a/tools/gha_conda_release.sh b/tools/gha_conda_release.sh index 28c0afbbc..9c6426c57 100644 --- a/tools/gha_conda_release.sh +++ b/tools/gha_conda_release.sh @@ -16,7 +16,7 @@ export PATH="$HOME/miniconda/bin:$PATH" bash miniconda.sh -b -p $HOME/miniconda conda config --add channels conda-forge conda config --set channel_priority strict -conda create -y -q -p $deps_dir c-compiler cxx-compiler cmake llvmdev tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel 'fmt=9.*' spdlog +conda create -y -q -p $deps_dir c-compiler cxx-compiler cmake llvmdev tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel fmt spdlog source activate $deps_dir # Create the build dir and cd into it. diff --git a/tools/gha_llvm11_conda_asan.sh b/tools/gha_llvm11_conda_asan.sh index effacca72..d8be27f7c 100644 --- a/tools/gha_llvm11_conda_asan.sh +++ b/tools/gha_llvm11_conda_asan.sh @@ -14,7 +14,7 @@ wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforg export deps_dir=$HOME/local export PATH="$HOME/mambaforge/bin:$PATH" bash mambaforge.sh -b -p $HOME/mambaforge -mamba create -y -q -p $deps_dir c-compiler cxx-compiler cmake 'llvmdev=11.*' tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel 'fmt=9.*' spdlog +mamba create -y -q -p $deps_dir c-compiler cxx-compiler cmake 'llvmdev=11.*' tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel fmt spdlog source activate $deps_dir # Create the build dir and cd into it. 
diff --git a/tools/gha_llvm12_conda_asan.sh b/tools/gha_llvm12_conda_asan.sh index 5d0083702..c02674c23 100644 --- a/tools/gha_llvm12_conda_asan.sh +++ b/tools/gha_llvm12_conda_asan.sh @@ -14,7 +14,7 @@ wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforg export deps_dir=$HOME/local export PATH="$HOME/mambaforge/bin:$PATH" bash mambaforge.sh -b -p $HOME/mambaforge -mamba create -y -q -p $deps_dir c-compiler cxx-compiler cmake 'llvmdev=12.*' tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel 'fmt=9.*' spdlog +mamba create -y -q -p $deps_dir c-compiler cxx-compiler cmake 'llvmdev=12.*' tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel fmt spdlog source activate $deps_dir # Create the build dir and cd into it. diff --git a/tools/gha_llvm13_conda_asan.sh b/tools/gha_llvm13_conda_asan.sh index a953b9e86..6f6a4ad60 100644 --- a/tools/gha_llvm13_conda_asan.sh +++ b/tools/gha_llvm13_conda_asan.sh @@ -14,7 +14,7 @@ wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforg export deps_dir=$HOME/local export PATH="$HOME/mambaforge/bin:$PATH" bash mambaforge.sh -b -p $HOME/mambaforge -mamba create -y -q -p $deps_dir c-compiler cxx-compiler cmake 'llvmdev=13.*' tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel 'fmt=9.*' spdlog +mamba create -y -q -p $deps_dir c-compiler cxx-compiler cmake 'llvmdev=13.*' tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel fmt spdlog source activate $deps_dir # Create the build dir and cd into it. 
diff --git a/tools/gha_llvm14_conda_asan.sh b/tools/gha_llvm14_conda_asan.sh index b2e0c69b4..0200f9c99 100644 --- a/tools/gha_llvm14_conda_asan.sh +++ b/tools/gha_llvm14_conda_asan.sh @@ -14,7 +14,7 @@ wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforg export deps_dir=$HOME/local export PATH="$HOME/mambaforge/bin:$PATH" bash mambaforge.sh -b -p $HOME/mambaforge -mamba create -y -q -p $deps_dir c-compiler cxx-compiler cmake 'llvmdev=14.*' tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel 'fmt=9.*' spdlog +mamba create -y -q -p $deps_dir c-compiler cxx-compiler cmake 'llvmdev=14.*' tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel fmt spdlog source activate $deps_dir # Create the build dir and cd into it. diff --git a/tools/gha_llvm15_conda_asan.sh b/tools/gha_llvm15_conda_asan.sh index 609c121db..01f866f56 100644 --- a/tools/gha_llvm15_conda_asan.sh +++ b/tools/gha_llvm15_conda_asan.sh @@ -14,7 +14,7 @@ wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforg export deps_dir=$HOME/local export PATH="$HOME/mambaforge/bin:$PATH" bash mambaforge.sh -b -p $HOME/mambaforge -mamba create -y -q -p $deps_dir c-compiler cxx-compiler cmake 'llvmdev=15.*' tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel 'fmt=9.*' spdlog +mamba create -y -q -p $deps_dir c-compiler cxx-compiler cmake 'llvmdev=15.*' tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel fmt spdlog source activate $deps_dir # Create the build dir and cd into it. 
diff --git a/tools/gha_llvm16_conda_asan.sh b/tools/gha_llvm16_conda_asan.sh index 24c930669..840ceedcd 100755 --- a/tools/gha_llvm16_conda_asan.sh +++ b/tools/gha_llvm16_conda_asan.sh @@ -14,7 +14,7 @@ wget https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforg export deps_dir=$HOME/local export PATH="$HOME/mambaforge/bin:$PATH" bash mambaforge.sh -b -p $HOME/mambaforge -mamba create -y -q -p $deps_dir c-compiler cxx-compiler cmake 'llvmdev=16.*' tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel 'fmt=9.*' spdlog +mamba create -y -q -p $deps_dir c-compiler cxx-compiler cmake 'llvmdev=16.*' tbb-devel tbb boost-cpp 'mppp>=0.27' sleef xtensor xtensor-blas blas blas-devel fmt spdlog source activate $deps_dir # Create the build dir and cd into it. diff --git a/tools/gha_osx.sh b/tools/gha_osx.sh index 9ebdd9268..12812566d 100644 --- a/tools/gha_osx.sh +++ b/tools/gha_osx.sh @@ -13,7 +13,7 @@ export PATH="$HOME/miniconda/bin:$PATH" bash miniconda.sh -b -p $HOME/miniconda conda config --add channels conda-forge conda config --set channel_priority strict -conda create -y -q -p $deps_dir c-compiler cxx-compiler libcxx cmake llvmdev tbb-devel tbb boost-cpp sleef xtensor xtensor-blas blas blas-devel 'fmt=9.*' spdlog 'mppp>=0.27' +conda create -y -q -p $deps_dir c-compiler cxx-compiler libcxx cmake llvmdev tbb-devel tbb boost-cpp sleef xtensor xtensor-blas blas blas-devel fmt spdlog 'mppp>=0.27' source activate $deps_dir # Create the build dir and cd into it. 
diff --git a/tools/travis_ubuntu_ppc64.sh b/tools/travis_ubuntu_ppc64.sh index 9f8e6a714..ff6216989 100755 --- a/tools/travis_ubuntu_ppc64.sh +++ b/tools/travis_ubuntu_ppc64.sh @@ -11,7 +11,7 @@ curl -L -o miniconda.sh https://github.com/conda-forge/miniforge/releases/latest export deps_dir=$HOME/local export PATH="$HOME/miniconda/bin:$PATH" bash miniconda.sh -b -p $HOME/miniconda -conda create -y -q -p $deps_dir cxx-compiler c-compiler cmake llvmdev tbb-devel tbb boost-cpp sleef xtensor xtensor-blas blas blas-devel 'fmt=9.*' spdlog make mppp +conda create -y -q -p $deps_dir cxx-compiler c-compiler cmake llvmdev tbb-devel tbb boost-cpp sleef xtensor xtensor-blas blas blas-devel fmt spdlog make mppp source activate $deps_dir # Create the build dir and cd into it.