Provides decorator to handle rounded arithmetic

jfalcou · Sep 21, 2024 · e30a7d4 · e30a7d4
1 parent 99f7bed
commit e30a7d4
Show file tree

Hide file tree

Showing 42 changed files with 613 additions and 238 deletions.
diff --git a/benchmarks/module/core/dec/saturated/dec.hpp b/benchmarks/module/core/dec/saturated/dec.hpp
@@ -16,6 +16,6 @@ int main()
   auto arg0 = eve::bench::random_<EVE_VALUE>(lmin,lmax);
 
   eve::bench::experiment xp;
-  run<EVE_VALUE> (EVE_NAME(saturated(eve::dec)) , xp, eve::dec[eve::saturated2], arg0);
- run<EVE_TYPE>  (EVE_NAME(saturated(eve::dec)) ,  xp, eve::dec[eve::saturated2], arg0);
+  run<EVE_VALUE> (EVE_NAME(saturated(eve::dec)) , xp, eve::dec[eve::saturated], arg0);
+ run<EVE_TYPE>  (EVE_NAME(saturated(eve::dec)) ,  xp, eve::dec[eve::saturated], arg0);
 }
diff --git a/benchmarks/module/core/dec/saturated_if/dec.hpp b/benchmarks/module/core/dec/saturated_if/dec.hpp
@@ -17,7 +17,7 @@ int main()
   auto arg0 = eve::bench::random_<L_VALUE>(0, 1);
   auto arg1 = eve::bench::random_<EVE_VALUE>(lmin,lmax);
 
-  auto eve__dec =  [](auto x,  auto y){ return eve::dec[x][eve::saturated2](y); };
+  auto eve__dec =  [](auto x,  auto y){ return eve::dec[x][eve::saturated](y); };
   eve::bench::experiment xp;
   run<eve::bench::types<L_VALUE, EVE_VALUE>> (EVE_NAME(eve__dec) , xp, eve__dec, arg0, arg1);
   run<eve::bench::types<L_TYPE, EVE_TYPE>>  (EVE_NAME(eve__dec) , xp, eve__dec, arg0, arg1);

diff --git a/benchmarks/module/core/dist/saturated/dist.hpp b/benchmarks/module/core/dist/saturated/dist.hpp
@@ -17,6 +17,6 @@ int main()
   auto arg1 = eve::bench::random_<EVE_VALUE>(lmin,lmax);
 
   eve::bench::experiment xp;
-  run<EVE_VALUE>(EVE_NAME(dist) , xp, eve::dist[eve::saturated2], arg0, arg1);
-  run<EVE_TYPE> (EVE_NAME(dist) , xp, eve::dist[eve::saturated2], arg0, arg1);
+  run<EVE_VALUE>(EVE_NAME(dist) , xp, eve::dist[eve::saturated], arg0, arg1);
+  run<EVE_TYPE> (EVE_NAME(dist) , xp, eve::dist[eve::saturated], arg0, arg1);
 }
diff --git a/benchmarks/module/core/inc/saturated/inc.hpp b/benchmarks/module/core/inc/saturated/inc.hpp
@@ -16,6 +16,6 @@ int main()
   auto arg0 = eve::bench::random_<EVE_VALUE>(lmin,lmax);
 
   eve::bench::experiment xp;
-  run<EVE_VALUE> (EVE_NAME(saturated(eve::inc)) , xp, eve::inc[eve::saturated2], arg0);
-  run<EVE_TYPE>  (EVE_NAME(saturated(eve::inc)) , xp, eve::inc[eve::saturated2], arg0);
+  run<EVE_VALUE> (EVE_NAME(saturated(eve::inc)) , xp, eve::inc[eve::saturated], arg0);
+  run<EVE_TYPE>  (EVE_NAME(saturated(eve::inc)) , xp, eve::inc[eve::saturated], arg0);
 }
diff --git a/benchmarks/module/core/inc/saturated_if/inc.hpp b/benchmarks/module/core/inc/saturated_if/inc.hpp
@@ -17,7 +17,7 @@ int main()
   auto arg0 = eve::bench::random_<L_VALUE>(0, 1);
   auto arg1 = eve::bench::random_<EVE_VALUE>(lmin,lmax);
 
-  auto eve__inc =  [](auto x,  auto y){ return eve::inc[x][eve::saturated2](y); };
+  auto eve__inc =  [](auto x,  auto y){ return eve::inc[x][eve::saturated](y); };
   eve::bench::experiment xp;
   run<eve::bench::types<L_VALUE, EVE_VALUE>> (EVE_NAME(eve__inc) , xp, eve__inc, arg0, arg1);
   run<eve::bench::types<L_TYPE, EVE_TYPE>>  (EVE_NAME(eve__inc) , xp, eve__inc, arg0, arg1);

diff --git a/include/eve/module/core/decorator/core.hpp b/include/eve/module/core/decorator/core.hpp
@@ -13,6 +13,7 @@
 #include <eve/traits/overload.hpp>
 #include <eve/as_element.hpp>
 #include <eve/as.hpp>
+#include <cfenv>
 
 //======================================================================================================================
 // New option style  - TODO rename later without the '2'
@@ -34,10 +35,14 @@ namespace eve
   struct spherical_mode       {};
   struct successor_mode       {};
 
+  struct upper_mode       {static constexpr int value = FE_UPWARD;     }; // tag behind eve::upper; value is the <cfenv> round-toward-positive-infinity mode
+  struct lower_mode       {static constexpr int value = FE_DOWNWARD;   }; // tag behind eve::lower; value is the <cfenv> round-toward-negative-infinity mode
+
   struct to_nearest_mode  { static constexpr int value = 0x08 | 0x00; }; // _MM_FROUND_TO_NEAREST_INT
   struct downward_mode    { static constexpr int value = 0x08 | 0x01; }; // _MM_FROUND_TO_NEG_INF
   struct upward_mode      { static constexpr int value = 0x08 | 0x02; }; // _MM_FROUND_TO_POS_INF
   struct toward_zero_mode { static constexpr int value = 0x08 | 0x03; }; // _MM_FROUND_TO_ZERO
+
   struct pedantic_mode    {};
   struct raw_mode         {};
   struct saturated_mode   {};
@@ -59,8 +64,9 @@ namespace eve
   [[maybe_unused]] inline constexpr auto to_nearest       = ::rbr::flag( to_nearest_mode{}      );
   [[maybe_unused]] inline constexpr auto toward_zero      = ::rbr::flag( toward_zero_mode{}     );
   [[maybe_unused]] inline constexpr auto upward           = ::rbr::flag( upward_mode{}          );
-
-  [[maybe_unused]] inline constexpr auto saturated       = ::rbr::flag( saturated_mode{}       );
+  [[maybe_unused]] inline constexpr auto upper            = ::rbr::flag( upper_mode{}           );
+  [[maybe_unused]] inline constexpr auto lower            = ::rbr::flag( lower_mode{}           );
+  [[maybe_unused]] inline constexpr auto saturated        = ::rbr::flag( saturated_mode{}       );
 
   struct associated_option      : detail::exact_option<associated>      {};
   struct compensated_option     : detail::exact_option<compensated>     {};
@@ -79,7 +85,9 @@ namespace eve
   struct to_nearest_option      : detail::exact_option<to_nearest>      {};
   struct toward_zero_option     : detail::exact_option<toward_zero>     {};
   struct upward_option          : detail::exact_option<upward>          {};
-  struct saturated_option       : detail::exact_option<saturated>      {};
+  struct saturated_option       : detail::exact_option<saturated>       {};
+  struct upper_option           : detail::exact_option<upper>           {};
+  struct lower_option           : detail::exact_option<lower>           {};
 
   // ----------------------------------------------------------------------------------
   // Turn rounding mode option into the proper constexpr flags for x86 intrinsic
@@ -94,6 +102,13 @@ namespace eve
     else                                             return T::id_type::value;
   };
 
+  template<typename S> consteval int rounding_control() noexcept // maps the option set S to the <cfenv> rounding-mode constant to pass to std::fesetround
+  {
+    if      constexpr(S::contains(eve::upper      )) return upper_mode::value; // FE_UPWARD; checked first, so upper wins if both options are present
+    else if constexpr(S::contains(eve::lower      )) return lower_mode::value; // FE_DOWNWARD
+    else                                             return FE_TONEAREST; // neither directed-rounding option present: fall back to round-to-nearest
+  };
+
   // New tolerance option that carry a value
   template<typename Value> struct almost_t;
 

diff --git a/include/eve/module/core/detail/roundings.hpp b/include/eve/module/core/detail/roundings.hpp
@@ -0,0 +1,45 @@
+//==================================================================================================
+/*
+  EVE - Expressive Vector Engine
+  Copyright : EVE Project Contributors
+  SPDX-License-Identifier: BSL-1.0
+*/
+//==================================================================================================
+#pragma once
+
+#include <cstdint>
+#include <cfenv>
+#include <eve/module/core/decorator/core.hpp>
+
+namespace eve::detail
+{
+
+  template < typename O, typename F, typename T,  std::same_as<T> ... Ts> // O: option set that selects the rounding mode; F: callable invoked as f(a, b...)
+  EVE_FORCEINLINE constexpr T with_rounding(F f, T a,  Ts ... b) noexcept // returns f(a, b...) evaluated with the rounding mode rounding_control<O>(), then restores the previous mode
+  {
+#ifdef  SPY_COMPILER_IS_MSVC
+#pragma float_control(precise, on, push)
+#endif
+#ifdef  SPY_COMPILER_IS_CLANG
+#pragma clang fp exceptions(strict)
+#endif
+#ifdef  SPY_COMPILER_IS_GCC
+#if __GNUC__ >= 13
+#pragma STDC FENV_ACCESS on
+#endif
+#endif
+    auto oldstate = std::fegetround(); // remember the caller's rounding mode so it can be put back below
+    std::fesetround(rounding_control<O>()); // NOTE(review): the status returned by fesetround is ignored, so a failed mode switch goes unnoticed
+    auto r = f(a, b...); // NOTE(review): presumably the pragmas above are what keep this evaluation between the two fesetround calls - confirm per compiler
+    std::fesetround(oldstate); // restore the mode saved on entry
+#ifdef  SPY_COMPILER_IS_GCC
+#if __GNUC__ >= 13
+#pragma STDC FENV_ACCESS off
+#endif
+#endif
+#ifdef  SPY_COMPILER_IS_MSVC
+#pragma float_control(pop)
+#endif
+    return r; // result computed under the temporary rounding mode
+  }
+}
diff --git a/include/eve/module/core/regular/add.hpp b/include/eve/module/core/regular/add.hpp
@@ -14,7 +14,7 @@
 namespace eve
 {
   template<typename Options>
-  struct add_t : tuple_callable<add_t, Options, saturated_option>
+  struct add_t : tuple_callable<add_t, Options, saturated_option, lower_option, upper_option>
   {
     template<eve::value T0, value T1, value... Ts>
     requires(eve::same_lanes_or_scalar<T0, T1, Ts...>)
@@ -59,7 +59,8 @@ namespace eve
 //!
 //!      // Semantic options
 //!      constexpr auto add[saturated](/*any of the above overloads*/)                noexcept; // 4
-//!     2. [The operation is performed conditionnaly](@ref conditional).
+//!      constexpr auto add[lower](/*any of the above overloads*/)                 noexcept; // 5
+//!      constexpr auto add[upper](/*any of the above overloads*/)                   noexcept; // 6
 //!
 //!   }
 //!   @endcode
@@ -81,6 +82,10 @@ namespace eve
 //!    4. The call `add[saturated](...)` computes a saturated version of `add`.
 //!       Take care that for signed integral entries this kind of addition is not associative at all.
 //!       This call perform saturated additions in reverse incoming order.
+//!    5. The summation is computed in 'round toward \f$-\infty\f$' mode. The result is guaranteed
+//!       to be less than or equal to the exact one (except for NaNs).
+//!    6. The summation is computed in 'round toward \f$+\infty\f$' mode. The result is guaranteed
+//!       to be greater than or equal to the exact one (except for NaNs).
 //!
 //!   @note
 //!      Although the infix notation with `+` is supported for two parameters, the `+` operator on

diff --git a/include/eve/module/core/regular/bit_ternary.hpp b/include/eve/module/core/regular/bit_ternary.hpp
@@ -18,7 +18,6 @@
 #include <eve/module/core/regular/bit_xor.hpp>
 #include <eve/module/core/regular/bit_cast.hpp>
 #include <eve/module/core/regular/bit_select.hpp>
-#include <iostream>
 
 namespace eve
 {
@@ -125,7 +124,6 @@ namespace eve
     {
       using T   = bit_value_t<T0, T1, T2>;
       using i_t = as_integer_t<T, unsigned>;
-//      std::cout << "rezut et zut" << std::endl;
       auto wa = as_wide_as_t<T0, i_t>(x);
       auto wb = as_wide_as_t<T1, i_t>(y);
       auto wc = as_wide_as_t<T2, i_t>(z);

diff --git a/include/eve/module/core/regular/div.hpp b/include/eve/module/core/regular/div.hpp
@@ -15,7 +15,7 @@ namespace eve
 {
   template<typename Options>
   struct div_t : tuple_callable<div_t, Options, saturated_option, upward_option, downward_option,
-                                to_nearest_option, toward_zero_option>
+                                to_nearest_option, toward_zero_option, upper_option, lower_option>
   {
     template<eve::value T0, value T1, value... Ts>
     requires(eve::same_lanes_or_scalar<T0, T1, Ts...>)
@@ -63,9 +63,11 @@ namespace eve
 //!      constexpr auto div[downward](/*any of the above overloads*/)                 noexcept; // 4
 //!      constexpr auto div[toward_zero](/*any of the above overloads*/)              noexcept; // 4
 //!      constexpr auto div[to_nearest](/*any of the above overloads*/)               noexcept; // 4
+//!      constexpr auto div[lower](/*any of the above overloads*/)                    noexcept; // 5
+//!      constexpr auto div[upper](/*any of the above overloads*/)                    noexcept; // 6
 //!
 //!      // Semantic options
-//!      constexpr auto div[saturated](integral_value auto x, integral_value auto y)) noexcept; // 5
+//!      constexpr auto div[saturated](integral_value auto x, integral_value auto y)) noexcept; // 7
 //!   }
 //!   @endcode
 //!
@@ -88,7 +90,11 @@ namespace eve
 //!           * `eve::floor(div(x, z))`, if `d` is `downward`.
 //!           * `eve::ceil(div(x,  z))`, if `d` is `upward`.
 //!           * `eve::nearest(div(x, z))`, if `d` is `to_nearest`.
-//!      5. computes the saturated division of `x` by  `y`.
+//!      5. The floating-point division is computed in a rounding mode such that the result is
+//!         guaranteed to be less than or equal to the exact one (except for NaNs).
+//!      6. The floating-point division is computed in a rounding mode such that the result is
+//!         guaranteed to be greater than or equal to the exact one (except for NaNs).
+//!      7. computes the saturated division of `x` by  `y`.
 //!         The result is always defined even if the denominator is 0.
 //!
 //!         The relevant cases are just in fact the division by 0 for integral types

diff --git a/include/eve/module/core/regular/impl/add.hpp b/include/eve/module/core/regular/impl/add.hpp
@@ -7,24 +7,45 @@
 //==================================================================================================
 #pragma once
 
+#include <cfenv>
 #include <eve/concept/value.hpp>
 #include <eve/module/core/regular/min.hpp>
 #include <eve/module/core/regular/max.hpp>
 #include <eve/module/core/regular/sub.hpp>
 #include <eve/module/core/regular/is_ltz.hpp>
 #include <eve/module/core/regular/is_less.hpp>
+#include <eve/module/core/regular/is_gtz.hpp>
 #include <eve/module/core/regular/if_else.hpp>
 #include <eve/module/core/regular/bit_or.hpp>
 #include <eve/module/core/regular/bit_mask.hpp>
+#include <eve/module/core/regular/two_add.hpp>
 #include <eve/module/core/constant/valmax.hpp>
 #include <eve/module/core/constant/valmin.hpp>
+#include <eve/module/core/detail/roundings.hpp>
 
 namespace eve::detail
 {
+
   template<callable_options O, typename T>
   EVE_FORCEINLINE constexpr T add_(EVE_REQUIRES(cpu_), O const&, T a, T b) noexcept
   {
-    if constexpr(O::contains(saturated) && integral_value<T>)
+    if constexpr(floating_value<T> && (O::contains(lower) || O::contains(upper) ))
+    {
+      using namespace spy::literal;
+      if constexpr(spy::compiler == spy::clang_ || spy::compiler >= 13_gcc || spy::compiler == spy::msvc_)
+      {
+        return with_rounding<O>(eve::add, a, b);
+      }
+      else
+      {
+       auto [r, e] = eve::two_add(a, b);
+       if constexpr(O::contains(lower))
+         return eve::prev[eve::is_ltz(e)](r);
+       else
+         return eve::next[eve::is_gtz(e)](r);
+      }
+    }
+    else if constexpr(O::contains(saturated) && integral_value<T>)
     {
       if constexpr( signed_integral_value<T> )
       {
@@ -63,8 +84,8 @@ namespace eve::detail
   {
     //TODO: both GCC and Clang can fail to properly reorder the op chain to reduce dependencies
     //      we might want to do this manually
-    r0   = add[o](r0,r1);
-    ((r0 = add[o](r0,rs)),...);
-    return r0;
+      r0   = add[o](r0,r1);
+      ((r0 = add[o](r0,rs)),...);
+      return r0;
   }
 }
diff --git a/include/eve/module/core/regular/impl/div.hpp b/include/eve/module/core/regular/impl/div.hpp
@@ -7,6 +7,7 @@
 //==================================================================================================
 #pragma once
 
+#include <cfenv>
 #include <eve/assert.hpp>
 #include <eve/concept/value.hpp>
 #include <eve/traits/common_value.hpp>
@@ -28,14 +29,15 @@
 #include <eve/module/core/regular/fnma.hpp>
 #include <eve/module/core/regular/if_else.hpp>
 #include <eve/module/core/regular/inc.hpp>
-#include <eve/module/core/regular/is_ltz.hpp>
+#include <eve/module/core/regular/is_negative.hpp>
 #include <eve/module/core/regular/is_odd.hpp>
 #include <eve/module/core/regular/minus.hpp>
 #include <eve/module/core/regular/nearest.hpp>
 #include <eve/module/core/regular/saturate.hpp>
 #include <eve/module/core/regular/round.hpp>
 #include <eve/module/core/regular/fms.hpp>
 #include <eve/module/core/regular/shr.hpp>
+#include <eve/module/core/detail/roundings.hpp>
 
 
 #ifdef EVE_COMP_IS_MSVC
@@ -49,7 +51,29 @@ namespace eve::detail
   template<callable_options O, typename T>
   EVE_FORCEINLINE constexpr T div_(EVE_REQUIRES(cpu_), O const& o, T a, T b) noexcept
   {
-    if constexpr(O::contains(saturated))
+    if constexpr(floating_value<T> && (O::contains(upper) || O::contains(lower) ))
+    {
+      using namespace spy::literal;
+      if constexpr(spy::compiler == spy::clang_ || spy::compiler >= 13_gcc || spy::compiler == spy::msvc_)
+      {
+        return with_rounding<O> (eve::div, a, b);
+      }
+      else
+      {
+        auto negb = is_negative(b);
+        b = if_else(negb, -b, b);
+        a = if_else(negb, -a, a);
+        auto d = div(a, b);
+        auto [r, e] = two_prod(d, b);
+        if constexpr(O::contains(upper))
+        {
+          return next[r < a || ((r ==  a) && is_ltz(e))](d);
+        }
+        else
+          return prev[r > a || ((r ==  a) && is_gtz(e))](d);
+      }
+    }
+    else if constexpr(O::contains(saturated))
     {
       if constexpr( integral_value<T> )
       {
@@ -173,7 +197,7 @@ namespace eve::detail
             auto r1   = fnma(b, q, a);
             auto r2   = minus[ltzb](b) - r1;
             auto cond = (r1 > r2) || ((r1 == r2) && is_odd(q));
-            
+
             return if_else(is_ltz(b), dec[cond](q), inc[cond](q));
           }
         }
@@ -198,7 +222,10 @@ namespace eve::detail
   template<typename T, std::same_as<T>... Ts, callable_options O>
   EVE_FORCEINLINE constexpr T div_(EVE_REQUIRES(cpu_), O const & o, T r0, T r1, Ts... rs) noexcept
   {
-    auto that((r1 * (rs * ...)));
+    auto that = r1;
+    if (O::contains(upper))  that = mul[lower](r1, rs...);
+    else if  (O::contains(lower))  that = mul[upper](r1, rs...);
+    else that = mul(r1, rs...);
     if constexpr(std::is_integral_v<eve::element_type_t<T>>)
       EVE_ASSERT(eve::all(is_nez(that)), "[eve] div - 0/0 is undefined");
     return div[o](r0,that);
@@ -213,7 +240,6 @@ namespace eve::detail
   }
 }
 
-
 #ifdef EVE_COMP_IS_MSVC
 #  pragma warning(pop)
 #endif