Skip to content

Commit

Permalink
More extra precision functions
Browse files Browse the repository at this point in the history
  • Loading branch information
jtlap authored Oct 3, 2024
1 parent 73de402 commit 0315922
Show file tree
Hide file tree
Showing 35 changed files with 408 additions and 135 deletions.
1 change: 1 addition & 0 deletions include/eve/module/core/regular/.#fam.hpp
2 changes: 0 additions & 2 deletions include/eve/module/core/regular/add.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ namespace eve
//! constexpr auto add[upper](/*any of the above overloads*/) noexcept; // 6
//! constexpr auto add[lower][strict](/*any of the above overloads*/) noexcept; // 5
//! constexpr auto add[upper][strict](/*any of the above overloads*/) noexcept; // 6
//! constexpr auto div[lower][strict](/*any of the above overloads*/) noexcept; // 5
//! constexpr auto div[upper][strict](/*any of the above overloads*/) noexcept; // 6
//!
//! }
//! @endcode
Expand Down
31 changes: 30 additions & 1 deletion include/eve/module/core/regular/diff_of_prod.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
namespace eve
{
template<typename Options>
struct diff_of_prod_t : elementwise_callable<diff_of_prod_t, Options, raw_option, pedantic_option>
struct diff_of_prod_t : elementwise_callable<diff_of_prod_t, Options, raw_option, pedantic_option, lower_option,
upper_option, strict_option>
{
template<value T, value U, value V, value W>
requires(eve::same_lanes_or_scalar<T, U, V, W>)
Expand Down Expand Up @@ -97,6 +98,34 @@ namespace eve
T cd = mul(c, d);
return fms(a, b, cd);
}
else if constexpr(floating_value<T> && (O::contains(lower) || O::contains(upper) ))
{
if constexpr(O::contains(strict))
{
auto r = diff_of_prod[o.drop(lower, upper, strict)](a, b, c, d);
if constexpr(O::contains(lower))
return prev(r);
else
return next(r);
}
else
{
T cdl = mul[lower](c, d);
T cdu = mul[upper](c, d);
if constexpr(O::contains(upper))
{
auto err = fnma[o](c, d, cdu);
auto dop = fms[o](a, b, cdl);
return add[o][is_finite(err)](dop, err);
}
else
{
auto err = fnma[o](c, d, cdl);
auto dop = fms[o](a, b, cdu);
return add[o][is_finite(err)](dop, err);
}
}
}
else
{
auto cd = mul[o](c, d);
Expand Down
6 changes: 5 additions & 1 deletion include/eve/module/core/regular/dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace eve
{
template<typename Options>
struct dist_t : elementwise_callable<dist_t, Options, saturated_option, pedantic_option,
upper_option, lower_option, strict_option>
upper_option, lower_option, strict_option>
{
template<value T, value U>
requires(eve::same_lanes_or_scalar<T, U>)
Expand Down Expand Up @@ -102,3 +102,7 @@ namespace eve
}
}
}

#if defined(EVE_INCLUDE_NEON_HEADER)
# include <eve/module/core/regular/impl/simd/arm/neon/dist.hpp>
#endif
2 changes: 1 addition & 1 deletion include/eve/module/core/regular/fam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ namespace eve
//! 2. [The operation is performed conditionnaly](@ref conditional)
//! 3. `pedantic` option always ensures the full compliance to fam properties. This can be very expensive if the system
//! has no hardware capability.
//! 4. TO DO : DESCRIBE
//! 4. The operation is performed as if the parameters were promoted to the common type of the three parameters.
//!
//! @groupheader{External references}
//! * [C++ standard reference](https://en.cppreference.com/w/cpp/numeric/special_functions/fma)
Expand Down
2 changes: 1 addition & 1 deletion include/eve/module/core/regular/fanm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ namespace eve
//! 2. [The operation is performed conditionnaly](@ref conditional)
//! 3. `pedantic` option always ensures the full compliance to fam properties. This can be very expensive if the system
//! has no hardware capability.
//! 4. TO DO : DESCRIBE
//! 4. The operation is performed as if the parameters were promoted to the common type of the three parameters.
//!
//! @groupheader{Example}
//! @godbolt{doc/core/fanm.cpp}
Expand Down
2 changes: 1 addition & 1 deletion include/eve/module/core/regular/fma.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ namespace eve
//! 2. [The operation is performed conditionnaly](@ref conditional)
//! 3. `pedantic` option always ensures the full compliance to fam properties. This can be very expensive if the system
//! has no hardware capability.
//! 4. TO DO : DESCRIBE
//! 4. The operation is performed as if the parameters were promoted to the common type of the three parameters.
//! 5. The operation is computed in a 'round toward \f$-\infty\f$' mode. The result is guaranteed
//! to be less than or equal to the exact one (except for NaNs). Combined with `strict`, the option
//! generally ensures a faster computation, but with a strict inequality.
Expand Down
2 changes: 1 addition & 1 deletion include/eve/module/core/regular/fms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ namespace eve
//! 2. [The operation is performed conditionnaly](@ref conditional)
//! 3. `pedantic` option always ensures the full compliance to fam properties. This can be very expensive if the system
//! has no hardware capability.
//! 4. TO DO : DESCRIBE
//! 4. The operation is performed as if the parameters were promoted to the common type of the three parameters.
//!
//! @groupheader{External references}
//! * [C++ standard reference](https://en.cppreference.com/w/cpp/numeric/special_functions/fma)
Expand Down
2 changes: 1 addition & 1 deletion include/eve/module/core/regular/fnma.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ namespace eve
//! 2. [The operation is performed conditionnaly](@ref conditional)
//! 3. `pedantic` option always ensures the full compliance to fam properties. This can be very expensive if the system
//! has no hardware capability.
//! 4. TO DO : DESCRIBE
//! 4. The operation is performed as if the parameters were promoted to the common type of the three parameters.
//!
//! @groupheader{External references}
//! * [C++ standard reference](https://en.cppreference.com/w/cpp/numeric/special_functions/fma)
Expand Down
2 changes: 1 addition & 1 deletion include/eve/module/core/regular/fnms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ namespace eve
//! 2. [The operation is performed conditionnaly](@ref conditional)
//! 3. `pedantic` option always ensures the full compliance to fam properties. This can be very expensive if the system
//! has no hardware capability.
//! 4. TO DO : DESCRIBE
//! 4. The operation is performed as if the parameters were promoted to the common type of the three parameters.
//!
//! @groupheader{External references}
//! * [C++ standard reference](https://en.cppreference.com/w/cpp/numeric/special_functions/fma)
Expand Down
3 changes: 2 additions & 1 deletion include/eve/module/core/regular/fsm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ struct fsm_t : strict_elementwise_callable<fsm_t, Options, pedantic_option, prom
//! 2. [The operation is performed conditionnaly](@ref conditional)
//! 3. `pedantic` option always ensures the full compliance to fsm properties. This can be very expensive if the system
//! has no hardware capability.
//! 4. TO DO : DESCRIBE
//! 4. The operation is performed as if the parameters were promoted to the common type of the three parameters.
//!
//! @groupheader{External references}
//! * [C++ standard reference](https://en.cppreference.com/w/cpp/numeric/special_functions/fma)
Expand All @@ -94,6 +94,7 @@ struct fsm_t : strict_elementwise_callable<fsm_t, Options, pedantic_option, prom
{
return fms[o](b, c, a);
}

}
}

Expand Down
2 changes: 1 addition & 1 deletion include/eve/module/core/regular/fsnm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ namespace eve
//! 2. [The operation is performed conditionnaly](@ref conditional)
//! 3. `pedantic` option always ensures the full compliance to fsnm properties. This can be very expensive if the system
//! has no hardware capability.
//! 4. TO DO : DESCRIBE
//! 4. The operation is performed as if the parameters were promoted to the common type of the three parameters.
//!
//! @groupheader{External references}
//! * [C++ standard reference](https://en.cppreference.com/w/cpp/numeric/special_functions/fma)
Expand Down
55 changes: 55 additions & 0 deletions include/eve/module/core/regular/impl/simd/arm/neon/dist.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
//==================================================================================================
/*
EVE - Expressive Vector Engine
Copyright : EVE Project Contributors
SPDX-License-Identifier: BSL-1.0
*/
//==================================================================================================
#pragma once

#include <eve/concept/value.hpp>
#include <eve/detail/abi.hpp>
#include <eve/detail/category.hpp>
#include <eve/forward.hpp>

namespace eve::detail
{
  // NEON backend for eve::dist on native ARM registers.
  //
  // The vabd/vabdq "absolute difference" intrinsics are used whenever one exists for the
  // register category of wide<T, N>. Every other case is forwarded to the generic cpu_
  // implementation:
  //   * `saturated` on integral types, `lower` and `upper` are not expressed by a plain vabd,
  //     so these options keep using the generic code path;
  //   * 64-bit integers: the NEON intrinsic set provides vabd only for 8/16/32-bit integers
  //     and for floating point types (there is no vabd_s64/vabd_u64/vabdq_s64/vabdq_u64);
  //   * float64 when the target is below asimd, and any category not listed here, so that
  //     every instantiation of this function returns a value.
  //
  // Parameters:
  //   opts : the options the callable was invoked with, forwarded untouched to the fallback.
  //   v, w : the two registers whose element-wise distance is computed.
  // Returns a wide<T, N> holding the element-wise distance between v and w.
  template<callable_options O, arithmetic_scalar_value T, typename N>
  EVE_FORCEINLINE wide<T, N> dist_(EVE_REQUIRES(neon128_), O const& opts,
                                   wide<T, N> v, wide<T, N> w) noexcept
  requires arm_abi<abi_t<T, N>>
  {
    constexpr auto c = categorize<wide<T, N>>();

    if constexpr((O::contains(saturated) && std::integral<T>) || O::contains(lower) || O::contains(upper))
    {
      // Options requiring extra care are handled by the generic implementation.
      return dist.behavior(cpu_{}, opts, v, w);
    }
    else if constexpr( c == category::int32x2   ) return vabd_s32 (v, w);
    else if constexpr( c == category::int32x4   ) return vabdq_s32(v, w);
    else if constexpr( c == category::uint32x2  ) return vabd_u32 (v, w);
    else if constexpr( c == category::uint32x4  ) return vabdq_u32(v, w);
    else if constexpr( c == category::int16x4   ) return vabd_s16 (v, w);
    else if constexpr( c == category::int16x8   ) return vabdq_s16(v, w);
    else if constexpr( c == category::uint16x4  ) return vabd_u16 (v, w);
    else if constexpr( c == category::uint16x8  ) return vabdq_u16(v, w);
    else if constexpr( c == category::int8x8    ) return vabd_s8  (v, w);
    else if constexpr( c == category::int8x16   ) return vabdq_s8 (v, w);
    else if constexpr( c == category::uint8x8   ) return vabd_u8  (v, w);
    else if constexpr( c == category::uint8x16  ) return vabdq_u8 (v, w);
    else if constexpr( c == category::float32x2 ) return vabd_f32 (v, w);
    else if constexpr( c == category::float32x4 ) return vabdq_f32(v, w);
    else if constexpr( current_api >= asimd )
    {
      // Double precision absolute difference only exists on AArch64 (asimd).
      if      constexpr( c == category::float64x1 ) return vabd_f64 (v, w);
      else if constexpr( c == category::float64x2 ) return vabdq_f64(v, w);
      // 64-bit integers (and anything else): no matching intrinsic.
      else                                          return dist.behavior(cpu_{}, opts, v, w);
    }
    // Pre-asimd targets: no intrinsic for the remaining categories.
    else return dist.behavior(cpu_{}, opts, v, w);
  }
}
7 changes: 3 additions & 4 deletions include/eve/module/core/regular/impl/simd/arm/sve/fsm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,18 @@ namespace eve::detail
requires sve_abi<abi_t<T, N>>
EVE_FORCEINLINE wide<T, N> fsm_(EVE_REQUIRES(sve_), O const& o, wide<T, N> a, wide<T, N> b,wide<T, N> c) noexcept
{
return fms[o](c, b, a);
return fms[o](b, c, a);
}

template<conditional_expr C, typename T, typename N, callable_options O>
requires sve_abi<abi_t<T, N>>
EVE_FORCEINLINE wide<T, N> fsm_(EVE_REQUIRES(sve_), C m, O const&, wide<T,N> a, wide<T,N> b, wide<T,N> c) noexcept
EVE_FORCEINLINE wide<T, N> fsm_(EVE_REQUIRES(sve_), C m, O const&o, wide<T,N> a, wide<T,N> b, wide<T,N> c) noexcept
{
// This is done so the masking use a and not -a as source
// We don't care about PEDANTIC as this is a proper FMA.
// We don't care about PROMOTE as we only accept similar types.
[[maybe_unused]] auto const alt = alternative(m, a, as(a));
if constexpr( C::is_complete ) return alt;
else if constexpr( !C::has_alternative ) return minus[m](wide<T, N>(svmls_m(m.mask(as<T>{}), a, b, c)));
else return if_else(m, -eve::fanm(a, b, c), alt);
else return if_else(m, eve::fms[o.drop(condition_key)](b, c, a), alt);
}
}
8 changes: 4 additions & 4 deletions include/eve/module/core/regular/impl/simd/x86/fam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace eve::detail
template<conditional_expr C, arithmetic_scalar_value T, typename N, callable_options O>
EVE_FORCEINLINE wide<T, N> fam_(EVE_REQUIRES(avx512_),
C const &mask,
O const &,
O const &opts,
wide<T, N> const &v,
wide<T, N> const &w,
wide<T, N> const &x) noexcept
Expand All @@ -35,8 +35,8 @@ namespace eve::detail
{
constexpr auto c = categorize<wide<T, N>>();
[[maybe_unused]] auto const m = expand_mask(mask, as(v)).storage().value;

if constexpr( c == category::float32x16) return _mm512_mask3_fmadd_ps(w, x, v, m);
if ((O::contains(lower) || O::contains(upper))&& floating_value<T>) return if_else(mask, eve::fam[opts.drop(condition_key)](v, w, x), v);
else if constexpr( c == category::float32x16) return _mm512_mask3_fmadd_ps(w, x, v, m);
else if constexpr( c == category::float64x8 ) return _mm512_mask3_fmadd_pd(w, x, v, m);
else if constexpr( c == category::float32x8 ) return _mm256_mask3_fmadd_ps(w, x, v, m);
else if constexpr( c == category::float64x4 ) return _mm256_mask3_fmadd_pd(w, x, v, m);
Expand All @@ -45,6 +45,6 @@ namespace eve::detail
// No rounding issue with integers, so we just mask over regular FMA
else return if_else(mask, eve::fam(v, w, x), v);
}
else return if_else(mask, eve::fam(v, w, x), alternative(mask, v, as(v)));
else return if_else(mask, eve::fam[opts.drop(condition_key)](v, w, x), alternative(mask, v, as(v)));
}
}
14 changes: 7 additions & 7 deletions include/eve/module/core/regular/impl/simd/x86/fma.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,17 +106,17 @@ namespace eve::detail
if constexpr(current_api >= avx512)
{
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( c == category::float64x8 ) return _mm512_mask_fmadd_round_pd (a, m, b, c, dir);
else if constexpr ( c == category::float32x16 ) return _mm512_mask_fmadd_round_ps (a, m, b, c, dir);
else if constexpr ( c == category::float64x4 || c == category::float64x2 ||
c == category::float32x8 || c == category::float32x4 || c == category::float32x2)
if constexpr ( cx == category::float64x8 ) return _mm512_mask_fmadd_round_pd (a, m, b, c, dir);
else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fmadd_round_ps (a, m, b, c, dir);
else if constexpr ( cx == category::float64x4 || cx == category::float64x2 ||
cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2)
{
auto aa = eve::combine(a, a);
auto bb = eve::combine(b, b);
auto cc = eve::combine(c, c);
auto aabbcc = fma[opts.drop(condition_key)](aa, bb, cc);
auto s = slice(aabbcc, eve::upper_);
return if_else(cx,s,src);
return if_else(mask,s,src);
}
else return fma.behavior(cpu_{}, opts, a, b, c);
}
Expand All @@ -131,8 +131,8 @@ namespace eve::detail
else if constexpr( cx == category::float32x4 ) return _mm_mask_fmadd_ps (a, m, b, c);
else if constexpr( cx == category::float64x2 ) return _mm_mask_fmadd_pd (a, m, b, c);
// No rounding issue with integers, so we just mask over regular FMA
else return if_else(mask, eve::fma(a, b, c), a);
else return if_else(mask, eve::fma[opts.drop(condition_key)](a, b, c), a);
}
else return if_else(mask, eve::fma(a, b, c), alternative(mask, a, as(a)));
else return if_else(mask, eve::fma[opts.drop(condition_key)](a, b, c), alternative(mask, a, as(a)));
}
}
26 changes: 13 additions & 13 deletions include/eve/module/core/regular/impl/simd/x86/fms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ namespace eve::detail
if constexpr( C::is_complete ) return alternative(mask, v, as(v));
else if constexpr( !C::has_alternative )
{
constexpr auto c = categorize<wide<T, N>>();
constexpr auto cx = categorize<wide<T, N>>();
auto src = alternative(mask, v, as<wide<T, N>> {});
[[maybe_unused]] auto const m = expand_mask(mask, as(v)).storage().value;

Expand All @@ -110,33 +110,33 @@ namespace eve::detail
if constexpr(current_api >= avx512)
{
auto constexpr dir =(O::contains(lower) ? _MM_FROUND_TO_NEG_INF : _MM_FROUND_TO_POS_INF) |_MM_FROUND_NO_EXC;
if constexpr ( c == category::float64x8 ) return _mm512_mask_fmsub_round_pd (v, m, w, x, dir);
else if constexpr ( c == category::float32x16 ) return _mm512_mask_fmsub_round_ps (v, m, w, x, dir);
else if constexpr ( c == category::float64x4 || c == category::float64x2 ||
c == category::float32x8 || c == category::float32x4 || c == category::float32x2)
if constexpr ( cx == category::float64x8 ) return _mm512_mask_fmsub_round_pd (v, m, w, x, dir);
else if constexpr ( cx == category::float32x16 ) return _mm512_mask_fmsub_round_ps (v, m, w, x, dir);
else if constexpr ( cx == category::float64x4 || cx == category::float64x2 ||
cx == category::float32x8 || cx == category::float32x4 || cx == category::float32x2)
{
auto aa = eve::combine(v, v);
auto bb = eve::combine(w, w);
auto cc = eve::combine(x, x);
auto aabbcc = fms[opts.drop(condition_key)](aa, bb, cc);
auto s = slice(aabbcc, eve::upper_);
return if_else(c,s,src);
return if_else(mask,s,src);
}
else return fms.behavior(cpu_{}, opts, v, w, x);
}
else return fms.behavior(cpu_{}, opts, v, w, x);
}
else return fms.behavior(cpu_{}, opts, v, w, x);
}
else if constexpr( c == category::float32x16) return _mm512_mask_fmsub_ps(v, m, w, x);
else if constexpr( c == category::float64x8 ) return _mm512_mask_fmsub_pd(v, m, w, x);
else if constexpr( c == category::float32x8 ) return _mm256_mask_fmsub_ps(v, m, w, x);
else if constexpr( c == category::float64x4 ) return _mm256_mask_fmsub_pd(v, m, w, x);
else if constexpr( c == category::float32x4 ) return _mm_mask_fmsub_ps(v, m, w, x);
else if constexpr( c == category::float64x2 ) return _mm_mask_fmsub_pd(v, m, w, x);
else if constexpr( cx == category::float32x16) return _mm512_mask_fmsub_ps(v, m, w, x);
else if constexpr( cx == category::float64x8 ) return _mm512_mask_fmsub_pd(v, m, w, x);
else if constexpr( cx == category::float32x8 ) return _mm256_mask_fmsub_ps(v, m, w, x);
else if constexpr( cx == category::float64x4 ) return _mm256_mask_fmsub_pd(v, m, w, x);
else if constexpr( cx == category::float32x4 ) return _mm_mask_fmsub_ps(v, m, w, x);
else if constexpr( cx == category::float64x2 ) return _mm_mask_fmsub_pd(v, m, w, x);
// No rounding issue with integers, so we just mask over regular FMS
else return if_else(mask, eve::fms(v, w, x), v);
}
else return if_else(mask, eve::fms(v, w, x), alternative(mask, v, as(v)));
else return if_else(mask, eve::fms[opts.drop(condition_key)](v, w, x), alternative(mask, v, as(v)));
}
}
Loading

0 comments on commit 0315922

Please sign in to comment.