From fb06f16c67c7b6e171de4c0f16856486f3f6c071 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Wed, 15 Jan 2025 18:08:41 +0100 Subject: [PATCH 01/19] stash --- include/eve/arch/cpu/wide.hpp | 12 ++++++------ include/eve/module/core/regular/is_less.hpp | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/eve/arch/cpu/wide.hpp b/include/eve/arch/cpu/wide.hpp index 1ec6953396..c912e9b9fc 100644 --- a/include/eve/arch/cpu/wide.hpp +++ b/include/eve/arch/cpu/wide.hpp @@ -861,32 +861,32 @@ namespace eve friend EVE_FORCEINLINE auto operator!=(S v, wide w) noexcept { return w != v; } //! @brief Element-wise less-than comparison between eve::wide - friend EVE_FORCEINLINE auto operator<(wide v, wide w) noexcept + friend EVE_FORCEINLINE auto operator<(wide a, wide b) noexcept #if !defined(EVE_DOXYGEN_INVOKED) requires(supports_ordering_v) #endif { - return detail::self_less(v, w); + return is_less(a, b); } //! @brief Element-wise less-than comparison between a eve::wide and a scalar template - friend EVE_FORCEINLINE auto operator<(wide v, S w) noexcept + friend EVE_FORCEINLINE auto operator<(wide w, S s) noexcept #if !defined(EVE_DOXYGEN_INVOKED) requires(supports_ordering_v) #endif { - return v < wide {w}; + return is_less(w, wide{s}); } //! @brief Element-wise less-than comparison between a scalar and a eve::wide template - friend EVE_FORCEINLINE auto operator<(S v, wide w) noexcept + friend EVE_FORCEINLINE auto operator<(S s, wide w) noexcept #if !defined(EVE_DOXYGEN_INVOKED) requires(supports_ordering_v) #endif { - return wide {v} < w; + return is_less(wide{s}, w); } //! @brief Element-wise greater-than comparison between eve::wide diff --git a/include/eve/module/core/regular/is_less.hpp b/include/eve/module/core/regular/is_less.hpp index df4aa081c7..e8820773a5 100644 --- a/include/eve/module/core/regular/is_less.hpp +++ b/include/eve/module/core/regular/is_less.hpp @@ -24,9 +24,9 @@ namespace eve template struct is_less_t : strict_elementwise_callable { - template - requires(eve::same_lanes_or_scalar) - constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const + template + constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const + requires (same_lanes_or_scalar) { // static_assert( valid_tolerance, Options>::value, "[eve::is_less] simd tolerance requires at least one simd parameter." ); return EVE_DISPATCH_CALL(a, b); @@ -102,7 +102,7 @@ namespace eve EVE_FORCEINLINE constexpr common_logical_t is_less_(EVE_REQUIRES(cpu_), O const&, logical a, logical b) noexcept { - if constexpr( scalar_value && scalar_value) return common_logical_t(a < b); + if constexpr (scalar_value && scalar_value) return common_logical_t(a < b); else return a < b; } @@ -110,7 +110,7 @@ namespace eve EVE_FORCEINLINE constexpr common_logical_t is_less_(EVE_REQUIRES(cpu_), O const & o, T const& aa, U const& bb) noexcept { - if constexpr(O::contains(definitely)) + if constexpr (O::contains(definitely)) { using w_t = common_value_t; auto a = w_t(aa); @@ -122,7 +122,7 @@ namespace eve } else { - if constexpr(scalar_value && scalar_value) return common_logical_t(aa < bb); + if constexpr (scalar_value && scalar_value) return common_logical_t(aa < bb); else return aa < bb; } } From 5227338bb1fcbfc00a4e65450dee7cf11c61b528 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 16 Jan 2025 14:36:13 +0100 Subject: [PATCH 02/19] take 1 --- include/eve/concept/element_type.hpp | 20 +++ include/eve/concept/value.hpp | 19 ++ .../detail/function/simd/arm/neon/friends.hpp | 39 ---- .../detail/function/simd/arm/sve/friends.hpp | 5 - .../detail/function/simd/common/friends.hpp | 14 -- .../eve/detail/function/simd/ppc/friends.hpp | 7 - .../detail/function/simd/riscv/friends.hpp | 31 ---- .../eve/detail/function/simd/x86/friends.hpp | 80 --------- .../eve/module/core/regular/impl/is_less.hpp | 42 +++++ .../regular/impl/simd/arm/neon/is_less.hpp | 55 ++++++ .../regular/impl/simd/arm/sve/is_less.hpp | 22 +++ .../core/regular/impl/simd/ppc/is_less.hpp | 24 +++ .../core/regular/impl/simd/riscv/is_less.hpp | 33 ++++ .../core/regular/impl/simd/x86/is_less.hpp | 166 +++++++++++++----- include/eve/module/core/regular/is_less.hpp | 55 +++--- 15 files changed, 360 insertions(+), 252 deletions(-) create mode 100644 include/eve/concept/element_type.hpp create mode 100644 include/eve/module/core/regular/impl/is_less.hpp create mode 100644 include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp create mode 100644 include/eve/module/core/regular/impl/simd/arm/sve/is_less.hpp create mode 100644 include/eve/module/core/regular/impl/simd/ppc/is_less.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/is_less.hpp diff --git a/include/eve/concept/element_type.hpp b/include/eve/concept/element_type.hpp new file mode 100644 index 0000000000..8fbbc4aaa7 --- /dev/null +++ b/include/eve/concept/element_type.hpp @@ -0,0 +1,20 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve +{ + template + concept same_element_type = (std::same_as, element_type_t> || ...); + + template + concept same_element_type_or_scalar = (scalar_value || ... || scalar_value) || same_element_type; +} diff --git a/include/eve/concept/value.hpp b/include/eve/concept/value.hpp index 895ef0ee48..53a161ce72 100644 --- a/include/eve/concept/value.hpp +++ b/include/eve/concept/value.hpp @@ -151,6 +151,25 @@ namespace eve //! @} //================================================================================================ + //================================================================================================ + //! @ingroup simd_concepts + //! @{ + //! @concept arithmetic_value + //! @brief The concept `plain_value` is satisfied if and only if T satisfies + //! `eve::arithmetic_simd_value` or `eve::arithmetic_scalar_value`. + //! + //! @groupheader{Examples} + //! - `int` + //! - `eve::wide` + //! - `kumi::tuple` + //! - `eve::wide>` + //================================================================================================ + template + concept arithmetic_value = arithmetic_simd_value || arithmetic_scalar_value; + //================================================================================================ + //! @} + //================================================================================================ + //================================================================================================ //! @ingroup simd_concepts //! @{ diff --git a/include/eve/detail/function/simd/arm/neon/friends.hpp b/include/eve/detail/function/simd/arm/neon/friends.hpp index 26f9d9212d..b0226d408c 100644 --- a/include/eve/detail/function/simd/arm/neon/friends.hpp +++ b/include/eve/detail/function/simd/arm/neon/friends.hpp @@ -62,45 +62,6 @@ namespace eve::detail return !(v == w); } - //================================================================================================ - // operator!= implementation - //================================================================================================ - template - EVE_FORCEINLINE logical> self_less ( wide v - , wide w - ) noexcept - requires arm_abi> - { - constexpr auto cat = categorize>(); - - if constexpr( cat == category::int32x4 ) return vcltq_s32(v, w); - else if constexpr( cat == category::int16x8 ) return vcltq_s16(v, w); - else if constexpr( cat == category::int8x16 ) return vcltq_s8(v, w); - else if constexpr( cat == category::uint32x4 ) return vcltq_u32(v, w); - else if constexpr( cat == category::uint16x8 ) return vcltq_u16(v, w); - else if constexpr( cat == category::uint8x16 ) return vcltq_u8(v, w); - else if constexpr( cat == category::float32x4) return vcltq_f32(v, w); - else if constexpr( cat == category::int32x2 ) return vclt_s32(v, w); - else if constexpr( cat == category::int16x4 ) return vclt_s16(v, w); - else if constexpr( cat == category::int8x8 ) return vclt_s8(v, w); - else if constexpr( cat == category::uint32x2 ) return vclt_u32(v, w); - else if constexpr( cat == category::uint16x4 ) return vclt_u16(v, w); - else if constexpr( cat == category::uint8x8 ) return vclt_u8(v, w); - else if constexpr( cat == category::float32x2) return vclt_f32(v, w); - else if constexpr( current_api >= asimd) - { - if constexpr( cat == category::float64x1) return vclt_f64(v, w); - else if constexpr( cat == category::int64x1) return vclt_s64(v, w); - else if constexpr( cat == category::uint64x1) return vclt_u64(v, w); - else if constexpr( cat == category::float64x2) return vcltq_f64(v, w); - else if constexpr( cat == category::int64x2) return vcltq_s64(v, w); - else if constexpr( cat == category::uint64x2) return vcltq_u64(v, w); - } - else if constexpr( sizeof(T) == 8 ) - return map([](E const& e, E const& f){ return as_logical_t(e < f); }, v, w); - - } - template EVE_FORCEINLINE logical> self_greater( wide v , wide w diff --git a/include/eve/detail/function/simd/arm/sve/friends.hpp b/include/eve/detail/function/simd/arm/sve/friends.hpp index 4c3432d26f..a9b0029a37 100644 --- a/include/eve/detail/function/simd/arm/sve/friends.hpp +++ b/include/eve/detail/function/simd/arm/sve/friends.hpp @@ -22,11 +22,6 @@ EVE_FORCEINLINE auto self_neq(wide v, wide w) noexcept -> as_logical_t> requires sve_abi> { return svcmpne(sve_true(), v, w); } -template -EVE_FORCEINLINE auto -self_less(wide v, wide w) noexcept -> as_logical_t> -requires sve_abi> { return svcmplt(sve_true(), v, w); } - template EVE_FORCEINLINE auto self_greater(wide v, wide w) noexcept -> as_logical_t> diff --git a/include/eve/detail/function/simd/common/friends.hpp b/include/eve/detail/function/simd/common/friends.hpp index 6caa82a3a8..073e6e6752 100644 --- a/include/eve/detail/function/simd/common/friends.hpp +++ b/include/eve/detail/function/simd/common/friends.hpp @@ -99,20 +99,6 @@ namespace eve::detail //================================================================================================ // Ordering operators - template - EVE_FORCEINLINE auto self_less(Wide const& v,Wide const& w) noexcept - { - if constexpr( product_type ) - { - return convert(kumi::to_tuple(v) < kumi::to_tuple(w), as_element>()); - } - else - { - constexpr auto lt = [](E const& e, E const& f) { return as_logical_t(e < f); }; - return apply_over(lt, v, w); - } - } - template EVE_FORCEINLINE auto self_leq(Wide const& v,Wide const& w) noexcept { diff --git a/include/eve/detail/function/simd/ppc/friends.hpp b/include/eve/detail/function/simd/ppc/friends.hpp index 6352c14d4c..3cbc940e3c 100644 --- a/include/eve/detail/function/simd/ppc/friends.hpp +++ b/include/eve/detail/function/simd/ppc/friends.hpp @@ -28,13 +28,6 @@ namespace eve::detail return logical>(vec_cmpne(v.storage(), w.storage())); } - template - EVE_FORCEINLINE auto self_less(wide const &v, wide const &w) noexcept - requires ppc_abi> - { - return logical>(vec_cmplt(v.storage(), w.storage())); - } - template EVE_FORCEINLINE auto self_greater(wide const &v, wide const &w) noexcept requires ppc_abi> diff --git a/include/eve/detail/function/simd/riscv/friends.hpp b/include/eve/detail/function/simd/riscv/friends.hpp index f501865597..4d2476c49c 100644 --- a/include/eve/detail/function/simd/riscv/friends.hpp +++ b/include/eve/detail/function/simd/riscv/friends.hpp @@ -44,37 +44,6 @@ requires rvv_abi> return self_greater_impl(lhs, rhs); } -template -EVE_FORCEINLINE auto -self_less_impl(wide lhs, U rhs) noexcept -> logical> -requires rvv_abi> && (std::same_as, U> || scalar_value) -{ - if constexpr( scalar_value && !std::same_as ) return self_less(lhs, static_cast(rhs)); - else - { - constexpr auto c = categorize>(); - if constexpr( match(c, category::int_) ) return __riscv_vmslt(lhs, rhs, N::value); - else if constexpr( match(c, category::uint_) ) return __riscv_vmsltu(lhs, rhs, N::value); - else if constexpr( match(c, category::float_) ) return __riscv_vmflt(lhs, rhs, N::value); - } -} - -template -EVE_FORCEINLINE auto -self_less(wide lhs, wide rhs) noexcept -> logical> -requires rvv_abi> -{ - return self_less_impl(lhs, rhs); -} - -template -EVE_FORCEINLINE auto -self_less(wide lhs, std::convertible_to auto rhs) noexcept -> logical> -requires rvv_abi> -{ - return self_less_impl(lhs, rhs); -} - template EVE_FORCEINLINE auto self_geq_impl(wide lhs, U rhs) noexcept -> logical> diff --git a/include/eve/detail/function/simd/x86/friends.hpp b/include/eve/detail/function/simd/x86/friends.hpp index 0e1aa20de5..03901edc7f 100644 --- a/include/eve/detail/function/simd/x86/friends.hpp +++ b/include/eve/detail/function/simd/x86/friends.hpp @@ -179,86 +179,6 @@ self_neq(logical> v, logical> w) noexcept requires x86_abi else { return bit_cast(v.bits() ^ w.bits(), as(v)); } } -//================================================================================================ -template -EVE_FORCEINLINE as_logical_t> - self_less(wide v, wide w) noexcept requires x86_abi> -{ - constexpr auto c = categorize>(); - constexpr auto f = to_integer(cmp_flt::lt_oq); - - if constexpr( current_api >= avx512 ) - { - if constexpr( c == category::float32x16 ) return mask16 {_mm512_cmp_ps_mask(v, w, f)}; - else if constexpr( c == category::float32x8 ) return mask8 {_mm256_cmp_ps_mask(v, w, f)}; - else if constexpr( c == category::float32x4 ) return mask8 {_mm_cmp_ps_mask(v, w, f)}; - else if constexpr( c == category::float64x8 ) return mask8 {_mm512_cmp_pd_mask(v, w, f)}; - else if constexpr( c == category::float64x4 ) return mask8 {_mm256_cmp_pd_mask(v, w, f)}; - else if constexpr( c == category::float64x2 ) return mask8 {_mm_cmp_pd_mask(v, w, f)}; - else if constexpr( c == category::uint64x8 ) return mask8 {_mm512_cmplt_epu64_mask(v, w)}; - else if constexpr( c == category::uint64x4 ) return mask8 {_mm256_cmplt_epu64_mask(v, w)}; - else if constexpr( c == category::uint64x2 ) return mask8 {_mm_cmplt_epu64_mask(v, w)}; - else if constexpr( c == category::uint32x16 ) return mask16 {_mm512_cmplt_epu32_mask(v, w)}; - else if constexpr( c == category::uint32x8 ) return mask8 {_mm256_cmplt_epu32_mask(v, w)}; - else if constexpr( c == category::uint32x4 ) return mask8 {_mm_cmplt_epu32_mask(v, w)}; - else if constexpr( c == category::uint16x32 ) return mask32 {_mm512_cmplt_epu16_mask(v, w)}; - else if constexpr( c == category::uint16x16 ) return mask16 {_mm256_cmplt_epu16_mask(v, w)}; - else if constexpr( c == category::uint16x8 ) return mask8 {_mm_cmplt_epu16_mask(v, w)}; - else if constexpr( c == category::uint8x64 ) return mask64 {_mm512_cmplt_epu8_mask(v, w)}; - else if constexpr( c == category::uint8x32 ) return mask32 {_mm256_cmplt_epu8_mask(v, w)}; - else if constexpr( c == category::uint8x16 ) return mask16 {_mm_cmplt_epu8_mask(v, w)}; - else if constexpr( c == category::int64x8 ) return mask8 {_mm512_cmplt_epi64_mask(v, w)}; - else if constexpr( c == category::int64x4 ) return mask8 {_mm256_cmplt_epi64_mask(v, w)}; - else if constexpr( c == category::int64x2 ) return mask8 {_mm_cmplt_epi64_mask(v, w)}; - else if constexpr( c == category::int32x16 ) return mask16 {_mm512_cmplt_epi32_mask(v, w)}; - else if constexpr( c == category::int32x8 ) return mask8 {_mm256_cmplt_epi32_mask(v, w)}; - else if constexpr( c == category::int32x4 ) return mask8 {_mm_cmplt_epi32_mask(v, w)}; - else if constexpr( c == category::int16x32 ) return mask32 {_mm512_cmplt_epi16_mask(v, w)}; - else if constexpr( c == category::int16x16 ) return mask16 {_mm256_cmplt_epi16_mask(v, w)}; - else if constexpr( c == category::int16x8 ) return mask8 {_mm_cmplt_epi16_mask(v, w)}; - else if constexpr( c == category::int8x64 ) return mask64 {_mm512_cmplt_epi8_mask(v, w)}; - else if constexpr( c == category::int8x32 ) return mask32 {_mm256_cmplt_epi8_mask(v, w)}; - else if constexpr( c == category::int8x16 ) return mask16 {_mm_cmplt_epi8_mask(v, w)}; - } - else - { - if constexpr( c == category::float32x8 ) return _mm256_cmp_ps(v, w, f); - else if constexpr( c == category::float64x4 ) return _mm256_cmp_pd(v, w, f); - else if constexpr( c == category::float32x4 ) return _mm_cmplt_ps(v, w); - else if constexpr( c == category::float64x2 ) return _mm_cmplt_pd(v, w); - else - { - constexpr auto use_avx2 = current_api >= avx2; - constexpr auto lt = [](E ev, E fv) { return as_logical_t(ev < fv); }; - - [[maybe_unused]] auto unsigned_cmp = [](auto vv, auto wv) - { - using l_t = logical>; - auto const sm = signmask(as, signed>>()); - return bit_cast((bit_cast(vv, as(sm)) - sm) < (bit_cast(wv, as(sm)) - sm), as {}); - }; - - if constexpr( use_avx2 && c == category::int64x4 ) return _mm256_cmpgt_epi64(w, v); - else if constexpr( use_avx2 && c == category::uint64x4 ) return unsigned_cmp(v, w); - else if constexpr( use_avx2 && c == category::int32x8 ) return _mm256_cmpgt_epi32(w, v); - else if constexpr( use_avx2 && c == category::uint32x8 ) return unsigned_cmp(v, w); - else if constexpr( use_avx2 && c == category::int16x16 ) return _mm256_cmpgt_epi16(w, v); - else if constexpr( use_avx2 && c == category::uint16x16 ) return unsigned_cmp(v, w); - else if constexpr( use_avx2 && c == category::int8x32 ) return _mm256_cmpgt_epi8(w, v); - else if constexpr( use_avx2 && c == category::uint8x32 ) return unsigned_cmp(v, w); - else if constexpr( c == category::int64x2 ) return map(lt, v, w); - else if constexpr( c == category::int32x4 ) return _mm_cmplt_epi32(v, w); - else if constexpr( c == category::int16x8 ) return _mm_cmplt_epi16(v, w); - else if constexpr( c == category::int8x16 ) return _mm_cmplt_epi8(v, w); - else if constexpr( c == category::uint64x2 ) return map(lt, v, w); - else if constexpr( c == category::uint32x4 ) return unsigned_cmp(v, w); - else if constexpr( c == category::uint16x8 ) return unsigned_cmp(v, w); - else if constexpr( c == category::uint8x16 ) return unsigned_cmp(v, w); - else return aggregate(lt, v, w); - } - } -} - //================================================================================================ template EVE_FORCEINLINE as_logical_t> diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp new file mode 100644 index 0000000000..7b4ab1f66c --- /dev/null +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -0,0 +1,42 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +namespace eve::detail +{ + template + EVE_FORCEINLINE constexpr common_logical_t is_less_(EVE_REQUIRES(cpu_), O const& o, T a, U b) noexcept + { + if constexpr (O::contains(definitely)) + { + using w_t = common_value_t; + auto aa = w_t{a}; + auto bb = w_t{b}; + + auto tol = o[definitely].value(w_t{}); + + if constexpr (integral_value) return aa < eve::prev(bb, tol); + else return aa < fam(bb, -tol, eve::max(eve::abs(aa), eve::abs(bb))); + } + else + { + if constexpr (scalar_value) + { + if constexpr (scalar_value) return common_logical_t(a < b); + else if constexpr (std::same_as>) return is_less(U{a}, b); + else return is_less(element_type_t{a}, b); + } + else + { + if constexpr (simd_value) return map(is_less, a, b); + else if constexpr (std::same_as, U>) return is_less(a, T{b}); + else return is_less(a, element_type_t{b}); + } + } + } +} diff --git a/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp b/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp new file mode 100644 index 0000000000..e374b0e600 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp @@ -0,0 +1,55 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_(EVE_REQUIRES(neon128_), O const& opts, wide a, wide b) noexcept + requires arm_abi> + { + if constexpr (O::contains(definitely)) + { + return is_less.behavior(cpu_{}, opts, a, b); + } + else + { + constexpr auto cat = categorize>(); + + if constexpr (cat == category::int32x4 ) return vcltq_s32(a, b); + else if constexpr (cat == category::int16x8 ) return vcltq_s16(a, b); + else if constexpr (cat == category::int8x16 ) return vcltq_s8(a, b); + else if constexpr (cat == category::uint32x4 ) return vcltq_u32(a, b); + else if constexpr (cat == category::uint16x8 ) return vcltq_u16(a, b); + else if constexpr (cat == category::uint8x16 ) return vcltq_u8(a, b); + else if constexpr (cat == category::float32x4) return vcltq_f32(a, b); + else if constexpr (cat == category::int32x2 ) return vclt_s32(a, b); + else if constexpr (cat == category::int16x4 ) return vclt_s16(a, b); + else if constexpr (cat == category::int8x8 ) return vclt_s8(a, b); + else if constexpr (cat == category::uint32x2 ) return vclt_u32(a, b); + else if constexpr (cat == category::uint16x4 ) return vclt_u16(a, b); + else if constexpr (cat == category::uint8x8 ) return vclt_u8(a, b); + else if constexpr (cat == category::float32x2) return vclt_f32(a, b); + else if constexpr (current_api >= asimd) + { + if constexpr (cat == category::float64x1) return vclt_f64(a, b); + else if constexpr (cat == category::int64x1) return vclt_s64(a, b); + else if constexpr (cat == category::uint64x1) return vclt_u64(a, b); + else if constexpr (cat == category::float64x2) return vcltq_f64(a, b); + else if constexpr (cat == category::int64x2) return vcltq_s64(a, b); + else if constexpr (cat == category::uint64x2) return vcltq_u64(a, b); + } + else return map(is_less, a, b); + } + } +} diff --git a/include/eve/module/core/regular/impl/simd/arm/sve/is_less.hpp b/include/eve/module/core/regular/impl/simd/arm/sve/is_less.hpp new file mode 100644 index 0000000000..dd33a69f7d --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/arm/sve/is_less.hpp @@ -0,0 +1,22 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_(EVE_REQUIRES(sve_), O const& opts, wide a, wide b) noexcept + requires sve_abi> + { + if constexpr (O::contains(definitely)) return is_less.behavior(cpu_{}, opts, a, b); + else return svcmplt(sve_true(), a, b); + } +} \ No newline at end of file diff --git a/include/eve/module/core/regular/impl/simd/ppc/is_less.hpp b/include/eve/module/core/regular/impl/simd/ppc/is_less.hpp new file mode 100644 index 0000000000..d564952a19 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/ppc/is_less.hpp @@ -0,0 +1,24 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_(EVE_REQUIRES(vmx_), O const& opts, wide a, wide b) noexcept + requires ppc_abi> + { + if constexpr (O::contains(definitely)) return is_less.behavior(cpu_{}, opts, a, b); + else return logical>(vec_cmplt(v.storage(), w.storage())); + } +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/is_less.hpp b/include/eve/module/core/regular/impl/simd/riscv/is_less.hpp new file mode 100644 index 0000000000..3860f7e3f1 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/is_less.hpp @@ -0,0 +1,33 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_(EVE_REQUIRES(rvv_), O const& opts, wide a, U b) noexcept + requires (rvv_abi> && same_element_type) + { + if constexpr (O::contains(definitely)) + { + return is_less.behavior(cpu_{}, opts, a, b); + } + else + { + constexpr auto c = categorize>(); + + if constexpr (match(c, category::int_)) return __riscv_vmslt(a, b, N::value); + else if constexpr (match(c, category::uint_)) return __riscv_vmsltu(a, b, N::value); + else if constexpr (match(c, category::float_)) return __riscv_vmflt(a, b, N::value); + } + } +} + diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp index aae47805ca..136338dbc1 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp @@ -19,58 +19,140 @@ namespace eve::detail { -// ----------------------------------------------------------------------------------------------- -// masked implementation - template - EVE_FORCEINLINE as_logical_t> is_less_(EVE_REQUIRES(avx512_), - C const &mask, - O const &opts, - wide const &v, - wide const &w) noexcept - requires x86_abi> + template + EVE_FORCEINLINE logical> self_less_(EVE_REQUIRES(sse2_), O const& opts, wide a, wide b) noexcept + requires x86_abi> + { + if constexpr (O::contains(definitely)) + { + return is_less.behavior(cpu_{}, opts, a, b); + } + else + { + constexpr auto c = categorize>(); + constexpr auto f = to_integer(cmp_flt::lt_oq); + + if constexpr (current_api >= avx512) + { + if constexpr (c == category::float32x16) return mask16 {_mm512_cmp_ps_mask(a, b, f)}; + else if constexpr (c == category::float32x8) return mask8 {_mm256_cmp_ps_mask(a, b, f)}; + else if constexpr (c == category::float32x4) return mask8 {_mm_cmp_ps_mask(a, b, f)}; + else if constexpr (c == category::float64x8) return mask8 {_mm512_cmp_pd_mask(a, b, f)}; + else if constexpr (c == category::float64x4) return mask8 {_mm256_cmp_pd_mask(a, b, f)}; + else if constexpr (c == category::float64x2) return mask8 {_mm_cmp_pd_mask(a, b, f)}; + else if constexpr (c == category::uint64x8) return mask8 {_mm512_cmplt_epu64_mask(a, b)}; + else if constexpr (c == category::uint64x4) return mask8 {_mm256_cmplt_epu64_mask(a, b)}; + else if constexpr (c == category::uint64x2) return mask8 {_mm_cmplt_epu64_mask(a, b)}; + else if constexpr (c == category::uint32x16) return mask16 {_mm512_cmplt_epu32_mask(a, b)}; + else if constexpr (c == category::uint32x8) return mask8 {_mm256_cmplt_epu32_mask(a, b)}; + else if constexpr (c == category::uint32x4) return mask8 {_mm_cmplt_epu32_mask(a, b)}; + else if constexpr (c == category::uint16x32) return mask32 {_mm512_cmplt_epu16_mask(a, b)}; + else if constexpr (c == category::uint16x16) return mask16 {_mm256_cmplt_epu16_mask(a, b)}; + else if constexpr (c == category::uint16x8) return mask8 {_mm_cmplt_epu16_mask(a, b)}; + else if constexpr (c == category::uint8x64) return mask64 {_mm512_cmplt_epu8_mask(a, b)}; + else if constexpr (c == category::uint8x32) return mask32 {_mm256_cmplt_epu8_mask(a, b)}; + else if constexpr (c == category::uint8x16) return mask16 {_mm_cmplt_epu8_mask(a, b)}; + else if constexpr (c == category::int64x8) return mask8 {_mm512_cmplt_epi64_mask(a, b)}; + else if constexpr (c == category::int64x4) return mask8 {_mm256_cmplt_epi64_mask(a, b)}; + else if constexpr (c == category::int64x2) return mask8 {_mm_cmplt_epi64_mask(a, b)}; + else if constexpr (c == category::int32x16) return mask16 {_mm512_cmplt_epi32_mask(a, b)}; + else if constexpr (c == category::int32x8) return mask8 {_mm256_cmplt_epi32_mask(a, b)}; + else if constexpr (c == category::int32x4) return mask8 {_mm_cmplt_epi32_mask(a, b)}; + else if constexpr (c == category::int16x32) return mask32 {_mm512_cmplt_epi16_mask(a, b)}; + else if constexpr (c == category::int16x16) return mask16 {_mm256_cmplt_epi16_mask(a, b)}; + else if constexpr (c == category::int16x8) return mask8 {_mm_cmplt_epi16_mask(a, b)}; + else if constexpr (c == category::int8x64) return mask64 {_mm512_cmplt_epi8_mask(a, b)}; + else if constexpr (c == category::int8x32) return mask32 {_mm256_cmplt_epi8_mask(a, b)}; + else if constexpr (c == category::int8x16) return mask16 {_mm_cmplt_epi8_mask(a, b)}; + } + else + { + if constexpr (c == category::float32x8) return _mm256_cmp_ps(a, b, f); + else if constexpr (c == category::float64x4) return _mm256_cmp_pd(a, b, f); + else if constexpr (c == category::float32x4) return _mm_cmplt_ps(a, b); + else if constexpr (c == category::float64x2) return _mm_cmplt_pd(a, b); + else + { + constexpr auto use_avx2 = current_api >= avx2; + constexpr auto lt = [](E ev, E fv) { return as_logical_t(ev < fv); }; + + [[maybe_unused]] auto unsigned_cmp = [](auto vv, auto wv) + { + using l_t = logical>; + auto const sm = signmask(as, signed>>{}); + return bit_cast((bit_cast(vv, as(sm)) - sm) < (bit_cast(wv, as(sm)) - sm), as{}); + }; + + if constexpr (use_avx2 && c == category::int64x4) return _mm256_cmpgt_epi64(b, a); + else if constexpr (use_avx2 && c == category::uint64x4) return unsigned_cmp(a, b); + else if constexpr (use_avx2 && c == category::int32x8) return _mm256_cmpgt_epi32(b, a); + else if constexpr (use_avx2 && c == category::uint32x8) return unsigned_cmp(a, b); + else if constexpr (use_avx2 && c == category::int16x16) return _mm256_cmpgt_epi16(b, a); + else if constexpr (use_avx2 && c == category::uint16x16) return unsigned_cmp(a, b); + else if constexpr (use_avx2 && c == category::int8x32) return _mm256_cmpgt_epi8(b, a); + else if constexpr (use_avx2 && c == category::uint8x32) return unsigned_cmp(a, b); + else if constexpr (c == category::int64x2) return map(lt, a, b); + else if constexpr (c == category::int32x4) return _mm_cmplt_epi32(a, b); + else if constexpr (c == category::int16x8) return _mm_cmplt_epi16(a, b); + else if constexpr (c == category::int8x16) return _mm_cmplt_epi8(a, b); + else if constexpr (c == category::uint64x2) return map(lt, a, b); + else if constexpr (c == category::uint32x4) return unsigned_cmp(a, b); + else if constexpr (c == category::uint16x8) return unsigned_cmp(a, b); + else if constexpr (c == category::uint8x16) return unsigned_cmp(a, b); + else return aggregate(lt, a, b); + } + } + } + } + + // ----------------------------------------------------------------------------------------------- + // masked implementation + template + EVE_FORCEINLINE logical> is_less_(EVE_REQUIRES(avx512_), C const &mask, O const &opts, wide a, wide b) noexcept + requires x86_abi> { if constexpr( C::has_alternative || O::contains(definitely)) { - return is_less.behavior(cpu_{}, opts, v, w); + return is_less.behavior(cpu_{}, opts, a, b); } else { - auto const s = alternative(mask, v, as(to_logical(v))); - [[maybe_unused]] auto m = expand_mask(mask, as(v)).storage().value; + auto const s = alternative(mask, a, as(to_logical(a))); + [[maybe_unused]] auto m = expand_mask(mask, as(a)).storage().value; constexpr auto c = categorize>(); constexpr auto f = to_integer(cmp_flt::lt_oq); if constexpr( C::is_complete ) return s; - else if constexpr( c == category::float32x16 ) return mask16 {_mm512_mask_cmp_ps_mask(m, v, w, f)}; - else if constexpr( c == category::float64x8 ) return mask8 {_mm512_mask_cmp_pd_mask(m, v, w, f)}; - else if constexpr( c == category::float32x8 ) return mask8 {_mm256_mask_cmp_ps_mask(m, v, w, f)}; - else if constexpr( c == category::float64x4 ) return mask8 {_mm256_mask_cmp_pd_mask(m, v, w, f)}; - else if constexpr( c == category::float32x4 ) return mask8 {_mm_mask_cmp_ps_mask(m, v, w, f)}; - else if constexpr( c == category::float64x2 ) return mask8 {_mm_mask_cmp_pd_mask(m, v, w, f)}; - else if constexpr( c == category::int64x8 ) return mask8 {_mm512_mask_cmplt_epi64_mask(m, v, w)}; - else if constexpr( c == category::int64x4 ) return mask8 {_mm256_mask_cmplt_epi64_mask(m, v, w)}; - else if constexpr( c == category::int64x2 ) return mask8 {_mm_mask_cmplt_epi64_mask(m, v, w)}; - else if constexpr( c == category::int32x16 ) return mask16 {_mm512_mask_cmplt_epi32_mask(m, v, w)}; - else if constexpr( c == category::int32x8 ) return mask8 {_mm256_mask_cmplt_epi32_mask(m, v, w)}; - else if constexpr( c == category::int32x4 ) return mask8 {_mm_mask_cmplt_epi32_mask(m, v, w)}; - else if constexpr( c == category::int16x32 ) return mask32 {_mm512_mask_cmplt_epi16_mask(m, v, w)}; - else if constexpr( c == category::int16x16 ) return mask16 {_mm256_mask_cmplt_epi16_mask(m, v, w)}; - else if constexpr( c == category::int16x8 ) return mask8 {_mm_mask_cmplt_epi16_mask(m, v, w)}; - else if constexpr( c == category::int8x64 ) return mask64 {_mm512_mask_cmplt_epi8_mask(m, v, w)}; - else if constexpr( c == category::int8x32 ) return mask32 {_mm256_mask_cmplt_epi8_mask(m, v, w)}; - else if constexpr( c == category::int8x16 ) return mask16 {_mm_mask_cmplt_epi8_mask(m, v, w)}; - else if constexpr( c == category::uint64x8 ) return mask8 {_mm512_mask_cmplt_epu64_mask(m, v, w)}; - else if constexpr( c == category::uint64x4 ) return mask8 {_mm256_mask_cmplt_epu64_mask(m, v, w)}; - else if constexpr( c == category::uint64x2 ) return mask8 {_mm_mask_cmplt_epu64_mask(m, v, w)}; - else if constexpr( c == category::uint32x16 ) return mask16 {_mm512_mask_cmplt_epu32_mask(m, v, w)}; - else if constexpr( c == category::uint32x8 ) return mask8 {_mm256_mask_cmplt_epu32_mask(m, v, w)}; - else if constexpr( c == category::uint32x4 ) return mask8 {_mm_mask_cmplt_epu32_mask(m, v, w)}; - else if constexpr( c == category::uint16x32 ) return mask32 {_mm512_mask_cmplt_epu16_mask(m, v, w)}; - else if constexpr( c == category::uint16x16 ) return mask16 {_mm256_mask_cmplt_epu16_mask(m, v, w)}; - else if constexpr( c == category::uint16x8 ) return mask8 {_mm_mask_cmplt_epu16_mask(m, v, w)}; - else if constexpr( c == category::uint8x64 ) return mask64 {_mm512_mask_cmplt_epu8_mask(m, v, w)}; - else if constexpr( c == category::uint8x32 ) return mask32 {_mm256_mask_cmplt_epu8_mask(m, v, w)}; - else if constexpr( c == category::uint8x16 ) return mask16 {_mm_mask_cmplt_epu8_mask(m, v, w)}; + else if constexpr( c == category::float32x16 ) return mask16 {_mm512_mask_cmp_ps_mask(m, a, b, f)}; + else if constexpr( c == category::float64x8 ) return mask8 {_mm512_mask_cmp_pd_mask(m, a, b, f)}; + else if constexpr( c == category::float32x8 ) return mask8 {_mm256_mask_cmp_ps_mask(m, a, b, f)}; + else if constexpr( c == category::float64x4 ) return mask8 {_mm256_mask_cmp_pd_mask(m, a, b, f)}; + else if constexpr( c == category::float32x4 ) return mask8 {_mm_mask_cmp_ps_mask(m, a, b, f)}; + else if constexpr( c == category::float64x2 ) return mask8 {_mm_mask_cmp_pd_mask(m, a, b, f)}; + else if constexpr( c == category::int64x8 ) return mask8 {_mm512_mask_cmplt_epi64_mask(m, a, b)}; + else if constexpr( c == category::int64x4 ) return mask8 {_mm256_mask_cmplt_epi64_mask(m, a, b)}; + else if constexpr( c == category::int64x2 ) return mask8 {_mm_mask_cmplt_epi64_mask(m, a, b)}; + else if constexpr( c == category::int32x16 ) return mask16 {_mm512_mask_cmplt_epi32_mask(m, a, b)}; + else if constexpr( c == category::int32x8 ) return mask8 {_mm256_mask_cmplt_epi32_mask(m, a, b)}; + else if constexpr( c == category::int32x4 ) return mask8 {_mm_mask_cmplt_epi32_mask(m, a, b)}; + else if constexpr( c == category::int16x32 ) return mask32 {_mm512_mask_cmplt_epi16_mask(m, a, b)}; + else if constexpr( c == category::int16x16 ) return mask16 {_mm256_mask_cmplt_epi16_mask(m, a, b)}; + else if constexpr( c == category::int16x8 ) return mask8 {_mm_mask_cmplt_epi16_mask(m, a, b)}; + else if constexpr( c == category::int8x64 ) return mask64 {_mm512_mask_cmplt_epi8_mask(m, a, b)}; + else if constexpr( c == category::int8x32 ) return mask32 {_mm256_mask_cmplt_epi8_mask(m, a, b)}; + else if constexpr( c == category::int8x16 ) return mask16 {_mm_mask_cmplt_epi8_mask(m, a, b)}; + else if constexpr( c == category::uint64x8 ) return mask8 {_mm512_mask_cmplt_epu64_mask(m, a, b)}; + else if constexpr( c == category::uint64x4 ) return mask8 {_mm256_mask_cmplt_epu64_mask(m, a, b)}; + else if constexpr( c == category::uint64x2 ) return mask8 {_mm_mask_cmplt_epu64_mask(m, a, b)}; + else if constexpr( c == category::uint32x16 ) return mask16 {_mm512_mask_cmplt_epu32_mask(m, a, b)}; + else if constexpr( c == category::uint32x8 ) return mask8 {_mm256_mask_cmplt_epu32_mask(m, a, b)}; + else if constexpr( c == category::uint32x4 ) return mask8 {_mm_mask_cmplt_epu32_mask(m, a, b)}; + else if constexpr( c == category::uint16x32 ) return mask32 {_mm512_mask_cmplt_epu16_mask(m, a, b)}; + else if constexpr( c == category::uint16x16 ) return mask16 {_mm256_mask_cmplt_epu16_mask(m, a, b)}; + else if constexpr( c == category::uint16x8 ) return mask8 {_mm_mask_cmplt_epu16_mask(m, a, b)}; + else if constexpr( c == category::uint8x64 ) return mask64 {_mm512_mask_cmplt_epu8_mask(m, a, b)}; + else if constexpr( c == category::uint8x32 ) return mask32 {_mm256_mask_cmplt_epu8_mask(m, a, b)}; + else if constexpr( c == category::uint8x16 ) return mask16 {_mm_mask_cmplt_epu8_mask(m, a, b)}; } } } diff --git a/include/eve/module/core/regular/is_less.hpp b/include/eve/module/core/regular/is_less.hpp index e8820773a5..2491504289 100644 --- a/include/eve/module/core/regular/is_less.hpp +++ b/include/eve/module/core/regular/is_less.hpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -24,9 +25,9 @@ namespace eve template struct is_less_t : strict_elementwise_callable { - template + template constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const - requires (same_lanes_or_scalar) + requires (same_lanes_or_scalar && same_element_type_or_scalar) { // static_assert( valid_tolerance, Options>::value, "[eve::is_less] simd tolerance requires at least one simd parameter." ); return EVE_DISPATCH_CALL(a, b); @@ -95,40 +96,26 @@ namespace eve // Required for if_else optimisation detections using callable_is_less_ = tag_t; - - namespace detail - { - template - EVE_FORCEINLINE constexpr common_logical_t - is_less_(EVE_REQUIRES(cpu_), O const&, logical a, logical b) noexcept - { - if constexpr (scalar_value && scalar_value) return common_logical_t(a < b); - else return a < b; - } - - template - EVE_FORCEINLINE constexpr common_logical_t - is_less_(EVE_REQUIRES(cpu_), O const & o, T const& aa, U const& bb) noexcept - { - if constexpr (O::contains(definitely)) - { - using w_t = common_value_t; - auto a = w_t(aa); - auto b = w_t(bb); - - auto tol = o[definitely].value(w_t{}); - if constexpr(integral_value) return a < eve::prev(b, tol); - else return a < fam(b, -tol, eve::max(eve::abs(a), eve::abs(b))); - } - else - { - if constexpr (scalar_value && scalar_value) return common_logical_t(aa < bb); - else return aa < bb; - } - } - } } +#include + #if defined(EVE_INCLUDE_X86_HEADER) # include #endif + +#if defined(EVE_INCLUDE_POWERPC_HEADER) +# include +#endif + +#if defined(EVE_INCLUDE_ARM_NEON_HEADER) +# include +#endif + +#if defined(EVE_INCLUDE_ARM_SVE_HEADER) +# include +#endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif From 649058908dc82bb13418f5a8b7e7dfa3325cd5d6 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 16 Jan 2025 14:37:08 +0100 Subject: [PATCH 03/19] fix typo --- include/eve/module/core/regular/impl/simd/x86/is_less.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp index 136338dbc1..66a73c24cc 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp @@ -20,7 +20,7 @@ namespace eve::detail { template - EVE_FORCEINLINE logical> self_less_(EVE_REQUIRES(sse2_), O const& opts, wide a, wide b) noexcept + EVE_FORCEINLINE logical> is_less_(EVE_REQUIRES(sse2_), O const& opts, wide a, wide b) noexcept requires x86_abi> { if constexpr (O::contains(definitely)) From 93dcf7015b10fe288f9187e69ed5990c3ed1b386 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 16 Jan 2025 15:30:52 +0100 Subject: [PATCH 04/19] [no ci] small review fixes --- include/eve/arch/cpu/wide.hpp | 4 ++-- include/eve/module/core/regular/impl/simd/arm/sve/is_less.hpp | 2 +- include/eve/module/core/regular/impl/simd/riscv/is_less.hpp | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/eve/arch/cpu/wide.hpp b/include/eve/arch/cpu/wide.hpp index c912e9b9fc..f6d6238efe 100644 --- a/include/eve/arch/cpu/wide.hpp +++ b/include/eve/arch/cpu/wide.hpp @@ -876,7 +876,7 @@ namespace eve requires(supports_ordering_v) #endif { - return is_less(w, wide{s}); + return is_less(w, s); } //! @brief Element-wise less-than comparison between a scalar and a eve::wide @@ -886,7 +886,7 @@ namespace eve requires(supports_ordering_v) #endif { - return is_less(wide{s}, w); + return is_less(s, w); } //! @brief Element-wise greater-than comparison between eve::wide diff --git a/include/eve/module/core/regular/impl/simd/arm/sve/is_less.hpp b/include/eve/module/core/regular/impl/simd/arm/sve/is_less.hpp index dd33a69f7d..3f8459bdd7 100644 --- a/include/eve/module/core/regular/impl/simd/arm/sve/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/arm/sve/is_less.hpp @@ -19,4 +19,4 @@ namespace eve::detail if constexpr (O::contains(definitely)) return is_less.behavior(cpu_{}, opts, a, b); else return svcmplt(sve_true(), a, b); } -} \ No newline at end of file +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/is_less.hpp b/include/eve/module/core/regular/impl/simd/riscv/is_less.hpp index 3860f7e3f1..331ceb82d8 100644 --- a/include/eve/module/core/regular/impl/simd/riscv/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/riscv/is_less.hpp @@ -30,4 +30,3 @@ namespace eve::detail } } } - From 0e2b3de700a203fd622d4ec7e1a1709abadaba91 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 16 Jan 2025 15:55:37 +0100 Subject: [PATCH 05/19] handle product types --- include/eve/module/core/regular/impl/is_less.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp index 7b4ab1f66c..59655a1718 100644 --- a/include/eve/module/core/regular/impl/is_less.hpp +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -12,7 +12,11 @@ namespace eve::detail template EVE_FORCEINLINE constexpr common_logical_t is_less_(EVE_REQUIRES(cpu_), O const& o, T a, U b) noexcept { - if constexpr (O::contains(definitely)) + if constexpr (product_type && product_type) + { + return convert(kumi::to_tuple(a) < kumi::to_tuple(b), as_element>()); + } + else if constexpr (O::contains(definitely)) { using w_t = common_value_t; auto aa = w_t{a}; From e77918531e07f0bf9e99bfe264798943e3bc9de5 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 16 Jan 2025 18:50:09 +0100 Subject: [PATCH 06/19] fix --- include/eve/concept/compatible.hpp | 12 ++---- include/eve/concept/element_type.hpp | 20 ---------- .../eve/module/core/regular/impl/is_less.hpp | 16 +++++--- include/eve/module/core/regular/is_less.hpp | 6 +-- test/unit/module/core/is_less.cpp | 38 +++++++++++++------ 5 files changed, 44 insertions(+), 48 deletions(-) delete mode 100644 include/eve/concept/element_type.hpp diff --git a/include/eve/concept/compatible.hpp b/include/eve/concept/compatible.hpp index 8ecd6b23eb..e251277034 100644 --- a/include/eve/concept/compatible.hpp +++ b/include/eve/concept/compatible.hpp @@ -31,19 +31,15 @@ namespace eve concept size_compatible_values = size_compatible_to || size_compatible_to; - - template - concept same_value_type = std::same_as< element_type_t> - , element_type_t> - >; + template + concept same_value_type = (std::same_as>, element_type_t>> && ...); template concept different_value_type = !std::same_as, element_type_t>; - template concept index_compatible_values = integral_value && floating_value && size_compatible_values; - - + template + concept compatible_arithmetic_values = scalar_value || scalar_value || std::same_as; } diff --git a/include/eve/concept/element_type.hpp b/include/eve/concept/element_type.hpp deleted file mode 100644 index 8fbbc4aaa7..0000000000 --- a/include/eve/concept/element_type.hpp +++ /dev/null @@ -1,20 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include - -namespace eve -{ - template - concept same_element_type = (std::same_as, element_type_t> || ...); - - template - concept same_element_type_or_scalar = (scalar_value || ... || scalar_value) || same_element_type; -} diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp index 59655a1718..01bc858d8b 100644 --- a/include/eve/module/core/regular/impl/is_less.hpp +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -18,6 +18,8 @@ namespace eve::detail } else if constexpr (O::contains(definitely)) { + static_assert(floating_value && floating_value, "[EVE] eve::is_less[definitely] only accepts floating point values."); + using w_t = common_value_t; auto aa = w_t{a}; auto bb = w_t{b}; @@ -31,15 +33,17 @@ namespace eve::detail { if constexpr (scalar_value) { - if constexpr (scalar_value) return common_logical_t(a < b); - else if constexpr (std::same_as>) return is_less(U{a}, b); - else return is_less(element_type_t{a}, b); + if constexpr (scalar_value) return common_logical_t(a < b); + // because of the auto-conversion rules in elementwise_callable, + // we can assume that T will have the type element_type at that point + else return is_less(U{a}, b); } else { - if constexpr (simd_value) return map(is_less, a, b); - else if constexpr (std::same_as, U>) return is_less(a, T{b}); - else return is_less(a, element_type_t{b}); + if constexpr (simd_value) return map(is_less, a, b); + // because of the auto-conversion rules in elementwise_callable, + // we can assume that U will have the type element_type at that point + else return is_less(a, T{b}); } } } diff --git a/include/eve/module/core/regular/is_less.hpp b/include/eve/module/core/regular/is_less.hpp index 2491504289..6ff0ba4da0 100644 --- a/include/eve/module/core/regular/is_less.hpp +++ b/include/eve/module/core/regular/is_less.hpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include @@ -23,11 +23,11 @@ namespace eve { template - struct is_less_t : strict_elementwise_callable + struct is_less_t : elementwise_callable { template constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const - requires (same_lanes_or_scalar && same_element_type_or_scalar) + requires compatible_arithmetic_values { // static_assert( valid_tolerance, Options>::value, "[eve::is_less] simd tolerance requires at least one simd parameter." ); return EVE_DISPATCH_CALL(a, b); diff --git a/test/unit/module/core/is_less.cpp b/test/unit/module/core/is_less.cpp index 5b617285ed..e8b8a5111a 100644 --- a/test/unit/module/core/is_less.cpp +++ b/test/unit/module/core/is_less.cpp @@ -9,28 +9,44 @@ #include +template +void test_with_types(F f) +{ + TTS_TYPE_IS(decltype(f(T{}, U{})), Expected); + TTS_TYPE_IS(decltype(f(U{}, T{})), Expected); +} + //================================================================================================== //== Types tests //================================================================================================== TTS_CASE_TPL("Check return types of eve::is_less(simd)", eve::test::simd::all_types) (tts::type) { - using eve::logical; using v_t = eve::element_type_t; - TTS_EXPR_IS(eve::is_less(T(), T()), logical); - TTS_EXPR_IS(eve::is_less(v_t(), v_t()), logical); - TTS_EXPR_IS(eve::is_less(T(), v_t()), logical); - TTS_EXPR_IS(eve::is_less(v_t(), T()), logical); - if constexpr( eve::floating_value ) + TTS_EXPR_IS(eve::is_less(T(), T()), eve::logical); + TTS_EXPR_IS(eve::is_less(v_t(), v_t()), eve::logical); + + test_with_types>(eve::is_less); + + if constexpr (eve::floating_value) { - TTS_EXPR_IS(eve::is_less[eve::definitely](T(), T()), logical); - TTS_EXPR_IS(eve::is_less[eve::definitely](v_t(), v_t()), logical); - TTS_EXPR_IS(eve::is_less[eve::definitely](T(), v_t()), logical); - TTS_EXPR_IS(eve::is_less[eve::definitely](v_t(), T()), logical); + TTS_EXPR_IS(eve::is_less[eve::definitely](T(), T()), eve::logical); + TTS_EXPR_IS(eve::is_less[eve::definitely](v_t(), v_t()), eve::logical); + + test_with_types>(eve::is_less[eve::definitely]); } }; +TTS_CASE_TPL("Check return types of eve::is_less(simd) with mixed types", eve::test::simd::all_types) +(tts::type) +{ + using D = eve::downgrade_t; + using vd_t = eve::element_type_t; + + test_with_types>(eve::is_less); +}; + //================================================================================================== //== Tests for eve::is_less //================================================================================================== @@ -54,7 +70,7 @@ TTS_CASE_WITH("Check behavior of eve::is_less(simd)", //================================================================================================== //== Tests for eve::is_less corner cases for floating //================================================================================================== -TTS_CASE_TPL("Check behavior of eve::is_less(simd)", eve::test::simd::ieee_reals) +TTS_CASE_TPL("Check behavior of eve::is_less(simd) corner cases", eve::test::simd::ieee_reals) (tts::type const&) { using eve::as; From 2ee02e9806eb22ec7b4f3d07cc3b7a5d703a43e3 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 16 Jan 2025 18:52:20 +0100 Subject: [PATCH 07/19] update comments --- include/eve/module/core/regular/impl/is_less.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp index 01bc858d8b..b46b793637 100644 --- a/include/eve/module/core/regular/impl/is_less.hpp +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -34,15 +34,15 @@ namespace eve::detail if constexpr (scalar_value) { if constexpr (scalar_value) return common_logical_t(a < b); - // because of the auto-conversion rules in elementwise_callable, - // we can assume that T will have the type element_type at that point + // because of the auto-conversion rules in elementwise_callable, we can assume that T will have the type + // element_type at that point and that the appropriate wide x S backend doesn't exist. else return is_less(U{a}, b); } else { if constexpr (simd_value) return map(is_less, a, b); - // because of the auto-conversion rules in elementwise_callable, - // we can assume that U will have the type element_type at that point + // because of the auto-conversion rules in elementwise_callable, we can assume that U will have the type + // element_type at that point and that the appropriate wide x S backend doesn't exist. else return is_less(a, T{b}); } } From 7f9e8f12b3a216b344a0834550a9ca37e297d561 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 16 Jan 2025 19:00:30 +0100 Subject: [PATCH 08/19] renamed concept --- include/eve/concept/compatible.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/eve/concept/compatible.hpp b/include/eve/concept/compatible.hpp index e251277034..a824c441d0 100644 --- a/include/eve/concept/compatible.hpp +++ b/include/eve/concept/compatible.hpp @@ -32,7 +32,7 @@ namespace eve || size_compatible_to; template - concept same_value_type = (std::same_as>, element_type_t>> && ...); + concept same_element_type = (std::same_as>, element_type_t>> && ...); template concept different_value_type = !std::same_as, element_type_t>; From efe48c1cfbdd1dc32b068b225e6c062a0d8ab1e1 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 16 Jan 2025 19:26:08 +0100 Subject: [PATCH 09/19] fix typo --- include/eve/module/core/regular/impl/simd/ppc/is_less.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/eve/module/core/regular/impl/simd/ppc/is_less.hpp b/include/eve/module/core/regular/impl/simd/ppc/is_less.hpp index d564952a19..8bdcde40b3 100644 --- a/include/eve/module/core/regular/impl/simd/ppc/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/ppc/is_less.hpp @@ -19,6 +19,6 @@ namespace eve::detail requires ppc_abi> { if constexpr (O::contains(definitely)) return is_less.behavior(cpu_{}, opts, a, b); - else return logical>(vec_cmplt(v.storage(), w.storage())); + else return logical>(vec_cmplt(a.storage(), b.storage())); } } From caf9bd499c688ef7bda1b6b14d7b1cc3c2c23d52 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Fri, 17 Jan 2025 14:44:29 +0100 Subject: [PATCH 10/19] improved codegen on x86 --- .../core/regular/impl/simd/x86/is_less.hpp | 70 ++++++++++++++++--- 1 file changed, 59 insertions(+), 11 deletions(-) diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp index 66a73c24cc..e341de5c1a 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp @@ -74,32 +74,80 @@ namespace eve::detail else { constexpr auto use_avx2 = current_api >= avx2; + constexpr auto use_avx = current_api >= avx; + constexpr auto use_sse4_1 = current_api >= sse4_1; constexpr auto lt = [](E ev, E fv) { return as_logical_t(ev < fv); }; - [[maybe_unused]] auto unsigned_cmp = [](auto vv, auto wv) + [[maybe_unused]] auto unsigned_cmp = [](auto lhs, auto rhs) { using l_t = logical>; auto const sm = signmask(as, signed>>{}); - return bit_cast((bit_cast(vv, as(sm)) - sm) < (bit_cast(wv, as(sm)) - sm), as{}); + return bit_cast((bit_cast(lhs, as(sm)) - sm) < (bit_cast(rhs, as(sm)) - sm), as{}); }; if constexpr (use_avx2 && c == category::int64x4) return _mm256_cmpgt_epi64(b, a); else if constexpr (use_avx2 && c == category::uint64x4) return unsigned_cmp(a, b); else if constexpr (use_avx2 && c == category::int32x8) return _mm256_cmpgt_epi32(b, a); - else if constexpr (use_avx2 && c == category::uint32x8) return unsigned_cmp(a, b); + else if constexpr (use_avx2 && c == category::uint32x8) return _mm256_xor_si256( + _mm256_cmpeq_epi32( + _mm256_min_epu32(a, b), + b), + _mm256_cmpeq_epi32(b, b)); else if constexpr (use_avx2 && c == category::int16x16) return _mm256_cmpgt_epi16(b, a); - else if constexpr (use_avx2 && c == category::uint16x16) return unsigned_cmp(a, b); + else if constexpr (use_avx2 && c == category::uint16x16) return _mm256_xor_si256( + _mm256_cmpeq_epi16( + _mm256_min_epu16(a, b), + b), + _mm256_cmpeq_epi16(b, b)); else if constexpr (use_avx2 && c == category::int8x32) return _mm256_cmpgt_epi8(b, a); - else if constexpr (use_avx2 && c == category::uint8x32) return unsigned_cmp(a, b); - else if constexpr (c == category::int64x2) return map(lt, a, b); + else if constexpr (use_avx2 && c == category::uint8x32) return _mm256_xor_si256( + _mm256_cmpeq_epi8( + _mm256_min_epu8(a, b), + b), + _mm256_cmpeq_epi8(b, b)); else if constexpr (c == category::int32x4) return _mm_cmplt_epi32(a, b); else if constexpr (c == category::int16x8) return _mm_cmplt_epi16(a, b); else if constexpr (c == category::int8x16) return _mm_cmplt_epi8(a, b); - else if constexpr (c == category::uint64x2) return map(lt, a, b); - else if constexpr (c == category::uint32x4) return unsigned_cmp(a, b); - else if constexpr (c == category::uint16x8) return unsigned_cmp(a, b); - else if constexpr (c == category::uint8x16) return unsigned_cmp(a, b); - else return aggregate(lt, a, b); + else if constexpr (c == category::uint32x4) + { + if constexpr (use_sse4_1) + { + // offers better codegen on GCC and for subvectors on clang + // force generation of the v/pxor mask without needing a memory load + return _mm_xor_si128(_mm_cmpeq_epi32(_mm_min_epu32(a, b), b), _mm_cmpeq_epi32(b, b)); + } + else + { + return unsigned_cmp(a, b); + } + } + else if constexpr (c == category::uint16x8) + { + if constexpr (use_sse4_1) + { + // offers better codegen on GCC and for subvectors on clang + // force generation of the v/pxor mask without needing a memory load + return _mm_xor_si128(_mm_cmpeq_epi16(_mm_min_epu16(a, b), b), _mm_cmpeq_epi16(b, b)); + } + else + { + return unsigned_cmp(a, b); + } + } + else if constexpr (c == category::uint8x16) + { + if constexpr (use_sse4_1) + { + // offers better codegen on GCC and for subvectors on clang + // force generation of the v/pxor mask without needing a memory load + return _mm_xor_si128(_mm_cmpeq_epi8(_mm_min_epu8(a, b), b), _mm_cmpeq_epi8(b, b)); + } + else + { + return unsigned_cmp(a, b); + } + } + else return map(lt, a, b); } } } From 81f644d80a083ab7e7871ef40a8e3e692a33e840 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Fri, 17 Jan 2025 15:13:41 +0100 Subject: [PATCH 11/19] fix unused variable --- .../core/regular/impl/simd/x86/is_less.hpp | 32 ++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp index e341de5c1a..23660057c2 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp @@ -73,10 +73,9 @@ namespace eve::detail else if constexpr (c == category::float64x2) return _mm_cmplt_pd(a, b); else { - constexpr auto use_avx2 = current_api >= avx2; - constexpr auto use_avx = current_api >= avx; + constexpr auto use_avx2 = current_api >= avx2; constexpr auto use_sse4_1 = current_api >= sse4_1; - constexpr auto lt = [](E ev, E fv) { return as_logical_t(ev < fv); }; + constexpr auto lt = [](E ev, E fv) { return as_logical_t(ev < fv); }; [[maybe_unused]] auto unsigned_cmp = [](auto lhs, auto rhs) { @@ -88,23 +87,20 @@ namespace eve::detail if constexpr (use_avx2 && c == category::int64x4) return _mm256_cmpgt_epi64(b, a); else if constexpr (use_avx2 && c == category::uint64x4) return unsigned_cmp(a, b); else if constexpr (use_avx2 && c == category::int32x8) return _mm256_cmpgt_epi32(b, a); - else if constexpr (use_avx2 && c == category::uint32x8) return _mm256_xor_si256( - _mm256_cmpeq_epi32( - _mm256_min_epu32(a, b), - b), - _mm256_cmpeq_epi32(b, b)); + else if constexpr (use_avx2 && c == category::uint32x8) + { + return _mm256_xor_si256(_mm256_cmpeq_epi32(_mm256_min_epu32(a, b), b), _mm256_cmpeq_epi32(b, b)); + } else if constexpr (use_avx2 && c == category::int16x16) return _mm256_cmpgt_epi16(b, a); - else if constexpr (use_avx2 && c == category::uint16x16) return _mm256_xor_si256( - _mm256_cmpeq_epi16( - _mm256_min_epu16(a, b), - b), - _mm256_cmpeq_epi16(b, b)); + else if constexpr (use_avx2 && c == category::uint16x16) + { + return _mm256_xor_si256(_mm256_cmpeq_epi16(_mm256_min_epu16(a, b), b), _mm256_cmpeq_epi16(b, b)); + } else if constexpr (use_avx2 && c == category::int8x32) return _mm256_cmpgt_epi8(b, a); - else if constexpr (use_avx2 && c == category::uint8x32) return _mm256_xor_si256( - _mm256_cmpeq_epi8( - _mm256_min_epu8(a, b), - b), - _mm256_cmpeq_epi8(b, b)); + else if constexpr (use_avx2 && c == category::uint8x32) + { + return _mm256_xor_si256(_mm256_cmpeq_epi8(_mm256_min_epu8(a, b), b), _mm256_cmpeq_epi8(b, b)); + } else if constexpr (c == category::int32x4) return _mm_cmplt_epi32(a, b); else if constexpr (c == category::int16x8) return _mm_cmplt_epi16(a, b); else if constexpr (c == category::int8x16) return _mm_cmplt_epi8(a, b); From 57be8754f117591fe68af116a032220250c9113e Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Fri, 17 Jan 2025 16:03:46 +0100 Subject: [PATCH 12/19] prevent calling is_less with O::definitely at the API level --- include/eve/module/core/regular/impl/is_less.hpp | 2 -- include/eve/module/core/regular/is_less.hpp | 11 +++++++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp index b46b793637..f39a490e29 100644 --- a/include/eve/module/core/regular/impl/is_less.hpp +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -18,8 +18,6 @@ namespace eve::detail } else if constexpr (O::contains(definitely)) { - static_assert(floating_value && floating_value, "[EVE] eve::is_less[definitely] only accepts floating point values."); - using w_t = common_value_t; auto aa = w_t{a}; auto bb = w_t{b}; diff --git a/include/eve/module/core/regular/is_less.hpp b/include/eve/module/core/regular/is_less.hpp index 6ff0ba4da0..5146a68327 100644 --- a/include/eve/module/core/regular/is_less.hpp +++ b/include/eve/module/core/regular/is_less.hpp @@ -25,14 +25,21 @@ namespace eve template struct is_less_t : elementwise_callable { - template + template constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const - requires compatible_arithmetic_values + requires (compatible_arithmetic_values && Options::contains(definitely)) { // static_assert( valid_tolerance, Options>::value, "[eve::is_less] simd tolerance requires at least one simd parameter." ); return EVE_DISPATCH_CALL(a, b); } + template + constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const + requires (compatible_arithmetic_values && !Options::contains(definitely)) + { + return EVE_DISPATCH_CALL(a, b); + } + EVE_CALLABLE_OBJECT(is_less_t, is_less_); }; From 8bc17fccb148b534855df030f17be41786f95461 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Mon, 20 Jan 2025 14:34:11 +0100 Subject: [PATCH 13/19] use eve functions instead of intrinsics in some places --- .../core/regular/impl/simd/x86/is_less.hpp | 51 ++++--------------- 1 file changed, 9 insertions(+), 42 deletions(-) diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp index 23660057c2..1465ccd66f 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp @@ -87,61 +87,28 @@ namespace eve::detail if constexpr (use_avx2 && c == category::int64x4) return _mm256_cmpgt_epi64(b, a); else if constexpr (use_avx2 && c == category::uint64x4) return unsigned_cmp(a, b); else if constexpr (use_avx2 && c == category::int32x8) return _mm256_cmpgt_epi32(b, a); - else if constexpr (use_avx2 && c == category::uint32x8) - { - return _mm256_xor_si256(_mm256_cmpeq_epi32(_mm256_min_epu32(a, b), b), _mm256_cmpeq_epi32(b, b)); - } + else if constexpr (use_avx2 && c == category::uint32x8) return eve::min(a, b) != b; else if constexpr (use_avx2 && c == category::int16x16) return _mm256_cmpgt_epi16(b, a); - else if constexpr (use_avx2 && c == category::uint16x16) - { - return _mm256_xor_si256(_mm256_cmpeq_epi16(_mm256_min_epu16(a, b), b), _mm256_cmpeq_epi16(b, b)); - } + else if constexpr (use_avx2 && c == category::uint16x16) return eve::min(a, b) != b; else if constexpr (use_avx2 && c == category::int8x32) return _mm256_cmpgt_epi8(b, a); - else if constexpr (use_avx2 && c == category::uint8x32) - { - return _mm256_xor_si256(_mm256_cmpeq_epi8(_mm256_min_epu8(a, b), b), _mm256_cmpeq_epi8(b, b)); - } + else if constexpr (use_avx2 && c == category::uint8x32) return eve::min(a, b) != b; else if constexpr (c == category::int32x4) return _mm_cmplt_epi32(a, b); else if constexpr (c == category::int16x8) return _mm_cmplt_epi16(a, b); else if constexpr (c == category::int8x16) return _mm_cmplt_epi8(a, b); else if constexpr (c == category::uint32x4) { - if constexpr (use_sse4_1) - { - // offers better codegen on GCC and for subvectors on clang - // force generation of the v/pxor mask without needing a memory load - return _mm_xor_si128(_mm_cmpeq_epi32(_mm_min_epu32(a, b), b), _mm_cmpeq_epi32(b, b)); - } - else - { - return unsigned_cmp(a, b); - } + if constexpr (use_sse4_1) return eve::min(a, b) != b; + else return unsigned_cmp(a, b); } else if constexpr (c == category::uint16x8) { - if constexpr (use_sse4_1) - { - // offers better codegen on GCC and for subvectors on clang - // force generation of the v/pxor mask without needing a memory load - return _mm_xor_si128(_mm_cmpeq_epi16(_mm_min_epu16(a, b), b), _mm_cmpeq_epi16(b, b)); - } - else - { - return unsigned_cmp(a, b); - } + if constexpr (use_sse4_1) return eve::min(a, b) != b; + else return unsigned_cmp(a, b); } else if constexpr (c == category::uint8x16) { - if constexpr (use_sse4_1) - { - // offers better codegen on GCC and for subvectors on clang - // force generation of the v/pxor mask without needing a memory load - return _mm_xor_si128(_mm_cmpeq_epi8(_mm_min_epu8(a, b), b), _mm_cmpeq_epi8(b, b)); - } - else - { - return unsigned_cmp(a, b); - } + if constexpr (use_sse4_1) return eve::min(a, b) != b; + else return unsigned_cmp(a, b); } else return map(lt, a, b); } From 45810b084f7ab92961b309c234914e5a8c62c304 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 23 Jan 2025 09:29:26 +0100 Subject: [PATCH 14/19] fixed as_logical for product_types --- .../detail/function/simd/common/friends.hpp | 6 +- include/eve/detail/logical_tie_breaker.hpp | 75 +++++++++++++++++++ .../eve/module/core/regular/impl/is_less.hpp | 2 +- include/eve/traits/as_logical.hpp | 10 +-- include/eve/traits/common_value.hpp | 38 +--------- test/unit/meta/traits/as_logical.cpp | 4 +- test/unit/meta/traits/common_logical.cpp | 4 +- 7 files changed, 90 insertions(+), 49 deletions(-) create mode 100644 include/eve/detail/logical_tie_breaker.hpp diff --git a/include/eve/detail/function/simd/common/friends.hpp b/include/eve/detail/function/simd/common/friends.hpp index 073e6e6752..a516f5146e 100644 --- a/include/eve/detail/function/simd/common/friends.hpp +++ b/include/eve/detail/function/simd/common/friends.hpp @@ -39,7 +39,7 @@ namespace eve::detail } else { - return convert(v.storage() == w.storage(), as_element>()); + return v.storage() == w.storage(); } } @@ -77,7 +77,7 @@ namespace eve::detail } else { - return convert(v.storage() != w.storage(), as_element>()); + return v.storage() != w.storage(); } } @@ -132,7 +132,7 @@ namespace eve::detail { if constexpr( product_type ) { - return convert(kumi::to_tuple(v) >= kumi::to_tuple(w), as_element>()); + return kumi::to_tuple(v) >= kumi::to_tuple(w); } else { diff --git a/include/eve/detail/logical_tie_breaker.hpp b/include/eve/detail/logical_tie_breaker.hpp new file mode 100644 index 0000000000..2b5d2b5ead --- /dev/null +++ b/include/eve/detail/logical_tie_breaker.hpp @@ -0,0 +1,75 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include + +#include + +namespace eve +{ + template + struct as_logical; +} + +namespace eve::detail +{ + template + inline consteval auto logical_tie_breaker_impl() + { + using ea_t = as_arithmetic_t>; + using eb_t = as_arithmetic_t>; + + if constexpr (std::same_as) + { + return A{}; + } + // smallest type + else if constexpr (sizeof(ea_t) != sizeof(eb_t)) + { + if constexpr (sizeof(ea_t) < sizeof(eb_t)) return A{}; + else return B{}; + } + // unsigned first + else if constexpr (signed_value != signed_value) + { + if constexpr (signed_value) return B{}; + else return A{}; + } + // integral first + else if constexpr (integral_value != integral_value) + { + if constexpr (integral_value) return A{}; + else return B{}; + } + // both types have the same size, signedness and integral-ness, they are functionally the same. + else + { + // handles the (long, long long) and (unsigned long, unsigned long long) cases + if constexpr (std::same_as) return A{}; + else return B{}; + } + } + + template + struct logical_tie_breaker + { + using type = typename logical_tie_breaker::type, Ts...>::type; + }; + + template + struct logical_tie_breaker + { + using type = decltype(logical_tie_breaker_impl::type, typename as_logical::type>()); + }; + + template + using logical_tie_breaker_t = typename logical_tie_breaker::type; +} diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp index f39a490e29..603f1331d4 100644 --- a/include/eve/module/core/regular/impl/is_less.hpp +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -14,7 +14,7 @@ namespace eve::detail { if constexpr (product_type && product_type) { - return convert(kumi::to_tuple(a) < kumi::to_tuple(b), as_element>()); + return kumi::to_tuple(a) < kumi::to_tuple(b); } else if constexpr (O::contains(definitely)) { diff --git a/include/eve/traits/as_logical.hpp b/include/eve/traits/as_logical.hpp index e2811b1f20..4679d8162a 100644 --- a/include/eve/traits/as_logical.hpp +++ b/include/eve/traits/as_logical.hpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace eve { @@ -31,15 +32,14 @@ namespace eve using type = logical; }; - - template - struct as_logical : as_logical< kumi::element_t<0,T> > - {}; - template struct as_logical : as_logical< translate_t > {}; + template + struct as_logical : as_logical> + {}; + template using as_logical_t = typename as_logical::type; } diff --git a/include/eve/traits/common_value.hpp b/include/eve/traits/common_value.hpp index 387cf5bf47..722ea26ab8 100644 --- a/include/eve/traits/common_value.hpp +++ b/include/eve/traits/common_value.hpp @@ -11,7 +11,6 @@ #include #include #include -#include #include namespace eve::detail @@ -92,39 +91,6 @@ namespace eve::detail { using type = T; - template - static constexpr auto tie_breaker() - { - using ea_t = as_arithmetic_t>; - using eb_t = as_arithmetic_t>; - - // smallest type - if constexpr (sizeof(ea_t) != sizeof(eb_t)) - { - if constexpr (sizeof(ea_t) < sizeof(eb_t)) return find_common_logical_reducer{}; - else return find_common_logical_reducer{}; - } - // unsigned first - else if constexpr (signed_value != signed_value) - { - if constexpr (signed_value) return find_common_logical_reducer{}; - else return find_common_logical_reducer{}; - } - // integral first - else if constexpr (integral_value != integral_value) - { - if constexpr (integral_value) return find_common_logical_reducer{}; - else return find_common_logical_reducer{}; - } - // both types have the same size, signedness and integral-ness, they are functionally the same. - else - { - // handles the (long, long long) and (unsigned long, unsigned long long) cases - if constexpr (std::same_as) return find_common_logical_reducer{}; - else return find_common_logical_reducer{}; - } - } - template friend auto operator%( find_common_logical_reducer, @@ -135,11 +101,11 @@ namespace eve::detail else if constexpr (std::same_as) return find_common_logical_reducer{}; else if constexpr (simd_value) { - if constexpr (simd_value) return tie_breaker(); + if constexpr (simd_value) return find_common_logical_reducer>{}; else return find_common_logical_reducer{}; } else if constexpr (simd_value) return find_common_logical_reducer{}; - else if constexpr (scalar_value && scalar_value) return tie_breaker(); + else if constexpr (scalar_value && scalar_value) return find_common_logical_reducer>{}; else return find_common_logical_reducer{}; } }; diff --git a/test/unit/meta/traits/as_logical.cpp b/test/unit/meta/traits/as_logical.cpp index 681eaa84ec..e2e37757e5 100644 --- a/test/unit/meta/traits/as_logical.cpp +++ b/test/unit/meta/traits/as_logical.cpp @@ -18,7 +18,7 @@ TTS_CASE_TPL( "Check as_logical on scalar", ::tts::arithmetic_types ) TTS_TYPE_IS(as_logical_t , logical); TTS_TYPE_IS(as_logical_t> , logical); - TTS_TYPE_IS((as_logical_t>), logical); + TTS_TYPE_IS((as_logical_t>), logical); }; TTS_CASE_TPL("Check as_wide on wide", ::tts::arithmetic_types ) @@ -31,5 +31,5 @@ TTS_CASE_TPL("Check as_wide on wide", ::tts::arithmetic_types ) TTS_TYPE_IS(as_logical_t> , logical>); TTS_TYPE_IS(as_logical_t>> , logical>); - TTS_TYPE_IS((as_logical_t,fixed<4>>>) , (logical>>)); + TTS_TYPE_IS((as_logical_t, fixed<4>>>) , (logical>>)); }; diff --git a/test/unit/meta/traits/common_logical.cpp b/test/unit/meta/traits/common_logical.cpp index 5254e5016e..003919cef5 100644 --- a/test/unit/meta/traits/common_logical.cpp +++ b/test/unit/meta/traits/common_logical.cpp @@ -124,6 +124,6 @@ TTS_CASE("eve::common_logical on tuples") using t1 = kumi::tuple; using t2 = kumi::tuple; - TTS_TYPE_IS((eve::common_logical_t), eve::logical); - TTS_TYPE_IS((eve::common_logical_t, eve::wide>), eve::logical>); + TTS_TYPE_IS((eve::common_logical_t), eve::logical); + TTS_TYPE_IS((eve::common_logical_t>, eve::wide>>), (eve::logical>>)); }; From 0ac17c5101d0a443faf00f4e4563f1f251a8897d Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 23 Jan 2025 11:04:33 +0100 Subject: [PATCH 15/19] fix for one-element product_types --- include/eve/detail/logical_tie_breaker.hpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/eve/detail/logical_tie_breaker.hpp b/include/eve/detail/logical_tie_breaker.hpp index 2b5d2b5ead..f5060be592 100644 --- a/include/eve/detail/logical_tie_breaker.hpp +++ b/include/eve/detail/logical_tie_breaker.hpp @@ -58,8 +58,11 @@ namespace eve::detail } } + template + struct logical_tie_breaker; + template - struct logical_tie_breaker + struct logical_tie_breaker { using type = typename logical_tie_breaker::type, Ts...>::type; }; @@ -70,6 +73,12 @@ namespace eve::detail using type = decltype(logical_tie_breaker_impl::type, typename as_logical::type>()); }; + template + struct logical_tie_breaker + { + using type = typename as_logical::type; + }; + template using logical_tie_breaker_t = typename logical_tie_breaker::type; } From df2a76bbb95552fdcf68fdc450df8292eeb2548e Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 23 Jan 2025 17:29:26 +0100 Subject: [PATCH 16/19] changed the impl of as_logical --- include/eve/detail/logical_tie_breaker.hpp | 84 ---------------------- include/eve/traits/as_logical.hpp | 7 +- include/eve/traits/common_value.hpp | 38 +++++++++- 3 files changed, 41 insertions(+), 88 deletions(-) delete mode 100644 include/eve/detail/logical_tie_breaker.hpp diff --git a/include/eve/detail/logical_tie_breaker.hpp b/include/eve/detail/logical_tie_breaker.hpp deleted file mode 100644 index f5060be592..0000000000 --- a/include/eve/detail/logical_tie_breaker.hpp +++ /dev/null @@ -1,84 +0,0 @@ -//================================================================================================== -/* - EVE - Expressive Vector Engine - Copyright : EVE Project Contributors - SPDX-License-Identifier: BSL-1.0 -*/ -//================================================================================================== -#pragma once - -#include -#include -#include - -#include - -namespace eve -{ - template - struct as_logical; -} - -namespace eve::detail -{ - template - inline consteval auto logical_tie_breaker_impl() - { - using ea_t = as_arithmetic_t>; - using eb_t = as_arithmetic_t>; - - if constexpr (std::same_as) - { - return A{}; - } - // smallest type - else if constexpr (sizeof(ea_t) != sizeof(eb_t)) - { - if constexpr (sizeof(ea_t) < sizeof(eb_t)) return A{}; - else return B{}; - } - // unsigned first - else if constexpr (signed_value != signed_value) - { - if constexpr (signed_value) return B{}; - else return A{}; - } - // integral first - else if constexpr (integral_value != integral_value) - { - if constexpr (integral_value) return A{}; - else return B{}; - } - // both types have the same size, signedness and integral-ness, they are functionally the same. - else - { - // handles the (long, long long) and (unsigned long, unsigned long long) cases - if constexpr (std::same_as) return A{}; - else return B{}; - } - } - - template - struct logical_tie_breaker; - - template - struct logical_tie_breaker - { - using type = typename logical_tie_breaker::type, Ts...>::type; - }; - - template - struct logical_tie_breaker - { - using type = decltype(logical_tie_breaker_impl::type, typename as_logical::type>()); - }; - - template - struct logical_tie_breaker - { - using type = typename as_logical::type; - }; - - template - using logical_tie_breaker_t = typename logical_tie_breaker::type; -} diff --git a/include/eve/traits/as_logical.hpp b/include/eve/traits/as_logical.hpp index 4679d8162a..b19ca794d2 100644 --- a/include/eve/traits/as_logical.hpp +++ b/include/eve/traits/as_logical.hpp @@ -10,10 +10,13 @@ #include #include #include -#include namespace eve { + // forward declare common_logical + template + struct common_logical; + template struct as_logical { @@ -37,7 +40,7 @@ namespace eve {}; template - struct as_logical : as_logical> + struct as_logical: kumi::apply_traits {}; template diff --git a/include/eve/traits/common_value.hpp b/include/eve/traits/common_value.hpp index 722ea26ab8..387cf5bf47 100644 --- a/include/eve/traits/common_value.hpp +++ b/include/eve/traits/common_value.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include namespace eve::detail @@ -91,6 +92,39 @@ namespace eve::detail { using type = T; + template + static constexpr auto tie_breaker() + { + using ea_t = as_arithmetic_t>; + using eb_t = as_arithmetic_t>; + + // smallest type + if constexpr (sizeof(ea_t) != sizeof(eb_t)) + { + if constexpr (sizeof(ea_t) < sizeof(eb_t)) return find_common_logical_reducer{}; + else return find_common_logical_reducer{}; + } + // unsigned first + else if constexpr (signed_value != signed_value) + { + if constexpr (signed_value) return find_common_logical_reducer{}; + else return find_common_logical_reducer{}; + } + // integral first + else if constexpr (integral_value != integral_value) + { + if constexpr (integral_value) return find_common_logical_reducer{}; + else return find_common_logical_reducer{}; + } + // both types have the same size, signedness and integral-ness, they are functionally the same. + else + { + // handles the (long, long long) and (unsigned long, unsigned long long) cases + if constexpr (std::same_as) return find_common_logical_reducer{}; + else return find_common_logical_reducer{}; + } + } + template friend auto operator%( find_common_logical_reducer, @@ -101,11 +135,11 @@ namespace eve::detail else if constexpr (std::same_as) return find_common_logical_reducer{}; else if constexpr (simd_value) { - if constexpr (simd_value) return find_common_logical_reducer>{}; + if constexpr (simd_value) return tie_breaker(); else return find_common_logical_reducer{}; } else if constexpr (simd_value) return find_common_logical_reducer{}; - else if constexpr (scalar_value && scalar_value) return find_common_logical_reducer>{}; + else if constexpr (scalar_value && scalar_value) return tie_breaker(); else return find_common_logical_reducer{}; } }; From 00f1bd3389c8d096feb8bbd086e12196c0fbfb28 Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Thu, 23 Jan 2025 19:21:17 +0100 Subject: [PATCH 17/19] simplified generic backend --- .../eve/module/core/regular/impl/is_less.hpp | 32 +++++-------------- 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp index 603f1331d4..3e9256c6cd 100644 --- a/include/eve/module/core/regular/impl/is_less.hpp +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -9,40 +9,24 @@ namespace eve::detail { - template - EVE_FORCEINLINE constexpr common_logical_t is_less_(EVE_REQUIRES(cpu_), O const& o, T a, U b) noexcept + template + EVE_FORCEINLINE constexpr common_logical_t is_less_(EVE_REQUIRES(cpu_), O const& o, T a, T b) noexcept { - if constexpr (product_type && product_type) + if constexpr (product_type) { return kumi::to_tuple(a) < kumi::to_tuple(b); } else if constexpr (O::contains(definitely)) { - using w_t = common_value_t; - auto aa = w_t{a}; - auto bb = w_t{b}; + auto tol = o[definitely].value(T{}); - auto tol = o[definitely].value(w_t{}); - - if constexpr (integral_value) return aa < eve::prev(bb, tol); - else return aa < fam(bb, -tol, eve::max(eve::abs(aa), eve::abs(bb))); + if constexpr (integral_value) return a < eve::prev(b, tol); + else return a < fam(b, -tol, eve::max(eve::abs(a), eve::abs(b))); } else { - if constexpr (scalar_value) - { - if constexpr (scalar_value) return common_logical_t(a < b); - // because of the auto-conversion rules in elementwise_callable, we can assume that T will have the type - // element_type at that point and that the appropriate wide x S backend doesn't exist. - else return is_less(U{a}, b); - } - else - { - if constexpr (simd_value) return map(is_less, a, b); - // because of the auto-conversion rules in elementwise_callable, we can assume that U will have the type - // element_type at that point and that the appropriate wide x S backend doesn't exist. - else return is_less(a, T{b}); - } + if constexpr (scalar_value) return as_logical_t(a < b); + else return map([](auto e, auto f) { return e < f; }, a, b); } } } From f20ab2fa864cd29212e4a0bf29b69f8347ada79a Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Fri, 24 Jan 2025 12:36:35 +0100 Subject: [PATCH 18/19] review comments --- include/eve/module/core/regular/impl/is_less.hpp | 14 ++++++++------ include/eve/module/core/regular/is_less.hpp | 11 ++--------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp index 3e9256c6cd..b541ee6388 100644 --- a/include/eve/module/core/regular/impl/is_less.hpp +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -10,19 +10,21 @@ namespace eve::detail { template - EVE_FORCEINLINE constexpr common_logical_t is_less_(EVE_REQUIRES(cpu_), O const& o, T a, T b) noexcept + EVE_FORCEINLINE constexpr as_logical_t is_less_(EVE_REQUIRES(cpu_), O const& o, T a, T b) noexcept { - if constexpr (product_type) - { - return kumi::to_tuple(a) < kumi::to_tuple(b); - } - else if constexpr (O::contains(definitely)) + if constexpr (O::contains(definitely)) { + static_assert(floating_value, "[eve::is_less] The definitely option is only supported for floating types."); + auto tol = o[definitely].value(T{}); if constexpr (integral_value) return a < eve::prev(b, tol); else return a < fam(b, -tol, eve::max(eve::abs(a), eve::abs(b))); } + else if constexpr (product_type) + { + return kumi::to_tuple(a) < kumi::to_tuple(b); + } else { if constexpr (scalar_value) return as_logical_t(a < b); diff --git a/include/eve/module/core/regular/is_less.hpp b/include/eve/module/core/regular/is_less.hpp index 5146a68327..a95a59f0eb 100644 --- a/include/eve/module/core/regular/is_less.hpp +++ b/include/eve/module/core/regular/is_less.hpp @@ -25,18 +25,11 @@ namespace eve template struct is_less_t : elementwise_callable { - template - constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const - requires (compatible_arithmetic_values && Options::contains(definitely)) - { - // static_assert( valid_tolerance, Options>::value, "[eve::is_less] simd tolerance requires at least one simd parameter." ); - return EVE_DISPATCH_CALL(a, b); - } - template constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const - requires (compatible_arithmetic_values && !Options::contains(definitely)) + requires (compatible_arithmetic_values) { + // static_assert( valid_tolerance, Options>::value, "[eve::is_less] simd tolerance requires at least one simd parameter." ); return EVE_DISPATCH_CALL(a, b); } From 65ffa75e0706b5a23d333fb7a800497fff90368c Mon Sep 17 00:00:00 2001 From: SadiinsoSnowfall Date: Fri, 24 Jan 2025 14:01:33 +0100 Subject: [PATCH 19/19] review comments --- include/eve/module/core/regular/impl/is_less.hpp | 2 -- include/eve/module/core/regular/is_less.hpp | 4 ++++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp index b541ee6388..2f80ca1e0f 100644 --- a/include/eve/module/core/regular/impl/is_less.hpp +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -14,8 +14,6 @@ namespace eve::detail { if constexpr (O::contains(definitely)) { - static_assert(floating_value, "[eve::is_less] The definitely option is only supported for floating types."); - auto tol = o[definitely].value(T{}); if constexpr (integral_value) return a < eve::prev(b, tol); diff --git a/include/eve/module/core/regular/is_less.hpp b/include/eve/module/core/regular/is_less.hpp index a95a59f0eb..63425af819 100644 --- a/include/eve/module/core/regular/is_less.hpp +++ b/include/eve/module/core/regular/is_less.hpp @@ -29,6 +29,10 @@ namespace eve constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const requires (compatible_arithmetic_values) { + if constexpr (Options::contains(definitely)) + { + static_assert(floating_value, "[eve::is_less] The definitely option is only supported for floating types."); + } // static_assert( valid_tolerance, Options>::value, "[eve::is_less] simd tolerance requires at least one simd parameter." ); return EVE_DISPATCH_CALL(a, b); }