diff --git a/include/eve/arch/cpu/wide.hpp b/include/eve/arch/cpu/wide.hpp index 1ec6953396..f6d6238efe 100644 --- a/include/eve/arch/cpu/wide.hpp +++ b/include/eve/arch/cpu/wide.hpp @@ -861,32 +861,32 @@ namespace eve friend EVE_FORCEINLINE auto operator!=(S v, wide w) noexcept { return w != v; } //! @brief Element-wise less-than comparison between eve::wide - friend EVE_FORCEINLINE auto operator<(wide v, wide w) noexcept + friend EVE_FORCEINLINE auto operator<(wide a, wide b) noexcept #if !defined(EVE_DOXYGEN_INVOKED) requires(supports_ordering_v) #endif { - return detail::self_less(v, w); + return is_less(a, b); } //! @brief Element-wise less-than comparison between a eve::wide and a scalar template - friend EVE_FORCEINLINE auto operator<(wide v, S w) noexcept + friend EVE_FORCEINLINE auto operator<(wide w, S s) noexcept #if !defined(EVE_DOXYGEN_INVOKED) requires(supports_ordering_v) #endif { - return v < wide {w}; + return is_less(w, s); } //! @brief Element-wise less-than comparison between a scalar and a eve::wide template - friend EVE_FORCEINLINE auto operator<(S v, wide w) noexcept + friend EVE_FORCEINLINE auto operator<(S s, wide w) noexcept #if !defined(EVE_DOXYGEN_INVOKED) requires(supports_ordering_v) #endif { - return wide {v} < w; + return is_less(s, w); } //! @brief Element-wise greater-than comparison between eve::wide diff --git a/include/eve/concept/compatible.hpp b/include/eve/concept/compatible.hpp index 8ecd6b23eb..a824c441d0 100644 --- a/include/eve/concept/compatible.hpp +++ b/include/eve/concept/compatible.hpp @@ -31,19 +31,15 @@ namespace eve concept size_compatible_values = size_compatible_to || size_compatible_to; - - template - concept same_value_type = std::same_as< element_type_t> - , element_type_t> - >; + template + concept same_element_type = (std::same_as>, element_type_t>> && ...); template concept different_value_type = !std::same_as, element_type_t>; - template concept index_compatible_values = integral_value && floating_value && size_compatible_values; - - + template + concept compatible_arithmetic_values = scalar_value || scalar_value || std::same_as; } diff --git a/include/eve/concept/value.hpp b/include/eve/concept/value.hpp index 895ef0ee48..53a161ce72 100644 --- a/include/eve/concept/value.hpp +++ b/include/eve/concept/value.hpp @@ -151,6 +151,25 @@ namespace eve //! @} //================================================================================================ + //================================================================================================ + //! @ingroup simd_concepts + //! @{ + //! @concept arithmetic_value + //! @brief The concept `plain_value` is satisfied if and only if T satisfies + //! `eve::arithmetic_simd_value` or `eve::arithmetic_scalar_value`. + //! + //! @groupheader{Examples} + //! - `int` + //! - `eve::wide` + //! - `kumi::tuple` + //! - `eve::wide>` + //================================================================================================ + template + concept arithmetic_value = arithmetic_simd_value || arithmetic_scalar_value; + //================================================================================================ + //! @} + //================================================================================================ + //================================================================================================ //! @ingroup simd_concepts //! @{ diff --git a/include/eve/detail/function/simd/arm/neon/friends.hpp b/include/eve/detail/function/simd/arm/neon/friends.hpp index 26f9d9212d..b0226d408c 100644 --- a/include/eve/detail/function/simd/arm/neon/friends.hpp +++ b/include/eve/detail/function/simd/arm/neon/friends.hpp @@ -62,45 +62,6 @@ namespace eve::detail return !(v == w); } - //================================================================================================ - // operator!= implementation - //================================================================================================ - template - EVE_FORCEINLINE logical> self_less ( wide v - , wide w - ) noexcept - requires arm_abi> - { - constexpr auto cat = categorize>(); - - if constexpr( cat == category::int32x4 ) return vcltq_s32(v, w); - else if constexpr( cat == category::int16x8 ) return vcltq_s16(v, w); - else if constexpr( cat == category::int8x16 ) return vcltq_s8(v, w); - else if constexpr( cat == category::uint32x4 ) return vcltq_u32(v, w); - else if constexpr( cat == category::uint16x8 ) return vcltq_u16(v, w); - else if constexpr( cat == category::uint8x16 ) return vcltq_u8(v, w); - else if constexpr( cat == category::float32x4) return vcltq_f32(v, w); - else if constexpr( cat == category::int32x2 ) return vclt_s32(v, w); - else if constexpr( cat == category::int16x4 ) return vclt_s16(v, w); - else if constexpr( cat == category::int8x8 ) return vclt_s8(v, w); - else if constexpr( cat == category::uint32x2 ) return vclt_u32(v, w); - else if constexpr( cat == category::uint16x4 ) return vclt_u16(v, w); - else if constexpr( cat == category::uint8x8 ) return vclt_u8(v, w); - else if constexpr( cat == category::float32x2) return vclt_f32(v, w); - else if constexpr( current_api >= asimd) - { - if constexpr( cat == category::float64x1) return vclt_f64(v, w); - else if constexpr( cat == category::int64x1) return vclt_s64(v, w); - else if constexpr( cat == category::uint64x1) return vclt_u64(v, w); - else if constexpr( cat == category::float64x2) return vcltq_f64(v, w); - else if constexpr( cat == category::int64x2) return vcltq_s64(v, w); - else if constexpr( cat == category::uint64x2) return vcltq_u64(v, w); - } - else if constexpr( sizeof(T) == 8 ) - return map([](E const& e, E const& f){ return as_logical_t(e < f); }, v, w); - - } - template EVE_FORCEINLINE logical> self_greater( wide v , wide w diff --git a/include/eve/detail/function/simd/arm/sve/friends.hpp b/include/eve/detail/function/simd/arm/sve/friends.hpp index 4c3432d26f..a9b0029a37 100644 --- a/include/eve/detail/function/simd/arm/sve/friends.hpp +++ b/include/eve/detail/function/simd/arm/sve/friends.hpp @@ -22,11 +22,6 @@ EVE_FORCEINLINE auto self_neq(wide v, wide w) noexcept -> as_logical_t> requires sve_abi> { return svcmpne(sve_true(), v, w); } -template -EVE_FORCEINLINE auto -self_less(wide v, wide w) noexcept -> as_logical_t> -requires sve_abi> { return svcmplt(sve_true(), v, w); } - template EVE_FORCEINLINE auto self_greater(wide v, wide w) noexcept -> as_logical_t> diff --git a/include/eve/detail/function/simd/common/friends.hpp b/include/eve/detail/function/simd/common/friends.hpp index 6caa82a3a8..a516f5146e 100644 --- a/include/eve/detail/function/simd/common/friends.hpp +++ b/include/eve/detail/function/simd/common/friends.hpp @@ -39,7 +39,7 @@ namespace eve::detail } else { - return convert(v.storage() == w.storage(), as_element>()); + return v.storage() == w.storage(); } } @@ -77,7 +77,7 @@ namespace eve::detail } else { - return convert(v.storage() != w.storage(), as_element>()); + return v.storage() != w.storage(); } } @@ -99,20 +99,6 @@ namespace eve::detail //================================================================================================ // Ordering operators - template - EVE_FORCEINLINE auto self_less(Wide const& v,Wide const& w) noexcept - { - if constexpr( product_type ) - { - return convert(kumi::to_tuple(v) < kumi::to_tuple(w), as_element>()); - } - else - { - constexpr auto lt = [](E const& e, E const& f) { return as_logical_t(e < f); }; - return apply_over(lt, v, w); - } - } - template EVE_FORCEINLINE auto self_leq(Wide const& v,Wide const& w) noexcept { @@ -146,7 +132,7 @@ namespace eve::detail { if constexpr( product_type ) { - return convert(kumi::to_tuple(v) >= kumi::to_tuple(w), as_element>()); + return kumi::to_tuple(v) >= kumi::to_tuple(w); } else { diff --git a/include/eve/detail/function/simd/ppc/friends.hpp b/include/eve/detail/function/simd/ppc/friends.hpp index 6352c14d4c..3cbc940e3c 100644 --- a/include/eve/detail/function/simd/ppc/friends.hpp +++ b/include/eve/detail/function/simd/ppc/friends.hpp @@ -28,13 +28,6 @@ namespace eve::detail return logical>(vec_cmpne(v.storage(), w.storage())); } - template - EVE_FORCEINLINE auto self_less(wide const &v, wide const &w) noexcept - requires ppc_abi> - { - return logical>(vec_cmplt(v.storage(), w.storage())); - } - template EVE_FORCEINLINE auto self_greater(wide const &v, wide const &w) noexcept requires ppc_abi> diff --git a/include/eve/detail/function/simd/riscv/friends.hpp b/include/eve/detail/function/simd/riscv/friends.hpp index f501865597..4d2476c49c 100644 --- a/include/eve/detail/function/simd/riscv/friends.hpp +++ b/include/eve/detail/function/simd/riscv/friends.hpp @@ -44,37 +44,6 @@ requires rvv_abi> return self_greater_impl(lhs, rhs); } -template -EVE_FORCEINLINE auto -self_less_impl(wide lhs, U rhs) noexcept -> logical> -requires rvv_abi> && (std::same_as, U> || scalar_value) -{ - if constexpr( scalar_value && !std::same_as ) return self_less(lhs, static_cast(rhs)); - else - { - constexpr auto c = categorize>(); - if constexpr( match(c, category::int_) ) return __riscv_vmslt(lhs, rhs, N::value); - else if constexpr( match(c, category::uint_) ) return __riscv_vmsltu(lhs, rhs, N::value); - else if constexpr( match(c, category::float_) ) return __riscv_vmflt(lhs, rhs, N::value); - } -} - -template -EVE_FORCEINLINE auto -self_less(wide lhs, wide rhs) noexcept -> logical> -requires rvv_abi> -{ - return self_less_impl(lhs, rhs); -} - -template -EVE_FORCEINLINE auto -self_less(wide lhs, std::convertible_to auto rhs) noexcept -> logical> -requires rvv_abi> -{ - return self_less_impl(lhs, rhs); -} - template EVE_FORCEINLINE auto self_geq_impl(wide lhs, U rhs) noexcept -> logical> diff --git a/include/eve/detail/function/simd/x86/friends.hpp b/include/eve/detail/function/simd/x86/friends.hpp index 0e1aa20de5..03901edc7f 100644 --- a/include/eve/detail/function/simd/x86/friends.hpp +++ b/include/eve/detail/function/simd/x86/friends.hpp @@ -179,86 +179,6 @@ self_neq(logical> v, logical> w) noexcept requires x86_abi else { return bit_cast(v.bits() ^ w.bits(), as(v)); } } -//================================================================================================ -template -EVE_FORCEINLINE as_logical_t> - self_less(wide v, wide w) noexcept requires x86_abi> -{ - constexpr auto c = categorize>(); - constexpr auto f = to_integer(cmp_flt::lt_oq); - - if constexpr( current_api >= avx512 ) - { - if constexpr( c == category::float32x16 ) return mask16 {_mm512_cmp_ps_mask(v, w, f)}; - else if constexpr( c == category::float32x8 ) return mask8 {_mm256_cmp_ps_mask(v, w, f)}; - else if constexpr( c == category::float32x4 ) return mask8 {_mm_cmp_ps_mask(v, w, f)}; - else if constexpr( c == category::float64x8 ) return mask8 {_mm512_cmp_pd_mask(v, w, f)}; - else if constexpr( c == category::float64x4 ) return mask8 {_mm256_cmp_pd_mask(v, w, f)}; - else if constexpr( c == category::float64x2 ) return mask8 {_mm_cmp_pd_mask(v, w, f)}; - else if constexpr( c == category::uint64x8 ) return mask8 {_mm512_cmplt_epu64_mask(v, w)}; - else if constexpr( c == category::uint64x4 ) return mask8 {_mm256_cmplt_epu64_mask(v, w)}; - else if constexpr( c == category::uint64x2 ) return mask8 {_mm_cmplt_epu64_mask(v, w)}; - else if constexpr( c == category::uint32x16 ) return mask16 {_mm512_cmplt_epu32_mask(v, w)}; - else if constexpr( c == category::uint32x8 ) return mask8 {_mm256_cmplt_epu32_mask(v, w)}; - else if constexpr( c == category::uint32x4 ) return mask8 {_mm_cmplt_epu32_mask(v, w)}; - else if constexpr( c == category::uint16x32 ) return mask32 {_mm512_cmplt_epu16_mask(v, w)}; - else if constexpr( c == category::uint16x16 ) return mask16 {_mm256_cmplt_epu16_mask(v, w)}; - else if constexpr( c == category::uint16x8 ) return mask8 {_mm_cmplt_epu16_mask(v, w)}; - else if constexpr( c == category::uint8x64 ) return mask64 {_mm512_cmplt_epu8_mask(v, w)}; - else if constexpr( c == category::uint8x32 ) return mask32 {_mm256_cmplt_epu8_mask(v, w)}; - else if constexpr( c == category::uint8x16 ) return mask16 {_mm_cmplt_epu8_mask(v, w)}; - else if constexpr( c == category::int64x8 ) return mask8 {_mm512_cmplt_epi64_mask(v, w)}; - else if constexpr( c == category::int64x4 ) return mask8 {_mm256_cmplt_epi64_mask(v, w)}; - else if constexpr( c == category::int64x2 ) return mask8 {_mm_cmplt_epi64_mask(v, w)}; - else if constexpr( c == category::int32x16 ) return mask16 {_mm512_cmplt_epi32_mask(v, w)}; - else if constexpr( c == category::int32x8 ) return mask8 {_mm256_cmplt_epi32_mask(v, w)}; - else if constexpr( c == category::int32x4 ) return mask8 {_mm_cmplt_epi32_mask(v, w)}; - else if constexpr( c == category::int16x32 ) return mask32 {_mm512_cmplt_epi16_mask(v, w)}; - else if constexpr( c == category::int16x16 ) return mask16 {_mm256_cmplt_epi16_mask(v, w)}; - else if constexpr( c == category::int16x8 ) return mask8 {_mm_cmplt_epi16_mask(v, w)}; - else if constexpr( c == category::int8x64 ) return mask64 {_mm512_cmplt_epi8_mask(v, w)}; - else if constexpr( c == category::int8x32 ) return mask32 {_mm256_cmplt_epi8_mask(v, w)}; - else if constexpr( c == category::int8x16 ) return mask16 {_mm_cmplt_epi8_mask(v, w)}; - } - else - { - if constexpr( c == category::float32x8 ) return _mm256_cmp_ps(v, w, f); - else if constexpr( c == category::float64x4 ) return _mm256_cmp_pd(v, w, f); - else if constexpr( c == category::float32x4 ) return _mm_cmplt_ps(v, w); - else if constexpr( c == category::float64x2 ) return _mm_cmplt_pd(v, w); - else - { - constexpr auto use_avx2 = current_api >= avx2; - constexpr auto lt = [](E ev, E fv) { return as_logical_t(ev < fv); }; - - [[maybe_unused]] auto unsigned_cmp = [](auto vv, auto wv) - { - using l_t = logical>; - auto const sm = signmask(as, signed>>()); - return bit_cast((bit_cast(vv, as(sm)) - sm) < (bit_cast(wv, as(sm)) - sm), as {}); - }; - - if constexpr( use_avx2 && c == category::int64x4 ) return _mm256_cmpgt_epi64(w, v); - else if constexpr( use_avx2 && c == category::uint64x4 ) return unsigned_cmp(v, w); - else if constexpr( use_avx2 && c == category::int32x8 ) return _mm256_cmpgt_epi32(w, v); - else if constexpr( use_avx2 && c == category::uint32x8 ) return unsigned_cmp(v, w); - else if constexpr( use_avx2 && c == category::int16x16 ) return _mm256_cmpgt_epi16(w, v); - else if constexpr( use_avx2 && c == category::uint16x16 ) return unsigned_cmp(v, w); - else if constexpr( use_avx2 && c == category::int8x32 ) return _mm256_cmpgt_epi8(w, v); - else if constexpr( use_avx2 && c == category::uint8x32 ) return unsigned_cmp(v, w); - else if constexpr( c == category::int64x2 ) return map(lt, v, w); - else if constexpr( c == category::int32x4 ) return _mm_cmplt_epi32(v, w); - else if constexpr( c == category::int16x8 ) return _mm_cmplt_epi16(v, w); - else if constexpr( c == category::int8x16 ) return _mm_cmplt_epi8(v, w); - else if constexpr( c == category::uint64x2 ) return map(lt, v, w); - else if constexpr( c == category::uint32x4 ) return unsigned_cmp(v, w); - else if constexpr( c == category::uint16x8 ) return unsigned_cmp(v, w); - else if constexpr( c == category::uint8x16 ) return unsigned_cmp(v, w); - else return aggregate(lt, v, w); - } - } -} - //================================================================================================ template EVE_FORCEINLINE as_logical_t> diff --git a/include/eve/module/core/regular/impl/is_less.hpp b/include/eve/module/core/regular/impl/is_less.hpp new file mode 100644 index 0000000000..2f80ca1e0f --- /dev/null +++ b/include/eve/module/core/regular/impl/is_less.hpp @@ -0,0 +1,32 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +namespace eve::detail +{ + template + EVE_FORCEINLINE constexpr as_logical_t is_less_(EVE_REQUIRES(cpu_), O const& o, T a, T b) noexcept + { + if constexpr (O::contains(definitely)) + { + auto tol = o[definitely].value(T{}); + + if constexpr (integral_value) return a < eve::prev(b, tol); + else return a < fam(b, -tol, eve::max(eve::abs(a), eve::abs(b))); + } + else if constexpr (product_type) + { + return kumi::to_tuple(a) < kumi::to_tuple(b); + } + else + { + if constexpr (scalar_value) return as_logical_t(a < b); + else return map([](auto e, auto f) { return e < f; }, a, b); + } + } +} diff --git a/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp b/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp new file mode 100644 index 0000000000..e374b0e600 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/arm/neon/is_less.hpp @@ -0,0 +1,55 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_(EVE_REQUIRES(neon128_), O const& opts, wide a, wide b) noexcept + requires arm_abi> + { + if constexpr (O::contains(definitely)) + { + return is_less.behavior(cpu_{}, opts, a, b); + } + else + { + constexpr auto cat = categorize>(); + + if constexpr (cat == category::int32x4 ) return vcltq_s32(a, b); + else if constexpr (cat == category::int16x8 ) return vcltq_s16(a, b); + else if constexpr (cat == category::int8x16 ) return vcltq_s8(a, b); + else if constexpr (cat == category::uint32x4 ) return vcltq_u32(a, b); + else if constexpr (cat == category::uint16x8 ) return vcltq_u16(a, b); + else if constexpr (cat == category::uint8x16 ) return vcltq_u8(a, b); + else if constexpr (cat == category::float32x4) return vcltq_f32(a, b); + else if constexpr (cat == category::int32x2 ) return vclt_s32(a, b); + else if constexpr (cat == category::int16x4 ) return vclt_s16(a, b); + else if constexpr (cat == category::int8x8 ) return vclt_s8(a, b); + else if constexpr (cat == category::uint32x2 ) return vclt_u32(a, b); + else if constexpr (cat == category::uint16x4 ) return vclt_u16(a, b); + else if constexpr (cat == category::uint8x8 ) return vclt_u8(a, b); + else if constexpr (cat == category::float32x2) return vclt_f32(a, b); + else if constexpr (current_api >= asimd) + { + if constexpr (cat == category::float64x1) return vclt_f64(a, b); + else if constexpr (cat == category::int64x1) return vclt_s64(a, b); + else if constexpr (cat == category::uint64x1) return vclt_u64(a, b); + else if constexpr (cat == category::float64x2) return vcltq_f64(a, b); + else if constexpr (cat == category::int64x2) return vcltq_s64(a, b); + else if constexpr (cat == category::uint64x2) return vcltq_u64(a, b); + } + else return map(is_less, a, b); + } + } +} diff --git a/include/eve/module/core/regular/impl/simd/arm/sve/is_less.hpp b/include/eve/module/core/regular/impl/simd/arm/sve/is_less.hpp new file mode 100644 index 0000000000..3f8459bdd7 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/arm/sve/is_less.hpp @@ -0,0 +1,22 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_(EVE_REQUIRES(sve_), O const& opts, wide a, wide b) noexcept + requires sve_abi> + { + if constexpr (O::contains(definitely)) return is_less.behavior(cpu_{}, opts, a, b); + else return svcmplt(sve_true(), a, b); + } +} diff --git a/include/eve/module/core/regular/impl/simd/ppc/is_less.hpp b/include/eve/module/core/regular/impl/simd/ppc/is_less.hpp new file mode 100644 index 0000000000..8bdcde40b3 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/ppc/is_less.hpp @@ -0,0 +1,24 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_(EVE_REQUIRES(vmx_), O const& opts, wide a, wide b) noexcept + requires ppc_abi> + { + if constexpr (O::contains(definitely)) return is_less.behavior(cpu_{}, opts, a, b); + else return logical>(vec_cmplt(a.storage(), b.storage())); + } +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/is_less.hpp b/include/eve/module/core/regular/impl/simd/riscv/is_less.hpp new file mode 100644 index 0000000000..331ceb82d8 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/is_less.hpp @@ -0,0 +1,32 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ + template + EVE_FORCEINLINE logical> is_less_(EVE_REQUIRES(rvv_), O const& opts, wide a, U b) noexcept + requires (rvv_abi> && same_element_type) + { + if constexpr (O::contains(definitely)) + { + return is_less.behavior(cpu_{}, opts, a, b); + } + else + { + constexpr auto c = categorize>(); + + if constexpr (match(c, category::int_)) return __riscv_vmslt(a, b, N::value); + else if constexpr (match(c, category::uint_)) return __riscv_vmsltu(a, b, N::value); + else if constexpr (match(c, category::float_)) return __riscv_vmflt(a, b, N::value); + } + } +} diff --git a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp index aae47805ca..1465ccd66f 100644 --- a/include/eve/module/core/regular/impl/simd/x86/is_less.hpp +++ b/include/eve/module/core/regular/impl/simd/x86/is_less.hpp @@ -19,58 +19,151 @@ namespace eve::detail { -// ----------------------------------------------------------------------------------------------- -// masked implementation - template - EVE_FORCEINLINE as_logical_t> is_less_(EVE_REQUIRES(avx512_), - C const &mask, - O const &opts, - wide const &v, - wide const &w) noexcept - requires x86_abi> + template + EVE_FORCEINLINE logical> is_less_(EVE_REQUIRES(sse2_), O const& opts, wide a, wide b) noexcept + requires x86_abi> + { + if constexpr (O::contains(definitely)) + { + return is_less.behavior(cpu_{}, opts, a, b); + } + else + { + constexpr auto c = categorize>(); + constexpr auto f = to_integer(cmp_flt::lt_oq); + + if constexpr (current_api >= avx512) + { + if constexpr (c == category::float32x16) return mask16 {_mm512_cmp_ps_mask(a, b, f)}; + else if constexpr (c == category::float32x8) return mask8 {_mm256_cmp_ps_mask(a, b, f)}; + else if constexpr (c == category::float32x4) return mask8 {_mm_cmp_ps_mask(a, b, f)}; + else if constexpr (c == category::float64x8) return mask8 {_mm512_cmp_pd_mask(a, b, f)}; + else if constexpr (c == category::float64x4) return mask8 {_mm256_cmp_pd_mask(a, b, f)}; + else if constexpr (c == category::float64x2) return mask8 {_mm_cmp_pd_mask(a, b, f)}; + else if constexpr (c == category::uint64x8) return mask8 {_mm512_cmplt_epu64_mask(a, b)}; + else if constexpr (c == category::uint64x4) return mask8 {_mm256_cmplt_epu64_mask(a, b)}; + else if constexpr (c == category::uint64x2) return mask8 {_mm_cmplt_epu64_mask(a, b)}; + else if constexpr (c == category::uint32x16) return mask16 {_mm512_cmplt_epu32_mask(a, b)}; + else if constexpr (c == category::uint32x8) return mask8 {_mm256_cmplt_epu32_mask(a, b)}; + else if constexpr (c == category::uint32x4) return mask8 {_mm_cmplt_epu32_mask(a, b)}; + else if constexpr (c == category::uint16x32) return mask32 {_mm512_cmplt_epu16_mask(a, b)}; + else if constexpr (c == category::uint16x16) return mask16 {_mm256_cmplt_epu16_mask(a, b)}; + else if constexpr (c == category::uint16x8) return mask8 {_mm_cmplt_epu16_mask(a, b)}; + else if constexpr (c == category::uint8x64) return mask64 {_mm512_cmplt_epu8_mask(a, b)}; + else if constexpr (c == category::uint8x32) return mask32 {_mm256_cmplt_epu8_mask(a, b)}; + else if constexpr (c == category::uint8x16) return mask16 {_mm_cmplt_epu8_mask(a, b)}; + else if constexpr (c == category::int64x8) return mask8 {_mm512_cmplt_epi64_mask(a, b)}; + else if constexpr (c == category::int64x4) return mask8 {_mm256_cmplt_epi64_mask(a, b)}; + else if constexpr (c == category::int64x2) return mask8 {_mm_cmplt_epi64_mask(a, b)}; + else if constexpr (c == category::int32x16) return mask16 {_mm512_cmplt_epi32_mask(a, b)}; + else if constexpr (c == category::int32x8) return mask8 {_mm256_cmplt_epi32_mask(a, b)}; + else if constexpr (c == category::int32x4) return mask8 {_mm_cmplt_epi32_mask(a, b)}; + else if constexpr (c == category::int16x32) return mask32 {_mm512_cmplt_epi16_mask(a, b)}; + else if constexpr (c == category::int16x16) return mask16 {_mm256_cmplt_epi16_mask(a, b)}; + else if constexpr (c == category::int16x8) return mask8 {_mm_cmplt_epi16_mask(a, b)}; + else if constexpr (c == category::int8x64) return mask64 {_mm512_cmplt_epi8_mask(a, b)}; + else if constexpr (c == category::int8x32) return mask32 {_mm256_cmplt_epi8_mask(a, b)}; + else if constexpr (c == category::int8x16) return mask16 {_mm_cmplt_epi8_mask(a, b)}; + } + else + { + if constexpr (c == category::float32x8) return _mm256_cmp_ps(a, b, f); + else if constexpr (c == category::float64x4) return _mm256_cmp_pd(a, b, f); + else if constexpr (c == category::float32x4) return _mm_cmplt_ps(a, b); + else if constexpr (c == category::float64x2) return _mm_cmplt_pd(a, b); + else + { + constexpr auto use_avx2 = current_api >= avx2; + constexpr auto use_sse4_1 = current_api >= sse4_1; + constexpr auto lt = [](E ev, E fv) { return as_logical_t(ev < fv); }; + + [[maybe_unused]] auto unsigned_cmp = [](auto lhs, auto rhs) + { + using l_t = logical>; + auto const sm = signmask(as, signed>>{}); + return bit_cast((bit_cast(lhs, as(sm)) - sm) < (bit_cast(rhs, as(sm)) - sm), as{}); + }; + + if constexpr (use_avx2 && c == category::int64x4) return _mm256_cmpgt_epi64(b, a); + else if constexpr (use_avx2 && c == category::uint64x4) return unsigned_cmp(a, b); + else if constexpr (use_avx2 && c == category::int32x8) return _mm256_cmpgt_epi32(b, a); + else if constexpr (use_avx2 && c == category::uint32x8) return eve::min(a, b) != b; + else if constexpr (use_avx2 && c == category::int16x16) return _mm256_cmpgt_epi16(b, a); + else if constexpr (use_avx2 && c == category::uint16x16) return eve::min(a, b) != b; + else if constexpr (use_avx2 && c == category::int8x32) return _mm256_cmpgt_epi8(b, a); + else if constexpr (use_avx2 && c == category::uint8x32) return eve::min(a, b) != b; + else if constexpr (c == category::int32x4) return _mm_cmplt_epi32(a, b); + else if constexpr (c == category::int16x8) return _mm_cmplt_epi16(a, b); + else if constexpr (c == category::int8x16) return _mm_cmplt_epi8(a, b); + else if constexpr (c == category::uint32x4) + { + if constexpr (use_sse4_1) return eve::min(a, b) != b; + else return unsigned_cmp(a, b); + } + else if constexpr (c == category::uint16x8) + { + if constexpr (use_sse4_1) return eve::min(a, b) != b; + else return unsigned_cmp(a, b); + } + else if constexpr (c == category::uint8x16) + { + if constexpr (use_sse4_1) return eve::min(a, b) != b; + else return unsigned_cmp(a, b); + } + else return map(lt, a, b); + } + } + } + } + + // ----------------------------------------------------------------------------------------------- + // masked implementation + template + EVE_FORCEINLINE logical> is_less_(EVE_REQUIRES(avx512_), C const &mask, O const &opts, wide a, wide b) noexcept + requires x86_abi> { if constexpr( C::has_alternative || O::contains(definitely)) { - return is_less.behavior(cpu_{}, opts, v, w); + return is_less.behavior(cpu_{}, opts, a, b); } else { - auto const s = alternative(mask, v, as(to_logical(v))); - [[maybe_unused]] auto m = expand_mask(mask, as(v)).storage().value; + auto const s = alternative(mask, a, as(to_logical(a))); + [[maybe_unused]] auto m = expand_mask(mask, as(a)).storage().value; constexpr auto c = categorize>(); constexpr auto f = to_integer(cmp_flt::lt_oq); if constexpr( C::is_complete ) return s; - else if constexpr( c == category::float32x16 ) return mask16 {_mm512_mask_cmp_ps_mask(m, v, w, f)}; - else if constexpr( c == category::float64x8 ) return mask8 {_mm512_mask_cmp_pd_mask(m, v, w, f)}; - else if constexpr( c == category::float32x8 ) return mask8 {_mm256_mask_cmp_ps_mask(m, v, w, f)}; - else if constexpr( c == category::float64x4 ) return mask8 {_mm256_mask_cmp_pd_mask(m, v, w, f)}; - else if constexpr( c == category::float32x4 ) return mask8 {_mm_mask_cmp_ps_mask(m, v, w, f)}; - else if constexpr( c == category::float64x2 ) return mask8 {_mm_mask_cmp_pd_mask(m, v, w, f)}; - else if constexpr( c == category::int64x8 ) return mask8 {_mm512_mask_cmplt_epi64_mask(m, v, w)}; - else if constexpr( c == category::int64x4 ) return mask8 {_mm256_mask_cmplt_epi64_mask(m, v, w)}; - else if constexpr( c == category::int64x2 ) return mask8 {_mm_mask_cmplt_epi64_mask(m, v, w)}; - else if constexpr( c == category::int32x16 ) return mask16 {_mm512_mask_cmplt_epi32_mask(m, v, w)}; - else if constexpr( c == category::int32x8 ) return mask8 {_mm256_mask_cmplt_epi32_mask(m, v, w)}; - else if constexpr( c == category::int32x4 ) return mask8 {_mm_mask_cmplt_epi32_mask(m, v, w)}; - else if constexpr( c == category::int16x32 ) return mask32 {_mm512_mask_cmplt_epi16_mask(m, v, w)}; - else if constexpr( c == category::int16x16 ) return mask16 {_mm256_mask_cmplt_epi16_mask(m, v, w)}; - else if constexpr( c == category::int16x8 ) return mask8 {_mm_mask_cmplt_epi16_mask(m, v, w)}; - else if constexpr( c == category::int8x64 ) return mask64 {_mm512_mask_cmplt_epi8_mask(m, v, w)}; - else if constexpr( c == category::int8x32 ) return mask32 {_mm256_mask_cmplt_epi8_mask(m, v, w)}; - else if constexpr( c == category::int8x16 ) return mask16 {_mm_mask_cmplt_epi8_mask(m, v, w)}; - else if constexpr( c == category::uint64x8 ) return mask8 {_mm512_mask_cmplt_epu64_mask(m, v, w)}; - else if constexpr( c == category::uint64x4 ) return mask8 {_mm256_mask_cmplt_epu64_mask(m, v, w)}; - else if constexpr( c == category::uint64x2 ) return mask8 {_mm_mask_cmplt_epu64_mask(m, v, w)}; - else if constexpr( c == category::uint32x16 ) return mask16 {_mm512_mask_cmplt_epu32_mask(m, v, w)}; - else if constexpr( c == category::uint32x8 ) return mask8 {_mm256_mask_cmplt_epu32_mask(m, v, w)}; - else if constexpr( c == category::uint32x4 ) return mask8 {_mm_mask_cmplt_epu32_mask(m, v, w)}; - else if constexpr( c == category::uint16x32 ) return mask32 {_mm512_mask_cmplt_epu16_mask(m, v, w)}; - else if constexpr( c == category::uint16x16 ) return mask16 {_mm256_mask_cmplt_epu16_mask(m, v, w)}; - else if constexpr( c == category::uint16x8 ) return mask8 {_mm_mask_cmplt_epu16_mask(m, v, w)}; - else if constexpr( c == category::uint8x64 ) return mask64 {_mm512_mask_cmplt_epu8_mask(m, v, w)}; - else if constexpr( c == category::uint8x32 ) return mask32 {_mm256_mask_cmplt_epu8_mask(m, v, w)}; - else if constexpr( c == category::uint8x16 ) return mask16 {_mm_mask_cmplt_epu8_mask(m, v, w)}; + else if constexpr( c == category::float32x16 ) return mask16 {_mm512_mask_cmp_ps_mask(m, a, b, f)}; + else if constexpr( c == category::float64x8 ) return mask8 {_mm512_mask_cmp_pd_mask(m, a, b, f)}; + else if constexpr( c == category::float32x8 ) return mask8 {_mm256_mask_cmp_ps_mask(m, a, b, f)}; + else if constexpr( c == category::float64x4 ) return mask8 {_mm256_mask_cmp_pd_mask(m, a, b, f)}; + else if constexpr( c == category::float32x4 ) return mask8 {_mm_mask_cmp_ps_mask(m, a, b, f)}; + else if constexpr( c == category::float64x2 ) return mask8 {_mm_mask_cmp_pd_mask(m, a, b, f)}; + else if constexpr( c == category::int64x8 ) return mask8 {_mm512_mask_cmplt_epi64_mask(m, a, b)}; + else if constexpr( c == category::int64x4 ) return mask8 {_mm256_mask_cmplt_epi64_mask(m, a, b)}; + else if constexpr( c == category::int64x2 ) return mask8 {_mm_mask_cmplt_epi64_mask(m, a, b)}; + else if constexpr( c == category::int32x16 ) return mask16 {_mm512_mask_cmplt_epi32_mask(m, a, b)}; + else if constexpr( c == category::int32x8 ) return mask8 {_mm256_mask_cmplt_epi32_mask(m, a, b)}; + else if constexpr( c == category::int32x4 ) return mask8 {_mm_mask_cmplt_epi32_mask(m, a, b)}; + else if constexpr( c == category::int16x32 ) return mask32 {_mm512_mask_cmplt_epi16_mask(m, a, b)}; + else if constexpr( c == category::int16x16 ) return mask16 {_mm256_mask_cmplt_epi16_mask(m, a, b)}; + else if constexpr( c == category::int16x8 ) return mask8 {_mm_mask_cmplt_epi16_mask(m, a, b)}; + else if constexpr( c == category::int8x64 ) return mask64 {_mm512_mask_cmplt_epi8_mask(m, a, b)}; + else if constexpr( c == category::int8x32 ) return mask32 {_mm256_mask_cmplt_epi8_mask(m, a, b)}; + else if constexpr( c == category::int8x16 ) return mask16 {_mm_mask_cmplt_epi8_mask(m, a, b)}; + else if constexpr( c == category::uint64x8 ) return mask8 {_mm512_mask_cmplt_epu64_mask(m, a, b)}; + else if constexpr( c == category::uint64x4 ) return mask8 {_mm256_mask_cmplt_epu64_mask(m, a, b)}; + else if constexpr( c == category::uint64x2 ) return mask8 {_mm_mask_cmplt_epu64_mask(m, a, b)}; + else if constexpr( c == category::uint32x16 ) return mask16 {_mm512_mask_cmplt_epu32_mask(m, a, b)}; + else if constexpr( c == category::uint32x8 ) return mask8 {_mm256_mask_cmplt_epu32_mask(m, a, b)}; + else if constexpr( c == category::uint32x4 ) return mask8 {_mm_mask_cmplt_epu32_mask(m, a, b)}; + else if constexpr( c == category::uint16x32 ) return mask32 {_mm512_mask_cmplt_epu16_mask(m, a, b)}; + else if constexpr( c == category::uint16x16 ) return mask16 {_mm256_mask_cmplt_epu16_mask(m, a, b)}; + else if constexpr( c == category::uint16x8 ) return mask8 {_mm_mask_cmplt_epu16_mask(m, a, b)}; + else if constexpr( c == category::uint8x64 ) return mask64 {_mm512_mask_cmplt_epu8_mask(m, a, b)}; + else if constexpr( c == category::uint8x32 ) return mask32 {_mm256_mask_cmplt_epu8_mask(m, a, b)}; + else if constexpr( c == category::uint8x16 ) return mask16 {_mm_mask_cmplt_epu8_mask(m, a, b)}; } } } diff --git a/include/eve/module/core/regular/is_less.hpp b/include/eve/module/core/regular/is_less.hpp index df4aa081c7..63425af819 100644 --- a/include/eve/module/core/regular/is_less.hpp +++ b/include/eve/module/core/regular/is_less.hpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -22,12 +23,16 @@ namespace eve { template - struct is_less_t : strict_elementwise_callable + struct is_less_t : elementwise_callable { - template - requires(eve::same_lanes_or_scalar) - constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const + template + constexpr EVE_FORCEINLINE common_logical_t operator()(T a, U b) const + requires (compatible_arithmetic_values) { + if constexpr (Options::contains(definitely)) + { + static_assert(floating_value, "[eve::is_less] The definitely option is only supported for floating types."); + } // static_assert( valid_tolerance, Options>::value, "[eve::is_less] simd tolerance requires at least one simd parameter." ); return EVE_DISPATCH_CALL(a, b); } @@ -95,40 +100,26 @@ namespace eve // Required for if_else optimisation detections using callable_is_less_ = tag_t; - - namespace detail - { - template - EVE_FORCEINLINE constexpr common_logical_t - is_less_(EVE_REQUIRES(cpu_), O const&, logical a, logical b) noexcept - { - if constexpr( scalar_value && scalar_value) return common_logical_t(a < b); - else return a < b; - } - - template - EVE_FORCEINLINE constexpr common_logical_t - is_less_(EVE_REQUIRES(cpu_), O const & o, T const& aa, U const& bb) noexcept - { - if constexpr(O::contains(definitely)) - { - using w_t = common_value_t; - auto a = w_t(aa); - auto b = w_t(bb); - - auto tol = o[definitely].value(w_t{}); - if constexpr(integral_value) return a < eve::prev(b, tol); - else return a < fam(b, -tol, eve::max(eve::abs(a), eve::abs(b))); - } - else - { - if constexpr(scalar_value && scalar_value) return common_logical_t(aa < bb); - else return aa < bb; - } - } - } } +#include + #if defined(EVE_INCLUDE_X86_HEADER) # include #endif + +#if defined(EVE_INCLUDE_POWERPC_HEADER) +# include +#endif + +#if defined(EVE_INCLUDE_ARM_NEON_HEADER) +# include +#endif + +#if defined(EVE_INCLUDE_ARM_SVE_HEADER) +# include +#endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/traits/as_logical.hpp b/include/eve/traits/as_logical.hpp index e2811b1f20..b19ca794d2 100644 --- a/include/eve/traits/as_logical.hpp +++ b/include/eve/traits/as_logical.hpp @@ -13,6 +13,10 @@ namespace eve { + // forward declare common_logical + template + struct common_logical; + template struct as_logical { @@ -31,15 +35,14 @@ namespace eve using type = logical; }; - - template - struct as_logical : as_logical< kumi::element_t<0,T> > - {}; - template struct as_logical : as_logical< translate_t > {}; + template + struct as_logical: kumi::apply_traits + {}; + template using as_logical_t = typename as_logical::type; } diff --git a/test/unit/meta/traits/as_logical.cpp b/test/unit/meta/traits/as_logical.cpp index 681eaa84ec..e2e37757e5 100644 --- a/test/unit/meta/traits/as_logical.cpp +++ b/test/unit/meta/traits/as_logical.cpp @@ -18,7 +18,7 @@ TTS_CASE_TPL( "Check as_logical on scalar", ::tts::arithmetic_types ) TTS_TYPE_IS(as_logical_t , logical); TTS_TYPE_IS(as_logical_t> , logical); - TTS_TYPE_IS((as_logical_t>), logical); + TTS_TYPE_IS((as_logical_t>), logical); }; TTS_CASE_TPL("Check as_wide on wide", ::tts::arithmetic_types ) @@ -31,5 +31,5 @@ TTS_CASE_TPL("Check as_wide on wide", ::tts::arithmetic_types ) TTS_TYPE_IS(as_logical_t> , logical>); TTS_TYPE_IS(as_logical_t>> , logical>); - TTS_TYPE_IS((as_logical_t,fixed<4>>>) , (logical>>)); + TTS_TYPE_IS((as_logical_t, fixed<4>>>) , (logical>>)); }; diff --git a/test/unit/meta/traits/common_logical.cpp b/test/unit/meta/traits/common_logical.cpp index 5254e5016e..003919cef5 100644 --- a/test/unit/meta/traits/common_logical.cpp +++ b/test/unit/meta/traits/common_logical.cpp @@ -124,6 +124,6 @@ TTS_CASE("eve::common_logical on tuples") using t1 = kumi::tuple; using t2 = kumi::tuple; - TTS_TYPE_IS((eve::common_logical_t), eve::logical); - TTS_TYPE_IS((eve::common_logical_t, eve::wide>), eve::logical>); + TTS_TYPE_IS((eve::common_logical_t), eve::logical); + TTS_TYPE_IS((eve::common_logical_t>, eve::wide>>), (eve::logical>>)); }; diff --git a/test/unit/module/core/is_less.cpp b/test/unit/module/core/is_less.cpp index 5b617285ed..e8b8a5111a 100644 --- a/test/unit/module/core/is_less.cpp +++ b/test/unit/module/core/is_less.cpp @@ -9,28 +9,44 @@ #include +template +void test_with_types(F f) +{ + TTS_TYPE_IS(decltype(f(T{}, U{})), Expected); + TTS_TYPE_IS(decltype(f(U{}, T{})), Expected); +} + //================================================================================================== //== Types tests //================================================================================================== TTS_CASE_TPL("Check return types of eve::is_less(simd)", eve::test::simd::all_types) (tts::type) { - using eve::logical; using v_t = eve::element_type_t; - TTS_EXPR_IS(eve::is_less(T(), T()), logical); - TTS_EXPR_IS(eve::is_less(v_t(), v_t()), logical); - TTS_EXPR_IS(eve::is_less(T(), v_t()), logical); - TTS_EXPR_IS(eve::is_less(v_t(), T()), logical); - if constexpr( eve::floating_value ) + TTS_EXPR_IS(eve::is_less(T(), T()), eve::logical); + TTS_EXPR_IS(eve::is_less(v_t(), v_t()), eve::logical); + + test_with_types>(eve::is_less); + + if constexpr (eve::floating_value) { - TTS_EXPR_IS(eve::is_less[eve::definitely](T(), T()), logical); - TTS_EXPR_IS(eve::is_less[eve::definitely](v_t(), v_t()), logical); - TTS_EXPR_IS(eve::is_less[eve::definitely](T(), v_t()), logical); - TTS_EXPR_IS(eve::is_less[eve::definitely](v_t(), T()), logical); + TTS_EXPR_IS(eve::is_less[eve::definitely](T(), T()), eve::logical); + TTS_EXPR_IS(eve::is_less[eve::definitely](v_t(), v_t()), eve::logical); + + test_with_types>(eve::is_less[eve::definitely]); } }; +TTS_CASE_TPL("Check return types of eve::is_less(simd) with mixed types", eve::test::simd::all_types) +(tts::type) +{ + using D = eve::downgrade_t; + using vd_t = eve::element_type_t; + + test_with_types>(eve::is_less); +}; + //================================================================================================== //== Tests for eve::is_less //================================================================================================== @@ -54,7 +70,7 @@ TTS_CASE_WITH("Check behavior of eve::is_less(simd)", //================================================================================================== //== Tests for eve::is_less corner cases for floating //================================================================================================== -TTS_CASE_TPL("Check behavior of eve::is_less(simd)", eve::test::simd::ieee_reals) +TTS_CASE_TPL("Check behavior of eve::is_less(simd) corner cases", eve::test::simd::ieee_reals) (tts::type const&) { using eve::as;