From 8fc060df29a840ec1475d179cd023044e254eb9b Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 20 Oct 2024 10:40:52 +0200 Subject: [PATCH 01/83] portable wip --- portable/.gitignore | 5 + portable/Cargo.toml | 20 + portable/src/basic.rs | 213 +++++++ portable/src/compat.rs | 116 ++++ portable/src/implementation/algorithm.rs | 575 ++++++++++++++++++ portable/src/implementation/helpers.rs | 121 ++++ portable/src/implementation/mod.rs | 24 + portable/src/implementation/portable/mod.rs | 2 + .../src/implementation/portable/simd128.rs | 239 ++++++++ .../src/implementation/portable/simd256.rs | 234 +++++++ portable/src/lib.rs | 105 ++++ 11 files changed, 1654 insertions(+) create mode 100644 portable/.gitignore create mode 100644 portable/Cargo.toml create mode 100644 portable/src/basic.rs create mode 100644 portable/src/compat.rs create mode 100644 portable/src/implementation/algorithm.rs create mode 100644 portable/src/implementation/helpers.rs create mode 100644 portable/src/implementation/mod.rs create mode 100644 portable/src/implementation/portable/mod.rs create mode 100644 portable/src/implementation/portable/simd128.rs create mode 100644 portable/src/implementation/portable/simd256.rs create mode 100644 portable/src/lib.rs diff --git a/portable/.gitignore b/portable/.gitignore new file mode 100644 index 00000000..cbfad3bb --- /dev/null +++ b/portable/.gitignore @@ -0,0 +1,5 @@ +/target +/.vscode +/.idea +/.zed +/.cargo diff --git a/portable/Cargo.toml b/portable/Cargo.toml new file mode 100644 index 00000000..188d7a0f --- /dev/null +++ b/portable/Cargo.toml @@ -0,0 +1,20 @@ +cargo-features = ["edition2024"] + +[package] +name = "simdutf8-portable" +version = "0.1.0" +edition = "2024" + +[features] +default = ["std"] + +std = [] # fixme: needed? 
+ +# expose SIMD implementations in basic::imp::* and compat::imp::* +public_imp = [] + +# use 256-bit vectors +v256 = [] + +[dependencies] +flexpect = "0.0.4" diff --git a/portable/src/basic.rs b/portable/src/basic.rs new file mode 100644 index 00000000..1187e625 --- /dev/null +++ b/portable/src/basic.rs @@ -0,0 +1,213 @@ +//! The `basic` API flavor provides barebones UTF-8 checking at the highest speed. +//! +//! It is fastest on valid UTF-8, but only checks for errors after processing the whole byte sequence +//! and does not provide detailed information if the data is not valid UTF-8. [`Utf8Error`] is a zero-sized error struct. +//! +//! If you need detailed error information use the functions from the [`crate::compat`] module instead. + +use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut}; + +use crate::implementation::validate_utf8_basic; + +/// Simple zero-sized UTF-8 error. +/// +/// No information is provided where the error occurred or how long the invalid +/// byte sequence is. +#[derive(Copy, Eq, PartialEq, Clone, Debug)] +pub struct Utf8Error; + +impl core::fmt::Display for Utf8Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + f.write_str("invalid utf-8 sequence") + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Utf8Error {} + +/// Analogue to [`std::str::from_utf8()`]. +/// +/// Checks if the passed byte sequence is valid UTF-8 and returns an +/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is. +/// +/// # Errors +/// Will return the zero-sized Err([`Utf8Error`]) if the input contains invalid UTF-8. +#[inline] +pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> { + unsafe { + validate_utf8_basic(input)?; + Ok(from_utf8_unchecked(input)) + } +} + +/// Analogue to [`std::str::from_utf8_mut()`]. 
+/// +/// Checks if the passed mutable byte sequence is valid UTF-8 and returns a mutable +/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is. +/// +/// # Errors +/// Will return the zero-sized Err([`Utf8Error`]) if the input contains invalid UTF-8. +#[inline] +pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> { + unsafe { + validate_utf8_basic(input)?; + Ok(from_utf8_unchecked_mut(input)) + } +} + +/// Allows direct access to the platform-specific unsafe validation implementations. +#[cfg(feature = "public_imp")] +pub mod imp { + use crate::basic; + + /// A low-level interface for streaming validation of UTF-8 data. It is meant to be integrated + /// in high-performance data processing pipelines. + /// + /// Data can be streamed in arbitrarily-sized chunks using the [`Self::update()`] method. There is + /// no way to find out if the input so far was valid UTF-8 during the validation. Only when + /// the validation is completed with the [`Self::finalize()`] method the result of the validation is + /// returned. Use [`ChunkedUtf8Validator`] if possible for highest performance. + /// + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to use it if the required CPU features are not available which + /// is why all trait methods are `unsafe`. 
+ /// + /// General usage: + /// ```rust + /// use simdutf8::basic::imp::Utf8Validator; + /// use std::io::{stdin, Read, Result}; + /// + /// # #[cfg(target_arch = "x86_64")] + /// fn main() -> Result<()> { + /// unsafe { + /// if !std::is_x86_feature_detected!("avx2") { + /// panic!("This example only works with CPUs supporting AVX 2"); + /// } + /// + /// let mut validator = simdutf8::basic::imp::x86::avx2::Utf8ValidatorImp::new(); + /// let mut buf = vec![0; 8192]; + /// loop { + /// let bytes_read = stdin().read(buf.as_mut())?; + /// if bytes_read == 0 { + /// break; + /// } + /// validator.update(&buf[..bytes_read]); + /// } + /// + /// if validator.finalize().is_ok() { + /// println!("Input is valid UTF-8"); + /// } else { + /// println!("Input is not valid UTF-8"); + /// } + /// } + /// + /// Ok(()) + /// } + /// + /// # #[cfg(not(target_arch = "x86_64"))] + /// # fn main() { } + /// ``` + /// + pub trait Utf8Validator { + /// Creates a new validator. + /// + /// # Safety + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not available. + #[must_use] + unsafe fn new() -> Self + where + Self: Sized; + + /// Updates the validator with `input`. + /// + /// # Safety + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not available. + unsafe fn update(&mut self, input: &[u8]); + + /// Finishes the validation and returns `Ok(())` if the input was valid UTF-8. + /// + /// # Errors + /// A [`basic::Utf8Error`] is returned if the input was not valid UTF-8. No + /// further information about the location of the error is provided. + /// + /// # Safety + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not available. 
+ unsafe fn finalize(self) -> core::result::Result<(), basic::Utf8Error>; + } + + /// Like [`Utf8Validator`] this low-level API is for streaming validation of UTF-8 data. + /// + /// It has additional restrictions imposed on how the input is passed in to allow + /// validation with as little overhead as possible. + /// + /// To feed it data you need to call the [`Self::update_from_chunks()`] method which takes slices which + /// have to be a multiple of 64 bytes long. The method will panic otherwise. There is + /// no way to find out if the input so far was valid UTF-8 during the validation. Only when + /// the validation is completed with the [`Self::finalize()`] method the result of the validation is + /// returned. + /// + /// The `Self::finalize()` method can be fed the rest of the data. There is no restriction on the + /// data passed to it. + /// + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to use it if the required CPU features are not available which + /// is why all trait methods are `unsafe`. + pub trait ChunkedUtf8Validator { + /// Creates a new validator. + /// + /// # Safety + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not available. + #[must_use] + unsafe fn new() -> Self + where + Self: Sized; + + /// Updates the validator with `input`. + /// + /// # Panics + /// If `input.len()` is not a multiple of 64. + /// + /// # Safety + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not available. + unsafe fn update_from_chunks(&mut self, input: &[u8]); + + /// Updates the validator with remaining input if any. There is no restriction on the + /// data provided. + /// + /// Finishes the validation and returns `Ok(())` if the input was valid UTF-8. 
+ /// + /// # Errors + /// A [`basic::Utf8Error`] is returned if the input was not valid UTF-8. No + /// further information about the location of the error is provided. + /// + /// # Safety + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not available. + unsafe fn finalize( + self, + remaining_input: core::option::Option<&[u8]>, + ) -> core::result::Result<(), basic::Utf8Error>; + } + + /// Includes the portable SIMD implementations. + pub mod portable { + /// Includes the validation implementation using 128-bit portable SIMD. + pub mod simd128 { + pub use crate::implementation::portable::simd128::ChunkedUtf8ValidatorImp; + pub use crate::implementation::portable::simd128::Utf8ValidatorImp; + pub use crate::implementation::portable::simd128::validate_utf8_basic as validate_utf8; + } + + /// Includes the validation implementation using 256-bit portable SIMD. + pub mod simd256 { + pub use crate::implementation::portable::simd256::ChunkedUtf8ValidatorImp; + pub use crate::implementation::portable::simd256::Utf8ValidatorImp; + pub use crate::implementation::portable::simd256::validate_utf8_basic as validate_utf8; + } + } +} diff --git a/portable/src/compat.rs b/portable/src/compat.rs new file mode 100644 index 00000000..ecea303c --- /dev/null +++ b/portable/src/compat.rs @@ -0,0 +1,116 @@ +//! The `compat` API flavor provides full compatibility with [`std::str::from_utf8()`] and detailed validation errors. +//! +//! In particular, [`from_utf8()`] +//! returns an [`Utf8Error`], which has the [`valid_up_to()`](Utf8Error#method.valid_up_to) and +//! [`error_len()`](Utf8Error#method.error_len) methods. The first is useful for verification of streamed data. The +//! second is useful e.g. for replacing invalid byte sequences with a replacement character. +//! +//! 
The functions in this module also fail early: errors are checked on-the-fly as the string is processed and once +//! an invalid UTF-8 sequence is encountered, it returns without processing the rest of the data. +//! This comes at a slight performance penalty compared to the [`crate::basic`] module if the input is valid UTF-8. + +use core::fmt::Display; +use core::fmt::Formatter; + +use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut}; + +use crate::implementation::validate_utf8_compat; + +/// UTF-8 error information compatible with [`std::str::Utf8Error`]. +/// +/// Contains information on the location of the encountered validation error and the length of the +/// invalid UTF-8 sequence. +#[derive(Copy, Eq, PartialEq, Clone, Debug)] +pub struct Utf8Error { + pub(crate) valid_up_to: usize, + pub(crate) error_len: Option, +} + +impl Utf8Error { + /// Analogue to [`std::str::Utf8Error::valid_up_to()`](std::str::Utf8Error#method.valid_up_to). + /// + /// ... + #[inline] + #[must_use] + pub fn valid_up_to(&self) -> usize { + self.valid_up_to + } + + /// Analogue to [`std::str::Utf8Error::error_len()`](std::str::Utf8Error#method.error_len). + /// + /// ... + #[inline] + #[must_use] + pub fn error_len(&self) -> Option { + self.error_len.map(|len| len as usize) + } +} + +impl Display for Utf8Error { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + if let Some(error_len) = self.error_len { + write!( + f, + "invalid utf-8 sequence of {} bytes from index {}", + error_len, self.valid_up_to + ) + } else { + write!( + f, + "incomplete utf-8 byte sequence from index {}", + self.valid_up_to + ) + } + } +} + +#[cfg(feature = "std")] +impl std::error::Error for Utf8Error {} + +/// Analogue to [`std::str::from_utf8()`]. +/// +/// Checks if the passed byte sequence is valid UTF-8 and returns an +/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is. 
+/// +/// # Errors +/// Will return Err([`Utf8Error`]) if the input contains invalid UTF-8 with +/// detailed error information. +#[inline] +pub fn from_utf8(input: &[u8]) -> Result<&str, Utf8Error> { + unsafe { + validate_utf8_compat(input)?; + Ok(from_utf8_unchecked(input)) + } +} + +/// Analogue to [`std::str::from_utf8_mut()`]. +/// +/// Checks if the passed mutable byte sequence is valid UTF-8 and returns a mutable +/// [`std::str`] reference to the passed byte slice wrapped in `Ok()` if it is. +/// +/// # Errors +/// Will return Err([`Utf8Error`]) if the input contains invalid UTF-8 with +/// detailed error information. +#[inline] +pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> { + unsafe { + validate_utf8_compat(input)?; + Ok(from_utf8_unchecked_mut(input)) + } +} + +/// Allows direct access to the platform-specific unsafe validation implementations. +#[cfg(feature = "public_imp")] +pub mod imp { + pub mod portable { + /// Includes the validation implementation for 128-bit portable SIMD. + pub mod simd128 { + pub use crate::implementation::portable::simd128::validate_utf8_compat as validate_utf8; + } + + /// Includes the validation implementation for 256-bit portable SIMD. + pub mod simd256 { + pub use crate::implementation::portable::simd256::validate_utf8_compat as validate_utf8; + } + } +} diff --git a/portable/src/implementation/algorithm.rs b/portable/src/implementation/algorithm.rs new file mode 100644 index 00000000..41ab183a --- /dev/null +++ b/portable/src/implementation/algorithm.rs @@ -0,0 +1,575 @@ +/// This macro requires the following newtypes in scope: +/// `SimdU8Value` - implementation of SIMD primitives +/// `SimdInput` - which holds 64 bytes of SIMD input +/// `TempSimdChunk` - correctly aligned temporary chunk buffer, either `TempSimdChunkA16` or `TempSimdChunkA32` +macro_rules! 
algorithm_simd { + ($(#[$feat:meta])*) => { + use crate::{basic, compat}; + + impl Utf8CheckAlgorithm { + $(#[$feat])* + #[inline] + unsafe fn default() -> Self { + Self { + prev: SimdU8Value::splat0(), + incomplete: SimdU8Value::splat0(), + error: SimdU8Value::splat0(), + } + } + + $(#[$feat])* + #[inline] + unsafe fn check_incomplete_pending(&mut self) { + self.error = self.error.or(self.incomplete); + } + + $(#[$feat])* + #[inline] + unsafe fn is_incomplete(input: SimdU8Value) -> SimdU8Value { + input.saturating_sub(SimdU8Value::from_32_cut_off_leading( + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0b1111_0000 - 1, + 0b1110_0000 - 1, + 0b1100_0000 - 1, + )) + } + + $(#[$feat])* + #[inline] + unsafe fn check_special_cases(input: SimdU8Value, prev1: SimdU8Value) -> SimdU8Value { + const TOO_SHORT: u8 = 1 << 0; + const TOO_LONG: u8 = 1 << 1; + const OVERLONG_3: u8 = 1 << 2; + const SURROGATE: u8 = 1 << 4; + const OVERLONG_2: u8 = 1 << 5; + const TWO_CONTS: u8 = 1 << 7; + const TOO_LARGE: u8 = 1 << 3; + const TOO_LARGE_1000: u8 = 1 << 6; + const OVERLONG_4: u8 = 1 << 6; + const CARRY: u8 = TOO_SHORT | TOO_LONG | TWO_CONTS; + + let byte_1_high = prev1.shr4().lookup_16( + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TWO_CONTS, + TWO_CONTS, + TWO_CONTS, + TWO_CONTS, + TOO_SHORT | OVERLONG_2, + TOO_SHORT, + TOO_SHORT | OVERLONG_3 | SURROGATE, + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4, + ); + + let byte_1_low = prev1.and(SimdU8Value::splat(0x0F)).lookup_16( + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + CARRY | OVERLONG_2, + CARRY, + CARRY, + CARRY | TOO_LARGE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | 
TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + ); + + let byte_2_high = input.shr4().lookup_16( + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + ); + + byte_1_high.and(byte_1_low).and(byte_2_high) + } + + $(#[$feat])* + #[inline] + unsafe fn check_multibyte_lengths( + input: SimdU8Value, + prev: SimdU8Value, + special_cases: SimdU8Value, + ) -> SimdU8Value { + let prev2 = input.prev2(prev); + let prev3 = input.prev3(prev); + let must23 = Self::must_be_2_3_continuation(prev2, prev3); + let must23_80 = must23.and(SimdU8Value::splat(0x80)); + must23_80.xor(special_cases) + } + + $(#[$feat])* + #[inline] + unsafe fn has_error(&self) -> bool { + self.error.any_bit_set() + } + + $(#[$feat])* + #[inline] + unsafe fn check_bytes(&mut self, input: SimdU8Value) { + let prev1 = input.prev1(self.prev); + let sc = Self::check_special_cases(input, prev1); + self.error = self + .error + .or(Self::check_multibyte_lengths(input, self.prev, sc)); + self.prev = input; + } + + $(#[$feat])* + #[inline] + unsafe fn check_utf8(&mut self, input: SimdInput) { + if input.is_ascii() { + self.check_incomplete_pending(); + } else { + self.check_block(input); + } + } + + $(#[$feat])* + #[inline] + unsafe fn check_block(&mut self, input: SimdInput) { + // WORKAROUND + // necessary because the for loop is not unrolled on ARM64 + if input.vals.len() == 2 { + self.check_bytes(*input.vals.as_ptr()); + 
self.check_bytes(*input.vals.as_ptr().add(1)); + self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(1)); + } else if input.vals.len() == 4 { + self.check_bytes(*input.vals.as_ptr()); + self.check_bytes(*input.vals.as_ptr().add(1)); + self.check_bytes(*input.vals.as_ptr().add(2)); + self.check_bytes(*input.vals.as_ptr().add(3)); + self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(3)); + } else { + panic!("Unsupported number of chunks"); + } + } + } + + /// Validation implementation for CPUs supporting the SIMD extension (see module). + /// + /// # Errors + /// Returns the zero-sized [`basic::Utf8Error`] on failure. + /// + /// # Safety + /// This function is inherently unsafe because it is compiled with SIMD extensions + /// enabled. Make sure that the CPU supports it before calling. + /// + $(#[$feat])* + #[inline] + pub unsafe fn validate_utf8_basic( + input: &[u8], + ) -> core::result::Result<(), basic::Utf8Error> { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + let len = input.len(); + let mut algorithm = Utf8CheckAlgorithm::::default(); + let mut idx: usize = 0; + let iter_lim = len - (len % SIMD_CHUNK_SIZE); + + while idx < iter_lim { + let simd_input = SimdInput::new(input.as_ptr().add(idx as usize)); + idx += SIMD_CHUNK_SIZE; + if !simd_input.is_ascii() { + algorithm.check_block(simd_input); + break; + } + } + + while idx < iter_lim { + if PREFETCH { + simd_prefetch(input.as_ptr().add(idx + SIMD_CHUNK_SIZE * 2)); + } + let input = SimdInput::new(input.as_ptr().add(idx as usize)); + algorithm.check_utf8(input); + idx += SIMD_CHUNK_SIZE; + } + + if idx < len { + let mut tmpbuf = TempSimdChunk::new(); + crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64( + input.as_ptr().add(idx), + tmpbuf.0.as_mut_ptr(), + len - idx, + ); + let simd_input = SimdInput::new(tmpbuf.0.as_ptr()); + algorithm.check_utf8(simd_input); + } + algorithm.check_incomplete_pending(); + if algorithm.has_error() { + 
Err(basic::Utf8Error {}) + } else { + Ok(()) + } + } + + /// Validation implementation for CPUs supporting the SIMD extension (see module). + /// + /// # Errors + /// Returns [`compat::Utf8Error`] with detailed error information on failure. + /// + /// # Safety + /// This function is inherently unsafe because it is compiled with SIMD extensions + /// enabled. Make sure that the CPU supports it before calling. + /// + $(#[$feat])* + #[inline] + pub unsafe fn validate_utf8_compat( + input: &[u8], + ) -> core::result::Result<(), compat::Utf8Error> { + validate_utf8_compat_simd0(input) + .map_err(|idx| crate::implementation::helpers::get_compat_error(input, idx)) + } + + $(#[$feat])* + #[inline] + #[flexpect::e(clippy::redundant_else)] // more readable + unsafe fn validate_utf8_compat_simd0(input: &[u8]) -> core::result::Result<(), usize> { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + let len = input.len(); + let mut algorithm = Utf8CheckAlgorithm::::default(); + let mut idx: usize = 0; + let mut only_ascii = true; + let iter_lim = len - (len % SIMD_CHUNK_SIZE); + + 'outer: loop { + if only_ascii { + while idx < iter_lim { + let simd_input = SimdInput::new(input.as_ptr().add(idx as usize)); + if !simd_input.is_ascii() { + algorithm.check_block(simd_input); + if algorithm.has_error() { + return Err(idx); + } else { + only_ascii = false; + idx += SIMD_CHUNK_SIZE; + continue 'outer; + } + } + idx += SIMD_CHUNK_SIZE; + } + } else { + while idx < iter_lim { + if PREFETCH { + simd_prefetch(input.as_ptr().add(idx + SIMD_CHUNK_SIZE * 2)); + } + let simd_input = SimdInput::new(input.as_ptr().add(idx as usize)); + if simd_input.is_ascii() { + algorithm.check_incomplete_pending(); + if algorithm.has_error() { + return Err(idx); + } else { + // we are in pure ASCII territory again + only_ascii = true; + idx += SIMD_CHUNK_SIZE; + continue 'outer; + } + } else { + algorithm.check_block(simd_input); + if algorithm.has_error() { + return Err(idx); + } + } + idx += 
SIMD_CHUNK_SIZE; + } + } + break; + } + if idx < len { + let mut tmpbuf = TempSimdChunk::new(); + crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64( + input.as_ptr().add(idx), + tmpbuf.0.as_mut_ptr(), + len - idx, + ); + let simd_input = SimdInput::new(tmpbuf.0.as_ptr()); + + algorithm.check_utf8(simd_input); + } + algorithm.check_incomplete_pending(); + if algorithm.has_error() { + Err(idx) + } else { + Ok(()) + } + } + + /// Low-level implementation of the [`basic::imp::Utf8Validator`] trait. + /// + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not + /// available. + #[cfg(feature = "public_imp")] + pub struct Utf8ValidatorImp { + algorithm: Utf8CheckAlgorithm, + incomplete_data: [u8; 64], + incomplete_len: usize, + } + + #[cfg(feature = "public_imp")] + impl Utf8ValidatorImp { + $(#[$feat])* + #[inline] + unsafe fn update_from_incomplete_data(&mut self) { + let simd_input = SimdInput::new(self.incomplete_data.as_ptr()); + self.algorithm.check_utf8(simd_input); + self.incomplete_len = 0; + } + } + + #[cfg(feature = "public_imp")] + impl basic::imp::Utf8Validator for Utf8ValidatorImp { + $(#[$feat])* + #[inline] + #[must_use] + unsafe fn new() -> Self { + Self { + algorithm: Utf8CheckAlgorithm::::default(), + incomplete_data: [0; 64], + incomplete_len: 0, + } + } + + $(#[$feat])* + #[inline] + unsafe fn update(&mut self, mut input: &[u8]) { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + if input.is_empty() { + return; + } + if self.incomplete_len != 0 { + let to_copy = + core::cmp::min(SIMD_CHUNK_SIZE - self.incomplete_len, input.len()); + self.incomplete_data + .as_mut_ptr() + .add(self.incomplete_len) + .copy_from_nonoverlapping(input.as_ptr(), to_copy); + if self.incomplete_len + to_copy == SIMD_CHUNK_SIZE { + self.update_from_incomplete_data(); + input = &input[to_copy..]; + } else { + 
self.incomplete_len += to_copy; + return; + } + } + let len = input.len(); + let mut idx: usize = 0; + let iter_lim = len - (len % SIMD_CHUNK_SIZE); + while idx < iter_lim { + let input = SimdInput::new(input.as_ptr().add(idx as usize)); + self.algorithm.check_utf8(input); + idx += SIMD_CHUNK_SIZE; + } + if idx < len { + let to_copy = len - idx; + self.incomplete_data + .as_mut_ptr() + .copy_from_nonoverlapping(input.as_ptr().add(idx), to_copy); + self.incomplete_len = to_copy; + } + } + + $(#[$feat])* + #[inline] + unsafe fn finalize(mut self) -> core::result::Result<(), basic::Utf8Error> { + if self.incomplete_len != 0 { + for i in &mut self.incomplete_data[self.incomplete_len..] { + *i = 0; + } + self.update_from_incomplete_data(); + } + self.algorithm.check_incomplete_pending(); + if self.algorithm.has_error() { + Err(basic::Utf8Error {}) + } else { + Ok(()) + } + } + } + + /// Low-level implementation of the [`basic::imp::ChunkedUtf8Validator`] trait. + /// + /// This implementation requires CPU SIMD features specified by the module it resides in. + /// It is undefined behavior to call it if the required CPU features are not + /// available. + #[cfg(feature = "public_imp")] + pub struct ChunkedUtf8ValidatorImp { + algorithm: Utf8CheckAlgorithm, + } + + #[cfg(feature = "public_imp")] + impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { + $(#[$feat])* + #[inline] + #[must_use] + unsafe fn new() -> Self { + Self { + algorithm: Utf8CheckAlgorithm::::default(), + } + } + + $(#[$feat])* + #[inline] + unsafe fn update_from_chunks(&mut self, input: &[u8]) { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + + assert!( + input.len() % SIMD_CHUNK_SIZE == 0, + "Input size must be a multiple of 64." 
+ ); + for chunk in input.chunks_exact(SIMD_CHUNK_SIZE) { + let input = SimdInput::new(chunk.as_ptr()); + self.algorithm.check_utf8(input); + } + } + + $(#[$feat])* + #[inline] + unsafe fn finalize( + mut self, + remaining_input: core::option::Option<&[u8]>, + ) -> core::result::Result<(), basic::Utf8Error> { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + + if let Some(mut remaining_input) = remaining_input { + if !remaining_input.is_empty() { + let len = remaining_input.len(); + let chunks_lim = len - (len % SIMD_CHUNK_SIZE); + if chunks_lim > 0 { + self.update_from_chunks(&remaining_input[..chunks_lim]); + } + let rem = len - chunks_lim; + if rem > 0 { + remaining_input = &remaining_input[chunks_lim..]; + let mut tmpbuf = TempSimdChunk::new(); + tmpbuf.0.as_mut_ptr().copy_from_nonoverlapping( + remaining_input.as_ptr(), + remaining_input.len(), + ); + let simd_input = SimdInput::new(tmpbuf.0.as_ptr()); + self.algorithm.check_utf8(simd_input); + } + } + } + self.algorithm.check_incomplete_pending(); + if self.algorithm.has_error() { + Err(basic::Utf8Error {}) + } else { + Ok(()) + } + } + } + }; +} + +macro_rules! simd_input_128_bit { + ($(#[$feat:meta])*) => { + #[repr(C)] + struct SimdInput { + vals: [SimdU8Value; 4], + } + + impl SimdInput { + $(#[$feat])* + #[inline] + unsafe fn new(ptr: *const u8) -> Self { + Self { + vals: [ + SimdU8Value::load_from(ptr), + SimdU8Value::load_from(ptr.add(16)), + SimdU8Value::load_from(ptr.add(32)), + SimdU8Value::load_from(ptr.add(48)), + ], + } + } + + $(#[$feat])* + #[inline] + unsafe fn is_ascii(&self) -> bool { + let r1 = self.vals[0].or(self.vals[1]); + let r2 = self.vals[2].or(self.vals[3]); + let r = r1.or(r2); + r.is_ascii() + } + } + }; +} + +macro_rules! 
simd_input_256_bit { + ($(#[$feat:meta])*) => { + #[repr(C)] + struct SimdInput { + vals: [SimdU8Value; 2], + } + + impl SimdInput { + $(#[$feat])* + #[inline] + unsafe fn new(ptr: *const u8) -> Self { + Self { + vals: [ + SimdU8Value::load_from(ptr), + SimdU8Value::load_from(ptr.add(32)), + ], + } + } + + $(#[$feat])* + #[inline] + unsafe fn is_ascii(&self) -> bool { + self.vals[0].or(self.vals[1]).is_ascii() + } + } + }; +} diff --git a/portable/src/implementation/helpers.rs b/portable/src/implementation/helpers.rs new file mode 100644 index 00000000..0f4ecc9b --- /dev/null +++ b/portable/src/implementation/helpers.rs @@ -0,0 +1,121 @@ +type Utf8ErrorCompat = crate::compat::Utf8Error; + +#[inline] +#[flexpect::e(clippy::cast_possible_truncation)] +pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), Utf8ErrorCompat> { + match core::str::from_utf8(&input[offset..]) { + Ok(_) => Ok(()), + Err(err) => Err(Utf8ErrorCompat { + valid_up_to: err.valid_up_to() + offset, + error_len: err.error_len().map(|len| { + // never truncates since std::str::err::Utf8Error::error_len() never returns value larger than 4 + len as u8 + }), + }), + } +} + +#[cold] +#[flexpect::e(clippy::unwrap_used)] +#[allow(dead_code)] // only used if there is a SIMD implementation +pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8ErrorCompat { + let offset = if failing_block_pos == 0 { + // Error must be in this block since it is the first. + 0 + } else { + // The previous block is OK except for a possible continuation over the block boundary. + // We go backwards over the last three bytes of the previous block and find the + // last non-continuation byte as a starting point for an std validation. If the last + // three bytes are all continuation bytes then the previous block ends with a four byte + // UTF-8 codepoint, is thus complete and valid UTF-8. We start the check with the + // current block in that case. 
+ (1..=3) + .find(|i| input[failing_block_pos - i] >> 6 != 0b10) + .map_or(failing_block_pos, |i| failing_block_pos - i) + }; + // UNWRAP: safe because the SIMD UTF-8 validation found an error + validate_utf8_at_offset(input, offset).unwrap_err() +} + +#[allow(dead_code)] // only used if there is a SIMD implementation +#[inline(always)] // needs to be forced because otherwise it is not inlined on armv7 neon +pub(crate) unsafe fn memcpy_unaligned_nonoverlapping_inline_opt_lt_64( + mut src: *const u8, + mut dest: *mut u8, + mut len: usize, +) { + // This gets properly auto-vectorized on AVX 2 and SSE 4.2. + // Needs to be forced because otherwise it is not inlined on armv7 neon. + #[inline(always)] + #[flexpect::e(clippy::inline_always)] + unsafe fn memcpy_u64(src: &mut *const u8, dest: &mut *mut u8) { + dest.cast::() + .write_unaligned(src.cast::().read_unaligned()); + *src = src.offset(8); + *dest = dest.offset(8); + } + if len >= 32 { + memcpy_u64(&mut src, &mut dest); + memcpy_u64(&mut src, &mut dest); + memcpy_u64(&mut src, &mut dest); + memcpy_u64(&mut src, &mut dest); + len -= 32; + } + if len >= 16 { + memcpy_u64(&mut src, &mut dest); + memcpy_u64(&mut src, &mut dest); + len -= 16; + } + if len >= 8 { + memcpy_u64(&mut src, &mut dest); + len -= 8; + } + while len > 0 { + *dest = *src; + src = src.offset(1); + dest = dest.offset(1); + len -= 1; + } +} + +pub(crate) const SIMD_CHUNK_SIZE: usize = 64; + +#[repr(C, align(32))] +#[allow(dead_code)] // only used if there is a SIMD implementation +pub(crate) struct Utf8CheckAlgorithm { + pub(crate) prev: T, + pub(crate) incomplete: T, + pub(crate) error: T, +} + +#[repr(C, align(16))] +#[allow(dead_code)] // only used if a 128-bit SIMD implementation is used +pub(crate) struct TempSimdChunkA16(pub(crate) [u8; SIMD_CHUNK_SIZE]); + +#[allow(dead_code)] // only used if there is a SIMD implementation +impl TempSimdChunkA16 { + #[flexpect::e(clippy::inline_always)] + #[inline(always)] // needs to be forced because 
otherwise it is not inlined on armv7 neo + pub(crate) const fn new() -> Self { + Self([0; SIMD_CHUNK_SIZE]) + } +} + +#[repr(C, align(32))] +#[allow(dead_code)] // only used if a 256-bit SIMD implementation is used +pub(crate) struct TempSimdChunkA32(pub(crate) [u8; SIMD_CHUNK_SIZE]); + +#[allow(dead_code)] // only used if there is a SIMD implementation +impl TempSimdChunkA32 { + #[flexpect::e(clippy::inline_always)] + #[inline(always)] // needs to be forced because otherwise it is not inlined on armv7 neo + pub(crate) const fn new() -> Self { + Self([0; SIMD_CHUNK_SIZE]) + } +} + +#[derive(Clone, Copy)] +#[allow(dead_code)] // only used if there is a SIMD implementation +pub(crate) struct SimdU8Value(pub(crate) T) +where + T: Copy; diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs new file mode 100644 index 00000000..15f68519 --- /dev/null +++ b/portable/src/implementation/mod.rs @@ -0,0 +1,24 @@ +//! Contains UTF-8 validation implementations. + +#[macro_use] +#[allow(unused_macros)] // only used if there is a SIMD implementation +mod algorithm; + +pub(crate) mod helpers; + +// UTF-8 validation function types +pub(crate) mod portable; + +// fallback method implementations +#[inline] +pub(crate) fn validate_utf8_basic_fallback(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { + match core::str::from_utf8(input) { + Ok(_) => Ok(()), + Err(_) => Err(crate::basic::Utf8Error {}), + } +} + +#[inline] +pub(crate) fn validate_utf8_compat_fallback(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { + helpers::validate_utf8_at_offset(input, 0) +} diff --git a/portable/src/implementation/portable/mod.rs b/portable/src/implementation/portable/mod.rs new file mode 100644 index 00000000..47da8cdb --- /dev/null +++ b/portable/src/implementation/portable/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod simd128; +pub(crate) mod simd256; diff --git a/portable/src/implementation/portable/simd128.rs b/portable/src/implementation/portable/simd128.rs 
new file mode 100644 index 00000000..0683b296 --- /dev/null +++ b/portable/src/implementation/portable/simd128.rs @@ -0,0 +1,239 @@ +//! Contains the portable SIMD UTF-8 validation implementation. + +#![allow(clippy::too_many_arguments)] +#![allow(clippy::needless_pass_by_value, clippy::pedantic, clippy::all)] +use crate::implementation::helpers::Utf8CheckAlgorithm; +use core::simd::prelude::*; +use core::simd::{simd_swizzle, u8x16}; + +// Portable SIMD primitives +type SimdU8Value = crate::implementation::helpers::SimdU8Value; + +impl SimdU8Value { + #[inline] + fn from_32_cut_off_leading( + _v0: u8, + _v1: u8, + _v2: u8, + _v3: u8, + _v4: u8, + _v5: u8, + _v6: u8, + _v7: u8, + _v8: u8, + _v9: u8, + _v10: u8, + _v11: u8, + _v12: u8, + _v13: u8, + _v14: u8, + _v15: u8, + v16: u8, + v17: u8, + v18: u8, + v19: u8, + v20: u8, + v21: u8, + v22: u8, + v23: u8, + v24: u8, + v25: u8, + v26: u8, + v27: u8, + v28: u8, + v29: u8, + v30: u8, + v31: u8, + ) -> Self { + Self::from(u8x16::from_array([ + v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + ])) + } + + #[inline] + fn repeat_16( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + Self::from(u8x16::from_array([ + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ])) + } + + #[inline] + unsafe fn load_from(ptr: *const u8) -> Self { + Self::from(ptr.cast::().read_unaligned()) + } + + #[inline] + fn lookup_16( + self, + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + // We need to ensure that 'self' only contains the lower 4 bits, unlike the avx instruction + // this will otherwise lead to bad results + let idx: u8x16 = self.0; + let src: u8x16 = Self::repeat_16( + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, 
v11, v12, v13, v14, v15, + ) + .0; + let res = src.swizzle_dyn(idx); + Self::from(res) + } + + #[inline] + fn splat(val: u8) -> Self { + #[allow(clippy::cast_possible_wrap)] + Self::from(u8x16::splat(val)) + } + + #[inline] + fn splat0() -> Self { + Self::from(u8x16::splat(0)) + } + + #[inline] + fn or(self, b: Self) -> Self { + Self::from(self.0 | b.0) + } + + #[inline] + fn and(self, b: Self) -> Self { + Self::from(self.0 & b.0) + } + + #[inline] + fn xor(self, b: Self) -> Self { + Self::from(self.0 ^ b.0) + } + + #[inline] + fn saturating_sub(self, b: Self) -> Self { + Self::from(self.0.saturating_sub(b.0)) + } + + // ugly but shr requires const generics + #[inline] + fn shr4(self) -> Self { + Self::from(self.0 >> 4) + } + + #[inline] + fn prev1(self, prev: Self) -> Self { + Self::from(simd_swizzle!( + self.0, + prev.0, + [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,] + )) + } + + // ugly but prev requires const generics + #[inline] + fn prev2(self, prev: Self) -> Self { + Self::from(simd_swizzle!( + self.0, + prev.0, + [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,] + )) + } + + // ugly but prev requires const generics + #[inline] + fn prev3(self, prev: Self) -> Self { + Self::from(simd_swizzle!( + self.0, + prev.0, + [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,] + )) + } + + #[inline] + fn unsigned_gt(self, other: Self) -> Self { + let gt = self.0.simd_gt(other.0).to_int(); + Self::from(gt.cast()) + } + + #[inline] + fn any_bit_set(self) -> bool { + if HAS_FAST_REDUCE_MAX { + self.0.reduce_max() != 0 + } else { + self.0 != u8x16::splat(0) + } + } + + #[inline] + fn is_ascii(self) -> bool { + if HAS_FAST_REDUCE_MAX { + self.0.reduce_max() < 0b1000_0000 + } else { + (self.0 & u8x16::splat(0b1000_0000)) == u8x16::splat(0) + } + } +} + +impl From for SimdU8Value { + #[inline] + fn from(val: u8x16) -> Self { + Self(val) + } +} + +impl Utf8CheckAlgorithm { + #[inline] + fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> 
SimdU8Value { + let is_third_byte = prev2.unsigned_gt(SimdU8Value::splat(0b1110_0000 - 1)); + let is_fourth_byte = prev3.unsigned_gt(SimdU8Value::splat(0b1111_0000 - 1)); + + is_third_byte.or(is_fourth_byte) + } +} + +#[inline] +unsafe fn simd_prefetch(_ptr: *const u8) {} + +#[cfg(all( + any(target_arch = "aarch64", target_arch = "arm"), + target_feature = "neon" +))] +const HAS_FAST_REDUCE_MAX: bool = true; + +#[cfg(not(all( + any(target_arch = "aarch64", target_arch = "arm"), + target_feature = "neon" +)))] +const HAS_FAST_REDUCE_MAX: bool = false; + +const PREFETCH: bool = false; +use crate::implementation::helpers::TempSimdChunkA16 as TempSimdChunk; +simd_input_128_bit!(); +algorithm_simd!(); diff --git a/portable/src/implementation/portable/simd256.rs b/portable/src/implementation/portable/simd256.rs new file mode 100644 index 00000000..c99916ab --- /dev/null +++ b/portable/src/implementation/portable/simd256.rs @@ -0,0 +1,234 @@ +//! Contains the portable SIMD UTF-8 validation implementation. 
+ +#![allow(clippy::too_many_arguments)] +#![allow(clippy::needless_pass_by_value, clippy::pedantic, clippy::all)] +use crate::implementation::helpers::Utf8CheckAlgorithm; +use core::simd::prelude::*; +use core::simd::{simd_swizzle, u8x32}; + +// Portable SIMD primitives +type SimdU8Value = crate::implementation::helpers::SimdU8Value; + +impl SimdU8Value { + #[inline] + fn from_32_cut_off_leading( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + v16: u8, + v17: u8, + v18: u8, + v19: u8, + v20: u8, + v21: u8, + v22: u8, + v23: u8, + v24: u8, + v25: u8, + v26: u8, + v27: u8, + v28: u8, + v29: u8, + v30: u8, + v31: u8, + ) -> Self { + Self::from(u8x32::from_array([ + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, + v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + ])) + } + + #[inline] + fn repeat_16( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + Self::from_32_cut_off_leading( + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, v3, + v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ) + } + + #[inline] + unsafe fn load_from(ptr: *const u8) -> Self { + Self::from(ptr.cast::().read_unaligned()) + } + + #[inline] + fn lookup_16( + self, + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + // We need to ensure that 'self' only contains the lower 4 bits, unlike the avx instruction + // this will otherwise lead to bad results + let idx: u8x32 = self.0.cast(); + let src: u8x32 = Self::repeat_16( + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ) + .0 + .cast(); + let res = 
src.swizzle_dyn(idx); + Self::from(res.cast()) + } + + #[inline] + fn splat(val: u8) -> Self { + #[allow(clippy::cast_possible_wrap)] + Self::from(u8x32::splat(val)) + } + + #[inline] + fn splat0() -> Self { + Self::from(u8x32::splat(0)) + } + + #[inline] + fn or(self, b: Self) -> Self { + Self::from(self.0 | b.0) + } + + #[inline] + fn and(self, b: Self) -> Self { + Self::from(self.0 & b.0) + } + + #[inline] + fn xor(self, b: Self) -> Self { + Self::from(self.0 ^ b.0) + } + + #[inline] + fn saturating_sub(self, b: Self) -> Self { + Self::from(self.0.saturating_sub(b.0)) + } + + // ugly but shr requires const generics + #[inline] + fn shr4(self) -> Self { + Self::from(self.0 >> 4) + } + + #[inline] + fn prev1(self, prev: Self) -> Self { + Self::from(simd_swizzle!( + self.0, + prev.0, + [ + 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30, + ] + )) + } + + // ugly but prev requires const generics + #[inline] + fn prev2(self, prev: Self) -> Self { + Self::from(simd_swizzle!( + self.0, + prev.0, + [ + 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, + ] + )) + } + + // ugly but prev requires const generics + #[inline] + fn prev3(self, prev: Self) -> Self { + Self::from(simd_swizzle!( + self.0, + prev.0, + [ + 61, 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, + ] + )) + } + + #[inline] + fn unsigned_gt(self, other: Self) -> Self { + let gt = self.0.simd_gt(other.0).to_int(); + Self::from(gt.cast()) + } + + #[inline] + fn any_bit_set(self) -> bool { + self.0 != u8x32::splat(0) + } + + #[inline] + fn is_ascii(self) -> bool { + let significan_bits = self.0 & u8x32::from_array([0b1000_0000; 32]); + significan_bits == u8x32::from_array([0; 32]) + } +} + +impl From for SimdU8Value { + #[inline] + fn from(val: u8x32) -> Self { + Self(val) + } +} + +impl 
Utf8CheckAlgorithm { + #[inline] + fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value { + let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0b1110_0000 - 1)); + let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0b1111_0000 - 1)); + + is_third_byte + .or(is_fourth_byte) + .unsigned_gt(SimdU8Value::splat0()) + } +} + +#[inline] +unsafe fn simd_prefetch(_ptr: *const u8) {} + +const PREFETCH: bool = false; +use crate::implementation::helpers::TempSimdChunkA32 as TempSimdChunk; +simd_input_256_bit!(); +algorithm_simd!(); diff --git a/portable/src/lib.rs b/portable/src/lib.rs new file mode 100644 index 00000000..541fe7f4 --- /dev/null +++ b/portable/src/lib.rs @@ -0,0 +1,105 @@ +#![warn(unused_extern_crates)] +#![warn( + clippy::all, + clippy::unwrap_used, + clippy::unnecessary_unwrap, + clippy::pedantic, + clippy::nursery +)] +#![allow(clippy::redundant_pub_crate)] // check is broken (see e.g. https://github.com/rust-lang/rust-clippy/issues/5369) +#![allow(clippy::missing_const_for_fn)] // not necessary most of the times +#![deny(missing_docs)] +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr(docsrs, feature(doc_cfg))] +#![feature(portable_simd)] + +//! Blazingly fast API-compatible UTF-8 validation for Rust using SIMD extensions, based on the implementation from +//! [simdjson](https://github.com/simdjson/simdjson). Originally ported to Rust by the developers of [simd-json.rs](https://simd-json.rs), but now heavily improved. +//! +//! ## Quick start +//! Add the dependency to your Cargo.toml file: +//! ```toml +//! [dependencies] +//! simdutf8 = "0.1.5" +//! ``` +//! +//! Use [`basic::from_utf8()`] as a drop-in replacement for `std::str::from_utf8()`. +//! +//! ```rust +//! use simdutf8::basic::from_utf8; +//! +//! println!("{}", from_utf8(b"I \xE2\x9D\xA4\xEF\xB8\x8F UTF-8!").unwrap()); +//! ``` +//! +//! If you need detailed information on validation failures, use [`compat::from_utf8()`] +//! instead. +//! 
+//! ```rust +//! use simdutf8::compat::from_utf8; +//! +//! let err = from_utf8(b"I \xE2\x9D\xA4\xEF\xB8 UTF-8!").unwrap_err(); +//! assert_eq!(err.valid_up_to(), 5); +//! assert_eq!(err.error_len(), Some(2)); +//! ``` +//! +//! ## APIs +//! +//! ### Basic flavor +//! Use the `basic` API flavor for maximum speed. It is fastest on valid UTF-8, but only checks +//! for errors after processing the whole byte sequence and does not provide detailed information if the data +//! is not valid UTF-8. [`basic::Utf8Error`] is a zero-sized error struct. +//! +//! ### Compat flavor +//! The `compat` flavor is fully API-compatible with `std::str::from_utf8()`. In particular, [`compat::from_utf8()`] +//! returns a [`compat::Utf8Error`], which has [`valid_up_to()`](compat::Utf8Error#method.valid_up_to) and +//! [`error_len()`](compat::Utf8Error#method.error_len) methods. The first is useful for verification of streamed data. The +//! second is useful e.g. for replacing invalid byte sequences with a replacement character. +//! +//! It also fails early: errors are checked on the fly as the string is processed and once +//! an invalid UTF-8 sequence is encountered, it returns without processing the rest of the data. +//! This comes at a slight performance penalty compared to the [`basic`] API even if the input is valid UTF-8. +//! +//! ## Implementation selection +//! +//! ### X86 +//! The fastest implementation is selected at runtime using the `std::is_x86_feature_detected!` macro, unless the CPU +//! targeted by the compiler supports the fastest available implementation. +//! So if you compile with `RUSTFLAGS="-C target-cpu=native"` on a recent x86-64 machine, the AVX 2 implementation is selected at +//! compile-time and runtime selection is disabled. +//! +//! For no-std support (compiled with `--no-default-features`) the implementation is always selected at compile time based on +//! the targeted CPU. 
Use `RUSTFLAGS="-C target-feature=+avx2"` for the AVX 2 implementation or `RUSTFLAGS="-C target-feature=+sse4.2"` +//! for the SSE 4.2 implementation. +//! +//! ### ARM64 +//! The SIMD implementation is used automatically since Rust 1.61. +//! +//! ### WASM32 +//! For wasm32 support, the implementation is selected at compile time based on the presence of the `simd128` target feature. +//! Use `RUSTFLAGS="-C target-feature=+simd128"` to enable the WASM SIMD implementation. WASM, at +//! the time of this writing, doesn't have a way to detect SIMD through WASM itself. Although this capability +//! is available in various WASM host environments (e.g., [wasm-feature-detect] in the web browser), there is no portable +//! way from within the library to detect this. +//! +//! [wasm-feature-detect]: https://github.com/GoogleChromeLabs/wasm-feature-detect +//! +//! ### Access to low-level functionality +//! If you want to be able to call a SIMD implementation directly, use the `public_imp` feature flag. The validation +//! implementations are then accessible via [`basic::imp`] and [`compat::imp`]. Traits facilitating streaming validation are available +//! there as well. +//! +//! ## Optimisation flags +//! Do not use [`opt-level = "z"`](https://doc.rust-lang.org/cargo/reference/profiles.html), which prevents inlining and makes +//! the code quite slow. +//! +//! ## Minimum Supported Rust Version (MSRV) +//! This crate's minimum supported Rust version is 1.38.0. +//! +//! ## Algorithm +//! +//! See Validating UTF-8 In Less Than One Instruction Per Byte, Software: Practice and Experience 51 (5), 2021 +//! 
+ +pub mod basic; +pub mod compat; +mod implementation; From 22903f8d94a916ba44a841df70e5e17a19472a68 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 20 Oct 2024 11:20:19 +0200 Subject: [PATCH 02/83] wip --- portable/Cargo.toml | 2 +- portable/src/implementation/mod.rs | 3 +++ portable/src/implementation/portable/mod.rs | 2 ++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/portable/Cargo.toml b/portable/Cargo.toml index 188d7a0f..597f355a 100644 --- a/portable/Cargo.toml +++ b/portable/Cargo.toml @@ -14,7 +14,7 @@ std = [] # fixme: needed? public_imp = [] # use 256-bit vectors -v256 = [] +simd256 = [] [dependencies] flexpect = "0.0.4" diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 15f68519..4733dbf0 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -9,6 +9,9 @@ pub(crate) mod helpers; // UTF-8 validation function types pub(crate) mod portable; +pub(super) use portable::simd128::validate_utf8_basic; +pub(super) use portable::simd128::validate_utf8_compat; + // fallback method implementations #[inline] pub(crate) fn validate_utf8_basic_fallback(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { diff --git a/portable/src/implementation/portable/mod.rs b/portable/src/implementation/portable/mod.rs index 47da8cdb..8a1d49ec 100644 --- a/portable/src/implementation/portable/mod.rs +++ b/portable/src/implementation/portable/mod.rs @@ -1,2 +1,4 @@ +#[cfg(any(not(feature = "simd256"), feature = "public_imp"))] pub(crate) mod simd128; +#[cfg(any(feature = "simd256", feature = "public_imp"))] pub(crate) mod simd256; From 633cd467e13573e4d37da9061dd7eb410bcfd339 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 20 Oct 2024 12:13:10 +0200 Subject: [PATCH 03/83] disable edition 2024 for now --- portable/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/portable/Cargo.toml b/portable/Cargo.toml index 597f355a..8199591a 100644 --- 
a/portable/Cargo.toml +++ b/portable/Cargo.toml @@ -1,9 +1,9 @@ -cargo-features = ["edition2024"] +#cargo-features = ["edition2024"] # TODO [package] name = "simdutf8-portable" version = "0.1.0" -edition = "2024" +edition = "2021" [features] default = ["std"] From a4c2eb6164f32d23ca2978bdb1e51a9830f85225 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 20 Oct 2024 15:45:10 +0200 Subject: [PATCH 04/83] wip --- portable/src/implementation/mod.rs | 34 ++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 4733dbf0..89bd27a7 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -9,8 +9,38 @@ pub(crate) mod helpers; // UTF-8 validation function types pub(crate) mod portable; -pub(super) use portable::simd128::validate_utf8_basic; -pub(super) use portable::simd128::validate_utf8_compat; +#[inline] +pub(crate) unsafe fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { + if input.len() < helpers::SIMD_CHUNK_SIZE { + return validate_utf8_basic_fallback(input); + } + + validate_utf8_basic_simd(input) +} + +#[inline(never)] +unsafe fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { + #[cfg(not(feature = "simd256"))] + return portable::simd128::validate_utf8_basic(input); + #[cfg(feature = "simd256")] + return portable::simd256::validate_utf8_basic(input); +} + +#[inline] +pub(crate) unsafe fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { + if input.len() < helpers::SIMD_CHUNK_SIZE { + return validate_utf8_compat_fallback(input); + } + + validate_utf8_compat_simd(input) +} + +unsafe fn validate_utf8_compat_simd(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { + #[cfg(not(feature = "simd256"))] + return portable::simd128::validate_utf8_compat(input); + #[cfg(feature = "simd256")] + return 
portable::simd256::validate_utf8_compat(input); +} // fallback method implementations #[inline] From 9a239d4d4b67a4e24e585dc0d8cc80a5c2d03cf8 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 20 Oct 2024 15:46:14 +0200 Subject: [PATCH 05/83] rm flexpect --- portable/Cargo.toml | 3 --- portable/src/implementation/algorithm.rs | 2 +- portable/src/implementation/helpers.rs | 10 +++++----- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/portable/Cargo.toml b/portable/Cargo.toml index 8199591a..1b644084 100644 --- a/portable/Cargo.toml +++ b/portable/Cargo.toml @@ -15,6 +15,3 @@ public_imp = [] # use 256-bit vectors simd256 = [] - -[dependencies] -flexpect = "0.0.4" diff --git a/portable/src/implementation/algorithm.rs b/portable/src/implementation/algorithm.rs index 41ab183a..66f8eb5f 100644 --- a/portable/src/implementation/algorithm.rs +++ b/portable/src/implementation/algorithm.rs @@ -274,7 +274,7 @@ macro_rules! algorithm_simd { $(#[$feat])* #[inline] - #[flexpect::e(clippy::redundant_else)] // more readable + #[expect(clippy::redundant_else)] // more readable unsafe fn validate_utf8_compat_simd0(input: &[u8]) -> core::result::Result<(), usize> { use crate::implementation::helpers::SIMD_CHUNK_SIZE; let len = input.len(); diff --git a/portable/src/implementation/helpers.rs b/portable/src/implementation/helpers.rs index 0f4ecc9b..8d50e08d 100644 --- a/portable/src/implementation/helpers.rs +++ b/portable/src/implementation/helpers.rs @@ -1,7 +1,7 @@ type Utf8ErrorCompat = crate::compat::Utf8Error; #[inline] -#[flexpect::e(clippy::cast_possible_truncation)] +#[expect(clippy::cast_possible_truncation)] pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), Utf8ErrorCompat> { match core::str::from_utf8(&input[offset..]) { Ok(_) => Ok(()), @@ -16,7 +16,7 @@ pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), } #[cold] -#[flexpect::e(clippy::unwrap_used)] +#[expect(clippy::unwrap_used)] 
#[allow(dead_code)] // only used if there is a SIMD implementation pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8ErrorCompat { let offset = if failing_block_pos == 0 { @@ -47,7 +47,7 @@ pub(crate) unsafe fn memcpy_unaligned_nonoverlapping_inline_opt_lt_64( // This gets properly auto-vectorized on AVX 2 and SSE 4.2. // Needs to be forced because otherwise it is not inlined on armv7 neon. #[inline(always)] - #[flexpect::e(clippy::inline_always)] + #[expect(clippy::inline_always)] unsafe fn memcpy_u64(src: &mut *const u8, dest: &mut *mut u8) { dest.cast::() .write_unaligned(src.cast::().read_unaligned()); @@ -94,7 +94,7 @@ pub(crate) struct TempSimdChunkA16(pub(crate) [u8; SIMD_CHUNK_SIZE]); #[allow(dead_code)] // only used if there is a SIMD implementation impl TempSimdChunkA16 { - #[flexpect::e(clippy::inline_always)] + #[expect(clippy::inline_always)] #[inline(always)] // needs to be forced because otherwise it is not inlined on armv7 neo pub(crate) const fn new() -> Self { Self([0; SIMD_CHUNK_SIZE]) @@ -107,7 +107,7 @@ pub(crate) struct TempSimdChunkA32(pub(crate) [u8; SIMD_CHUNK_SIZE]); #[allow(dead_code)] // only used if there is a SIMD implementation impl TempSimdChunkA32 { - #[flexpect::e(clippy::inline_always)] + #[expect(clippy::inline_always)] #[inline(always)] // needs to be forced because otherwise it is not inlined on armv7 neo pub(crate) const fn new() -> Self { Self([0; SIMD_CHUNK_SIZE]) From b7b762a518216be2e574d9542e13dd5d2a4b3158 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 20 Oct 2024 15:53:38 +0200 Subject: [PATCH 06/83] wip --- portable/src/implementation/algorithm.rs | 6 ------ portable/src/implementation/portable/simd128.rs | 4 ---- portable/src/implementation/portable/simd256.rs | 4 ---- portable/src/lib.rs | 4 ++-- 4 files changed, 2 insertions(+), 16 deletions(-) diff --git a/portable/src/implementation/algorithm.rs b/portable/src/implementation/algorithm.rs index 66f8eb5f..3804e6d1 100644 --- 
a/portable/src/implementation/algorithm.rs +++ b/portable/src/implementation/algorithm.rs @@ -228,9 +228,6 @@ macro_rules! algorithm_simd { } while idx < iter_lim { - if PREFETCH { - simd_prefetch(input.as_ptr().add(idx + SIMD_CHUNK_SIZE * 2)); - } let input = SimdInput::new(input.as_ptr().add(idx as usize)); algorithm.check_utf8(input); idx += SIMD_CHUNK_SIZE; @@ -301,9 +298,6 @@ macro_rules! algorithm_simd { } } else { while idx < iter_lim { - if PREFETCH { - simd_prefetch(input.as_ptr().add(idx + SIMD_CHUNK_SIZE * 2)); - } let simd_input = SimdInput::new(input.as_ptr().add(idx as usize)); if simd_input.is_ascii() { algorithm.check_incomplete_pending(); diff --git a/portable/src/implementation/portable/simd128.rs b/portable/src/implementation/portable/simd128.rs index 0683b296..2f0da488 100644 --- a/portable/src/implementation/portable/simd128.rs +++ b/portable/src/implementation/portable/simd128.rs @@ -218,9 +218,6 @@ impl Utf8CheckAlgorithm { } } -#[inline] -unsafe fn simd_prefetch(_ptr: *const u8) {} - #[cfg(all( any(target_arch = "aarch64", target_arch = "arm"), target_feature = "neon" @@ -233,7 +230,6 @@ const HAS_FAST_REDUCE_MAX: bool = true; )))] const HAS_FAST_REDUCE_MAX: bool = false; -const PREFETCH: bool = false; use crate::implementation::helpers::TempSimdChunkA16 as TempSimdChunk; simd_input_128_bit!(); algorithm_simd!(); diff --git a/portable/src/implementation/portable/simd256.rs b/portable/src/implementation/portable/simd256.rs index c99916ab..3dc7c0f0 100644 --- a/portable/src/implementation/portable/simd256.rs +++ b/portable/src/implementation/portable/simd256.rs @@ -225,10 +225,6 @@ impl Utf8CheckAlgorithm { } } -#[inline] -unsafe fn simd_prefetch(_ptr: *const u8) {} - -const PREFETCH: bool = false; use crate::implementation::helpers::TempSimdChunkA32 as TempSimdChunk; simd_input_256_bit!(); algorithm_simd!(); diff --git a/portable/src/lib.rs b/portable/src/lib.rs index 541fe7f4..997615f2 100644 --- a/portable/src/lib.rs +++ 
b/portable/src/lib.rs @@ -6,8 +6,8 @@ clippy::pedantic, clippy::nursery )] -#![allow(clippy::redundant_pub_crate)] // check is broken (see e.g. https://github.com/rust-lang/rust-clippy/issues/5369) -#![allow(clippy::missing_const_for_fn)] // not necessary most of the times +#![expect(clippy::redundant_pub_crate)] // check is broken (see e.g. https://github.com/rust-lang/rust-clippy/issues/5369) +#![expect(clippy::missing_const_for_fn)] // not necessary most of the times #![deny(missing_docs)] #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] From 9e3f8eb291d4d15338fd1c8348de18d1a075e0bd Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 05:55:21 +0200 Subject: [PATCH 07/83] benchmark for portable simdutf8 --- bench/Cargo.toml | 23 ++++++++++++++++++--- bench/benches/throughput_basic_portable.rs | 3 +++ bench/benches/throughput_compat_portable.rs | 3 +++ bench/src/lib.rs | 22 ++++++++++++++++++++ 4 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 bench/benches/throughput_basic_portable.rs create mode 100644 bench/benches/throughput_compat_portable.rs diff --git a/bench/Cargo.toml b/bench/Cargo.toml index 108a817b..20dc7437 100644 --- a/bench/Cargo.toml +++ b/bench/Cargo.toml @@ -10,18 +10,27 @@ repository = "https://github.com/rusticstuff/simdutf8" license = "MIT OR Apache-2.0" [features] -simdjson = [ "simdjson-utf8" ] +simdjson = ["simdjson-utf8"] # Enable wasm benchmarking simdutf8_wasmer = ["wasmer"] -simdutf8_wasmer_cranelift = ["simdutf8_wasmer", "wasmer/default-cranelift", "wasmer/default-universal"] -simdutf8_wasmer_llvm = ["simdutf8_wasmer", "wasmer/default-llvm", "wasmer/default-universal"] +simdutf8_wasmer_cranelift = [ + "simdutf8_wasmer", + "wasmer/default-cranelift", + "wasmer/default-universal", +] +simdutf8_wasmer_llvm = [ + "simdutf8_wasmer", + "wasmer/default-llvm", + "wasmer/default-universal", +] simdutf8_wasmtime = ["wasmtime"] [dependencies] core_affinity = "0.5" criterion = "0.3" 
simdutf8 = { version = "*", path = "..", features = ["aarch64_neon"] } +simdutf8-portable = { version = "*", path = "../portable" } simdjson-utf8 = { version = "*", path = "simdjson-utf8", optional = true } # default is cranelift which is not as performant as the llvm backend wasmer = { version = "2.1", optional = true, default-features = false } @@ -39,6 +48,14 @@ harness = false name = "throughput_compat" harness = false +[[bench]] +name = "throughput_basic_portable" +harness = false + +[[bench]] +name = "throughput_compat_portable" +harness = false + [[bench]] name = "throughput_std" harness = false diff --git a/bench/benches/throughput_basic_portable.rs b/bench/benches/throughput_basic_portable.rs new file mode 100644 index 00000000..e21f0965 --- /dev/null +++ b/bench/benches/throughput_basic_portable.rs @@ -0,0 +1,3 @@ +use simdutf8_bench::define_throughput_benchmark; + +define_throughput_benchmark!(BenchFn::Basic); diff --git a/bench/benches/throughput_compat_portable.rs b/bench/benches/throughput_compat_portable.rs new file mode 100644 index 00000000..1a7cf383 --- /dev/null +++ b/bench/benches/throughput_compat_portable.rs @@ -0,0 +1,3 @@ +use simdutf8_bench::define_throughput_benchmark; + +define_throughput_benchmark!(BenchFn::Compat); diff --git a/bench/src/lib.rs b/bench/src/lib.rs index cf654211..3ee33aee 100644 --- a/bench/src/lib.rs +++ b/bench/src/lib.rs @@ -1,6 +1,8 @@ use criterion::{measurement::Measurement, BenchmarkGroup, BenchmarkId, Criterion, Throughput}; use simdutf8::basic::from_utf8 as basic_from_utf8; use simdutf8::compat::from_utf8 as compat_from_utf8; +use simdutf8_portable::basic::from_utf8 as basic_from_utf8_portable; +use simdutf8_portable::compat::from_utf8 as compat_from_utf8_portable; use std::str::from_utf8 as std_from_utf8; @@ -29,6 +31,8 @@ pub enum BenchFn { Basic, BasicNoInline, Compat, + BasicPortable, + CompatPortable, Std, #[cfg(feature = "simdjson")] @@ -192,6 +196,24 @@ fn bench_input( }, ); } + BenchFn::BasicPortable => 
{ + group.bench_with_input( + BenchmarkId::from_parameter(format!("{:06}", input.len())), + &input, + |b, &slice| { + b.iter(|| assert_eq!(basic_from_utf8_portable(slice).is_ok(), expected_ok)); + }, + ); + } + BenchFn::CompatPortable => { + group.bench_with_input( + BenchmarkId::from_parameter(format!("{:06}", input.len())), + &input, + |b, &slice| { + b.iter(|| assert_eq!(compat_from_utf8_portable(slice).is_ok(), expected_ok)); + }, + ); + } BenchFn::Std => { group.bench_with_input( BenchmarkId::from_parameter(format!("{:06}", input.len())), From 4839d1fb32008bbb57308d13fc25d4a21c0f7dee Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 05:55:38 +0200 Subject: [PATCH 08/83] assembly baseline --- portable/baseline/basic.s | 395 +++++++++++++++++++++++++++++++++ portable/baseline/compat.s | 432 +++++++++++++++++++++++++++++++++++++ 2 files changed, 827 insertions(+) create mode 100644 portable/baseline/basic.s create mode 100644 portable/baseline/compat.s diff --git a/portable/baseline/basic.s b/portable/baseline/basic.s new file mode 100644 index 00000000..77aaec02 --- /dev/null +++ b/portable/baseline/basic.s @@ -0,0 +1,395 @@ +.section __TEXT,__text,regular,pure_instructions + .globl simdutf8_portable::implementation::validate_utf8_basic_simd + .p2align 2 +simdutf8_portable::implementation::validate_utf8_basic_simd: +Lfunc_begin4: + .cfi_startproc + sub sp, sp, #128 + .cfi_def_cfa_offset 128 + stp x29, x30, [sp, #112] + add x29, sp, #112 + .cfi_def_cfa w29, 16 + .cfi_offset w30, -8 + .cfi_offset w29, -16 + .cfi_remember_state + mov x10, #0 + and x8, x1, #0xffffffffffffffc0 + mov x9, x8 + cmp x10, x8 + b.hs LBB4_8 + add x11, x0, x10 + ldp q18, q17, [x11] + ldp q16, q7, [x11, #32] + add x10, x10, #64 + orr.16b v0, v17, v18 + orr.16b v1, v16, v7 + orr.16b v0, v0, v1 + umaxv.16b b0, v0 + fmov w11, s0 + tbz w11, #7, LBB4_1 + movi.2d v4, #0000000000000000 + ext.16b v2, v4, v18, #15 + ushr.16b v1, v2, #4 +Lloh24: + adrp x9, lCPI4_0@PAGE +Lloh25: + ldr q0, 
[x9, lCPI4_0@PAGEOFF] + tbl.16b v5, { v0 }, v1 + movi.16b v1, #15 + and.16b v3, v2, v1 +Lloh26: + adrp x9, lCPI4_1@PAGE +Lloh27: + ldr q2, [x9, lCPI4_1@PAGEOFF] + tbl.16b v6, { v2 }, v3 + ushr.16b v19, v18, #4 +Lloh28: + adrp x9, lCPI4_2@PAGE +Lloh29: + ldr q3, [x9, lCPI4_2@PAGEOFF] + tbl.16b v19, { v3 }, v19 + and.16b v5, v6, v5 + and.16b v19, v5, v19 + ext.16b v5, v4, v18, #14 + ext.16b v6, v4, v18, #13 + movi.16b v4, #223 + cmhi.16b v20, v5, v4 + movi.16b v5, #239 + cmhi.16b v6, v6, v5 + orr.16b v20, v6, v20 + movi.16b v6, #128 + and.16b v20, v20, v6 + eor.16b v19, v19, v20 + ext.16b v20, v18, v17, #15 + ushr.16b v21, v20, #4 + tbl.16b v21, { v0 }, v21 + and.16b v20, v20, v1 + tbl.16b v20, { v2 }, v20 + ushr.16b v22, v17, #4 + tbl.16b v22, { v3 }, v22 + and.16b v20, v20, v21 + and.16b v20, v20, v22 + ext.16b v21, v18, v17, #14 + ext.16b v18, v18, v17, #13 + cmhi.16b v21, v21, v4 + cmhi.16b v18, v18, v5 + orr.16b v18, v18, v21 + and.16b v18, v18, v6 + eor.16b v18, v20, v18 + orr.16b v18, v18, v19 + ext.16b v19, v17, v16, #15 + ushr.16b v20, v19, #4 + tbl.16b v20, { v0 }, v20 + and.16b v19, v19, v1 + tbl.16b v19, { v2 }, v19 + ushr.16b v21, v16, #4 + tbl.16b v21, { v3 }, v21 + and.16b v19, v19, v20 + and.16b v19, v19, v21 + ext.16b v20, v17, v16, #14 + ext.16b v17, v17, v16, #13 + cmhi.16b v20, v20, v4 + cmhi.16b v17, v17, v5 + orr.16b v17, v17, v20 + and.16b v17, v17, v6 + eor.16b v17, v19, v17 + ext.16b v19, v16, v7, #15 + ushr.16b v20, v19, #4 + tbl.16b v20, { v0 }, v20 + and.16b v19, v19, v1 + tbl.16b v19, { v2 }, v19 + ushr.16b v21, v7, #4 + tbl.16b v21, { v3 }, v21 + and.16b v19, v19, v20 + and.16b v19, v19, v21 + ext.16b v20, v16, v7, #14 + ext.16b v16, v16, v7, #13 + cmhi.16b v20, v20, v4 + cmhi.16b v16, v16, v5 + orr.16b v16, v16, v20 + and.16b v16, v16, v6 + eor.16b v16, v19, v16 + orr.16b v16, v17, v16 + orr.16b v23, v18, v16 +Lloh30: + adrp x9, lCPI4_3@PAGE +Lloh31: + ldr q16, [x9, lCPI4_3@PAGEOFF] + uqsub.16b v19, v7, v16 + cmp x10, x8 + b.hs LBB4_22 
+ mov x9, x10 + b LBB4_6 + ext.16b v19, v7, v20, #15 + ushr.16b v21, v19, #4 + tbl.16b v21, { v0 }, v21 + and.16b v19, v19, v1 + tbl.16b v19, { v2 }, v19 + ushr.16b v22, v20, #4 + tbl.16b v22, { v3 }, v22 + and.16b v19, v19, v21 + and.16b v19, v19, v22 + ext.16b v21, v7, v20, #14 + ext.16b v7, v7, v20, #13 + cmhi.16b v21, v21, v4 + cmhi.16b v7, v7, v5 + orr.16b v7, v7, v21 + and.16b v7, v7, v6 + eor.16b v7, v19, v7 + ext.16b v19, v20, v18, #15 + ushr.16b v21, v19, #4 + tbl.16b v21, { v0 }, v21 + and.16b v19, v19, v1 + tbl.16b v19, { v2 }, v19 + ushr.16b v22, v18, #4 + tbl.16b v22, { v3 }, v22 + and.16b v19, v19, v21 + and.16b v19, v19, v22 + ext.16b v21, v20, v18, #14 + ext.16b v20, v20, v18, #13 + cmhi.16b v21, v21, v4 + cmhi.16b v20, v20, v5 + orr.16b v20, v20, v21 + and.16b v20, v20, v6 + eor.16b v19, v19, v20 + orr.16b v7, v19, v7 + ext.16b v19, v18, v17, #15 + ushr.16b v20, v19, #4 + tbl.16b v20, { v0 }, v20 + and.16b v19, v19, v1 + tbl.16b v19, { v2 }, v19 + ushr.16b v21, v17, #4 + tbl.16b v21, { v3 }, v21 + and.16b v19, v19, v20 + and.16b v19, v19, v21 + ext.16b v20, v18, v17, #14 + ext.16b v18, v18, v17, #13 + cmhi.16b v20, v20, v4 + cmhi.16b v18, v18, v5 + orr.16b v18, v18, v20 + and.16b v18, v18, v6 + eor.16b v18, v19, v18 + ext.16b v19, v17, v24, #15 + ushr.16b v20, v19, #4 + tbl.16b v20, { v0 }, v20 + and.16b v19, v19, v1 + tbl.16b v19, { v2 }, v19 + ushr.16b v21, v24, #4 + tbl.16b v21, { v3 }, v21 + and.16b v19, v19, v20 + and.16b v19, v19, v21 + ext.16b v20, v17, v24, #14 + ext.16b v17, v17, v24, #13 + cmhi.16b v20, v20, v4 + cmhi.16b v17, v17, v5 + orr.16b v17, v17, v20 + and.16b v17, v17, v6 + eor.16b v17, v19, v17 + orr.16b v17, v18, v17 + orr.16b v19, v7, v17 + mov.16b v7, v24 + uqsub.16b v17, v24, v16 + orr.16b v23, v19, v23 + add x9, x9, #64 + mov.16b v19, v17 + cmp x9, x8 + b.hs LBB4_9 +LBB4_6: + add x10, x0, x9 + ldp q20, q18, [x10] + ldp q17, q24, [x10, #32] + orr.16b v21, v18, v20 + orr.16b v22, v17, v24 + orr.16b v21, v21, v22 + umaxv.16b 
b21, v21 + fmov w10, s21 + tbnz w10, #7, LBB4_5 + mov.16b v17, v19 + mov.16b v24, v7 + orr.16b v23, v19, v23 + add x9, x9, #64 + mov.16b v19, v17 + cmp x9, x8 + b.lo LBB4_6 + b LBB4_9 +LBB4_8: + movi.2d v23, #0000000000000000 + movi.2d v17, #0000000000000000 + movi.2d v24, #0000000000000000 +LBB4_9: + subs x2, x1, x9 + b.ls LBB4_18 +LBB4_10: + movi.2d v0, #0000000000000000 + stp q0, q0, [sp, #80] + stp q0, q0, [sp, #48] + add x1, x0, x9 + add x0, sp, #48 + subs x8, x2, #32 + b.hs LBB4_19 + subs x8, x2, #16 + b.hs LBB4_20 +LBB4_12: + subs x8, x2, #8 + b.hs LBB4_21 +LBB4_13: + cbz x2, LBB4_15 + stp q24, q23, [sp, #16] + str q17, [sp] + bl _memcpy + ldp q17, q24, [sp] + ldr q23, [sp, #32] +LBB4_15: + ldp q3, q2, [sp, #48] + ldp q1, q0, [sp, #80] + orr.16b v4, v2, v3 + orr.16b v5, v1, v0 + orr.16b v4, v4, v5 + umaxv.16b b4, v4 + fmov w8, s4 + mov.16b v4, v17 + tbz w8, #7, LBB4_17 + ext.16b v4, v24, v3, #15 + ushr.16b v5, v4, #4 +Lloh32: + adrp x8, lCPI4_0@PAGE +Lloh33: + ldr q6, [x8, lCPI4_0@PAGEOFF] + tbl.16b v5, { v6 }, v5 + movi.16b v7, #15 + and.16b v4, v4, v7 +Lloh34: + adrp x8, lCPI4_1@PAGE +Lloh35: + ldr q16, [x8, lCPI4_1@PAGEOFF] + tbl.16b v4, { v16 }, v4 + ushr.16b v17, v3, #4 +Lloh36: + adrp x8, lCPI4_2@PAGE +Lloh37: + ldr q18, [x8, lCPI4_2@PAGEOFF] + tbl.16b v17, { v18 }, v17 + and.16b v4, v4, v5 + and.16b v4, v4, v17 + ext.16b v5, v24, v3, #14 + ext.16b v17, v24, v3, #13 + movi.16b v19, #223 + cmhi.16b v5, v5, v19 + movi.16b v20, #239 + cmhi.16b v17, v17, v20 + orr.16b v5, v17, v5 + movi.16b v17, #128 + and.16b v5, v5, v17 + eor.16b v4, v4, v5 + ext.16b v5, v3, v2, #15 + ushr.16b v21, v5, #4 + tbl.16b v21, { v6 }, v21 + and.16b v5, v5, v7 + tbl.16b v5, { v16 }, v5 + ushr.16b v22, v2, #4 + tbl.16b v22, { v18 }, v22 + and.16b v5, v5, v21 + and.16b v5, v5, v22 + ext.16b v21, v3, v2, #14 + ext.16b v3, v3, v2, #13 + cmhi.16b v21, v21, v19 + cmhi.16b v3, v3, v20 + orr.16b v3, v3, v21 + and.16b v3, v3, v17 + eor.16b v3, v5, v3 + orr.16b v3, v3, v4 + ext.16b v4, 
v2, v1, #15 + ushr.16b v5, v4, #4 + tbl.16b v5, { v6 }, v5 + and.16b v4, v4, v7 + tbl.16b v4, { v16 }, v4 + ushr.16b v21, v1, #4 + tbl.16b v21, { v18 }, v21 + and.16b v4, v4, v5 + and.16b v4, v4, v21 + ext.16b v5, v2, v1, #14 + ext.16b v2, v2, v1, #13 + cmhi.16b v5, v5, v19 + cmhi.16b v2, v2, v20 + orr.16b v2, v2, v5 + and.16b v2, v2, v17 + eor.16b v2, v4, v2 + ext.16b v4, v1, v0, #15 + ushr.16b v5, v4, #4 + tbl.16b v5, { v6 }, v5 + and.16b v4, v4, v7 + tbl.16b v4, { v16 }, v4 + ushr.16b v6, v0, #4 + tbl.16b v6, { v18 }, v6 + and.16b v4, v4, v5 + and.16b v4, v4, v6 + ext.16b v5, v1, v0, #14 + ext.16b v1, v1, v0, #13 + cmhi.16b v5, v5, v19 + cmhi.16b v1, v1, v20 + orr.16b v1, v1, v5 + and.16b v1, v1, v17 + eor.16b v1, v4, v1 + orr.16b v1, v2, v1 + orr.16b v17, v3, v1 +Lloh38: + adrp x8, lCPI4_3@PAGE +Lloh39: + ldr q1, [x8, lCPI4_3@PAGEOFF] + uqsub.16b v4, v0, v1 + orr.16b v23, v17, v23 + mov.16b v17, v4 +LBB4_18: + orr.16b v0, v17, v23 + umaxv.16b b0, v0 + fmov w8, s0 + tst w8, #0xff + cset w0, ne + .cfi_def_cfa wsp, 128 + ldp x29, x30, [sp, #112] + add sp, sp, #128 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 + ret +LBB4_19: + .cfi_restore_state + ldp q0, q1, [x1], #32 + stp q0, q1, [sp, #48] + add x0, x0, #32 + mov x2, x8 + subs x8, x8, #16 + b.lo LBB4_12 +LBB4_20: + ldr q0, [x1], #16 + str q0, [x0], #16 + mov x2, x8 + subs x8, x8, #8 + b.lo LBB4_13 +LBB4_21: + ldr x9, [x1], #8 + str x9, [x0], #8 + mov x2, x8 + cbnz x8, LBB4_14 + b LBB4_15 +LBB4_22: + mov.16b v17, v19 + mov.16b v24, v7 + mov x9, x10 + subs x2, x1, x10 + b.hi LBB4_10 + b LBB4_18 + .loh AdrpLdr Lloh30, Lloh31 + .loh AdrpAdrp Lloh28, Lloh30 + .loh AdrpLdr Lloh28, Lloh29 + .loh AdrpAdrp Lloh26, Lloh28 + .loh AdrpLdr Lloh26, Lloh27 + .loh AdrpAdrp Lloh24, Lloh26 + .loh AdrpLdr Lloh24, Lloh25 + .loh AdrpLdr Lloh38, Lloh39 + .loh AdrpAdrp Lloh36, Lloh38 + .loh AdrpLdr Lloh36, Lloh37 + .loh AdrpAdrp Lloh34, Lloh36 + .loh AdrpLdr Lloh34, Lloh35 + .loh AdrpAdrp Lloh32, Lloh34 + .loh AdrpLdr 
Lloh32, Lloh33 diff --git a/portable/baseline/compat.s b/portable/baseline/compat.s new file mode 100644 index 00000000..dcb20098 --- /dev/null +++ b/portable/baseline/compat.s @@ -0,0 +1,432 @@ +.section __TEXT,__text,regular,pure_instructions + .globl simdutf8_portable::implementation::validate_utf8_compat_simd + .p2align 2 +simdutf8_portable::implementation::validate_utf8_compat_simd: +Lfunc_begin5: + .cfi_startproc + sub sp, sp, #224 + .cfi_def_cfa_offset 224 + stp x22, x21, [sp, #176] + stp x20, x19, [sp, #192] + stp x29, x30, [sp, #208] + add x29, sp, #208 + .cfi_def_cfa w29, 16 + .cfi_offset w30, -8 + .cfi_offset w29, -16 + .cfi_offset w19, -24 + .cfi_offset w20, -32 + .cfi_offset w21, -40 + .cfi_offset w22, -48 + .cfi_remember_state + mov x19, x1 + mov x20, x0 + mov x21, #0 + and x9, x1, #0xffffffffffffffc0 + movi.2d v20, #0000000000000000 +Lloh40: + adrp x10, lCPI5_0@PAGE +Lloh41: + ldr q22, [x10, lCPI5_0@PAGEOFF] +Lloh42: + adrp x10, lCPI5_1@PAGE +Lloh43: + ldr q23, [x10, lCPI5_1@PAGEOFF] +Lloh44: + adrp x10, lCPI5_2@PAGE +Lloh45: + ldr q24, [x10, lCPI5_2@PAGEOFF] +Lloh46: + adrp x10, lCPI5_3@PAGE +Lloh47: + ldr q21, [x10, lCPI5_3@PAGEOFF] + mov w10, #1 + movi.16b v0, #15 + movi.16b v1, #223 + movi.16b v2, #239 + movi.16b v3, #128 + movi.2d v18, #0000000000000000 + movi.2d v19, #0000000000000000 + cmp x21, x9 + tbz w10, #0, LBB5_4 +LBB5_1: + b.hs LBB5_15 +LBB5_2: + add x11, x20, x21 + ldp q7, q6, [x11] + ldp q5, q4, [x11, #32] + orr.16b v16, v6, v7 + orr.16b v17, v5, v4 + orr.16b v16, v16, v17 + umaxv.16b b16, v16 + fmov w11, s16 + tbnz w11, #7, LBB5_11 + add x21, x21, #64 + cmp x21, x9 + b.lo LBB5_2 + b LBB5_15 + b.hs LBB5_15 + add x11, x20, x21 + ldp q6, q5, [x11] + ldp q4, q7, [x11, #32] + orr.16b v16, v5, v6 + orr.16b v17, v4, v7 + orr.16b v16, v16, v17 + umaxv.16b b16, v16 + fmov w11, s16 + tbnz w11, #7, LBB5_7 + orr.16b v20, v20, v18 + umaxv.16b b4, v20 + fmov w11, s4 + cbz w11, LBB5_13 + b LBB5_29 + ext.16b v17, v19, v6, #15 + ext.16b v18, v19, v6, 
#14 + ext.16b v16, v19, v6, #13 + mov.16b v19, v7 + ushr.16b v7, v17, #4 + tbl.16b v7, { v22 }, v7 + and.16b v17, v17, v0 + tbl.16b v17, { v23 }, v17 + and.16b v7, v17, v7 + ushr.16b v17, v6, #4 + tbl.16b v17, { v24 }, v17 + and.16b v7, v7, v17 + cmhi.16b v17, v18, v1 + cmhi.16b v16, v16, v2 + orr.16b v16, v16, v17 + and.16b v16, v16, v3 + eor.16b v7, v7, v16 + ext.16b v16, v6, v5, #15 + ushr.16b v17, v16, #4 + tbl.16b v17, { v22 }, v17 + and.16b v16, v16, v0 + tbl.16b v16, { v23 }, v16 + ushr.16b v18, v5, #4 + tbl.16b v18, { v24 }, v18 + and.16b v16, v16, v17 + and.16b v16, v16, v18 + ext.16b v17, v6, v5, #14 + ext.16b v6, v6, v5, #13 + cmhi.16b v17, v17, v1 + cmhi.16b v6, v6, v2 + orr.16b v6, v6, v17 + and.16b v6, v6, v3 + eor.16b v6, v16, v6 + ext.16b v16, v5, v4, #15 + ushr.16b v17, v16, #4 + tbl.16b v17, { v22 }, v17 + and.16b v16, v16, v0 + tbl.16b v16, { v23 }, v16 + ushr.16b v18, v4, #4 + tbl.16b v18, { v24 }, v18 + and.16b v16, v16, v17 + and.16b v16, v16, v18 + ext.16b v17, v5, v4, #14 + ext.16b v5, v5, v4, #13 + cmhi.16b v17, v17, v1 + cmhi.16b v5, v5, v2 + orr.16b v5, v5, v17 + and.16b v5, v5, v3 + eor.16b v5, v16, v5 + ext.16b v16, v4, v19, #15 + ushr.16b v17, v16, #4 + tbl.16b v17, { v22 }, v17 + and.16b v16, v16, v0 + tbl.16b v16, { v23 }, v16 + ushr.16b v18, v19, #4 + tbl.16b v18, { v24 }, v18 + and.16b v16, v16, v17 + and.16b v16, v16, v18 + ext.16b v17, v4, v19, #14 + ext.16b v4, v4, v19, #13 + cmhi.16b v17, v17, v1 + cmhi.16b v4, v4, v2 + orr.16b v4, v4, v17 + and.16b v4, v4, v3 + eor.16b v4, v16, v4 + orr.16b v7, v7, v20 + orr.16b v5, v6, v5 + orr.16b v5, v7, v5 + orr.16b v20, v5, v4 + umaxv.16b b4, v20 + fmov w11, s4 + cbnz w11, LBB5_29 + add x11, x21, #64 + cmp x11, x9 + b.hs LBB5_14 + add x12, x20, x21 + ldp q6, q5, [x12, #64] + ldp q4, q7, [x12, #96] + orr.16b v16, v5, v6 + orr.16b v17, v4, v7 + orr.16b v16, v16, v17 + umaxv.16b b16, v16 + fmov w12, s16 + mov x21, x11 + tbnz w12, #7, LBB5_7 + uqsub.16b v18, v19, v21 + mov x21, x11 + orr.16b 
v20, v20, v18 + umaxv.16b b4, v20 + fmov w11, s4 + cbz w11, LBB5_13 + b LBB5_29 + ext.16b v16, v19, v7, #15 + ushr.16b v17, v16, #4 + tbl.16b v17, { v22 }, v17 + and.16b v16, v16, v0 + tbl.16b v16, { v23 }, v16 + ushr.16b v18, v7, #4 + tbl.16b v18, { v24 }, v18 + and.16b v16, v16, v17 + and.16b v16, v16, v18 + ext.16b v17, v19, v7, #14 + ext.16b v18, v19, v7, #13 + cmhi.16b v17, v17, v1 + cmhi.16b v18, v18, v2 + orr.16b v17, v18, v17 + and.16b v17, v17, v3 + eor.16b v16, v16, v17 + ext.16b v17, v7, v6, #15 + ushr.16b v18, v17, #4 + tbl.16b v18, { v22 }, v18 + and.16b v17, v17, v0 + tbl.16b v17, { v23 }, v17 + ushr.16b v19, v6, #4 + tbl.16b v19, { v24 }, v19 + and.16b v17, v17, v18 + and.16b v17, v17, v19 + ext.16b v18, v7, v6, #14 + ext.16b v7, v7, v6, #13 + cmhi.16b v18, v18, v1 + cmhi.16b v7, v7, v2 + orr.16b v7, v7, v18 + and.16b v7, v7, v3 + eor.16b v7, v17, v7 + ext.16b v17, v6, v5, #15 + ushr.16b v18, v17, #4 + tbl.16b v18, { v22 }, v18 + and.16b v17, v17, v0 + tbl.16b v17, { v23 }, v17 + ushr.16b v19, v5, #4 + tbl.16b v19, { v24 }, v19 + and.16b v17, v17, v18 + and.16b v17, v17, v19 + ext.16b v18, v6, v5, #14 + ext.16b v6, v6, v5, #13 + cmhi.16b v18, v18, v1 + cmhi.16b v6, v6, v2 + orr.16b v6, v6, v18 + and.16b v6, v6, v3 + eor.16b v6, v17, v6 + ext.16b v17, v5, v4, #15 + ushr.16b v18, v17, #4 + tbl.16b v18, { v22 }, v18 + and.16b v17, v17, v0 + tbl.16b v17, { v23 }, v17 + ushr.16b v19, v4, #4 + tbl.16b v19, { v24 }, v19 + and.16b v17, v17, v18 + and.16b v17, v17, v19 + ext.16b v18, v5, v4, #14 + ext.16b v5, v5, v4, #13 + cmhi.16b v18, v18, v1 + cmhi.16b v5, v5, v2 + orr.16b v5, v5, v18 + and.16b v5, v5, v3 + eor.16b v5, v17, v5 + orr.16b v7, v16, v7 + orr.16b v5, v6, v5 + orr.16b v5, v7, v5 + orr.16b v20, v5, v20 + umaxv.16b b5, v20 + fmov w11, s5 + cbnz w11, LBB5_29 + uqsub.16b v18, v4, v21 + mov.16b v19, v4 + add x21, x21, #64 + eor w10, w10, #0x1 + cmp x21, x9 + tbz w10, #0, LBB5_4 + b LBB5_1 +LBB5_14: + uqsub.16b v18, v19, v21 + mov x21, x11 +LBB5_15: + 
subs x2, x19, x21 + b.ls LBB5_24 + movi.2d v0, #0000000000000000 + stp q0, q0, [x29, #-64] + stp q0, q0, [x29, #-96] + add x1, x20, x21 + sub x0, x29, #96 + subs x9, x2, #32 + b.hs LBB5_26 + subs x9, x2, #16 + b.hs LBB5_27 +LBB5_18: + subs x9, x2, #8 + str q20, [sp, #96] + b.hs LBB5_28 +LBB5_19: + cbz x2, LBB5_21 + mov x22, x8 + stp q22, q21, [sp, #64] + stp q24, q23, [sp, #32] + stp q18, q19, [sp] + bl _memcpy + ldp q18, q19, [sp] + ldp q24, q23, [sp, #32] + ldp q22, q21, [sp, #64] + mov x8, x22 +LBB5_21: + ldp q3, q2, [x29, #-96] + ldp q1, q0, [x29, #-64] + orr.16b v4, v2, v3 + orr.16b v5, v1, v0 + orr.16b v4, v4, v5 + umaxv.16b b4, v4 + fmov w9, s4 + mov.16b v4, v18 + tbz w9, #7, LBB5_23 + ext.16b v4, v19, v3, #15 + ushr.16b v5, v4, #4 + tbl.16b v5, { v22 }, v5 + movi.16b v6, #15 + and.16b v4, v4, v6 + tbl.16b v4, { v23 }, v4 + ushr.16b v7, v3, #4 + tbl.16b v7, { v24 }, v7 + and.16b v4, v4, v5 + and.16b v4, v4, v7 + ext.16b v5, v19, v3, #14 + ext.16b v7, v19, v3, #13 + movi.16b v16, #223 + cmhi.16b v5, v5, v16 + movi.16b v17, #239 + cmhi.16b v7, v7, v17 + orr.16b v5, v7, v5 + movi.16b v7, #128 + and.16b v5, v5, v7 + eor.16b v4, v4, v5 + ext.16b v5, v3, v2, #15 + ushr.16b v18, v5, #4 + tbl.16b v18, { v22 }, v18 + and.16b v5, v5, v6 + tbl.16b v5, { v23 }, v5 + ushr.16b v19, v2, #4 + tbl.16b v19, { v24 }, v19 + and.16b v5, v5, v18 + and.16b v5, v5, v19 + ext.16b v18, v3, v2, #14 + ext.16b v3, v3, v2, #13 + cmhi.16b v18, v18, v16 + cmhi.16b v3, v3, v17 + orr.16b v3, v3, v18 + and.16b v3, v3, v7 + eor.16b v3, v5, v3 + orr.16b v3, v3, v4 + ext.16b v4, v2, v1, #15 + ushr.16b v5, v4, #4 + tbl.16b v5, { v22 }, v5 + and.16b v4, v4, v6 + tbl.16b v4, { v23 }, v4 + ushr.16b v18, v1, #4 + tbl.16b v18, { v24 }, v18 + and.16b v4, v4, v5 + and.16b v4, v4, v18 + ext.16b v5, v2, v1, #14 + ext.16b v2, v2, v1, #13 + cmhi.16b v5, v5, v16 + cmhi.16b v2, v2, v17 + orr.16b v2, v2, v5 + and.16b v2, v2, v7 + eor.16b v2, v4, v2 + ext.16b v4, v1, v0, #15 + ushr.16b v5, v4, #4 + tbl.16b v5, 
{ v22 }, v5 + and.16b v4, v4, v6 + tbl.16b v4, { v23 }, v4 + ushr.16b v6, v0, #4 + tbl.16b v6, { v24 }, v6 + and.16b v4, v4, v5 + and.16b v4, v4, v6 + ext.16b v5, v1, v0, #14 + ext.16b v1, v1, v0, #13 + cmhi.16b v5, v5, v16 + cmhi.16b v1, v1, v17 + orr.16b v1, v1, v5 + and.16b v1, v1, v7 + eor.16b v1, v4, v1 + orr.16b v1, v2, v1 + orr.16b v18, v3, v1 + uqsub.16b v4, v0, v21 + ldr q20, [sp, #96] + orr.16b v20, v18, v20 + mov.16b v18, v4 +LBB5_24: + orr.16b v0, v18, v20 + umaxv.16b b0, v0 + fmov w9, s0 + cbnz w9, LBB5_29 + mov w9, #2 + strb w9, [x8, #8] + .cfi_def_cfa wsp, 224 + ldp x29, x30, [sp, #208] + ldp x20, x19, [sp, #192] + ldp x22, x21, [sp, #176] + add sp, sp, #224 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 + .cfi_restore w19 + .cfi_restore w20 + .cfi_restore w21 + .cfi_restore w22 + ret +LBB5_26: + .cfi_restore_state + ldp q0, q1, [x1], #32 + stp q0, q1, [x29, #-96] + add x0, x0, #32 + mov x2, x9 + subs x9, x9, #16 + b.lo LBB5_18 +LBB5_27: + ldr q0, [x1], #16 + str q0, [x0], #16 + mov x2, x9 + subs x9, x9, #8 + str q20, [sp, #96] + b.lo LBB5_19 +LBB5_28: + ldr x10, [x1], #8 + str x10, [x0], #8 + mov x2, x9 + cbnz x9, LBB5_20 + b LBB5_21 +LBB5_29: + mov x0, x8 + mov x1, x20 + mov x2, x19 + mov x3, x21 + .cfi_def_cfa wsp, 224 + ldp x29, x30, [sp, #208] + ldp x20, x19, [sp, #192] + ldp x22, x21, [sp, #176] + add sp, sp, #224 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 + .cfi_restore w19 + .cfi_restore w20 + .cfi_restore w21 + .cfi_restore w22 + b simdutf8_portable::implementation::helpers::get_compat_error + .loh AdrpLdr Lloh46, Lloh47 + .loh AdrpAdrp Lloh44, Lloh46 + .loh AdrpLdr Lloh44, Lloh45 + .loh AdrpAdrp Lloh42, Lloh44 + .loh AdrpLdr Lloh42, Lloh43 + .loh AdrpAdrp Lloh40, Lloh42 + .loh AdrpLdr Lloh40, Lloh41 From 906fb2dc1006bf20542e217dcebf8a6645f78039 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 06:16:07 +0200 Subject: [PATCH 09/83] don't check in Cargo.toml for lib --- portable/.gitignore | 1 
+ 1 file changed, 1 insertion(+) diff --git a/portable/.gitignore b/portable/.gitignore index cbfad3bb..5038f87b 100644 --- a/portable/.gitignore +++ b/portable/.gitignore @@ -3,3 +3,4 @@ /.idea /.zed /.cargo +/Cargo.lock From 4076f77e7d4ca60383145e8a8fa3e778f963ae6d Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 06:16:16 +0200 Subject: [PATCH 10/83] Cargo.toml for benchmark --- bench/Cargo.lock | 2869 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2869 insertions(+) create mode 100644 bench/Cargo.lock diff --git a/bench/Cargo.lock b/bench/Cargo.lock new file mode 100644 index 00000000..34062004 --- /dev/null +++ b/bench/Cargo.lock @@ -0,0 +1,2869 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "addr2line" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9ecd88a8c8378ca913a680cd98f0f13ac67383d35993f86c90a70e3f137816b" +dependencies = [ + "gimli 0.26.2", +] + +[[package]] +name = "addr2line" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfbe277e56a376000877090da837660b4427aad530e3028d44e0bffe4f89a1c1" +dependencies = [ + "gimli 0.31.1", +] + +[[package]] +name = "adler2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" + +[[package]] +name = "ahash" +version = "0.7.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = 
"ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi 0.3.9", +] + +[[package]] +name = "anyhow" +version = "1.0.90" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37bf3594c4c988a53154954629820791dde498571819ae4ca50ca811e060cc95" + +[[package]] +name = "async-trait" +version = "0.1.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.82", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi 0.1.19", + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "backtrace" +version = "0.3.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +dependencies = [ + "addr2line 0.24.2", + "cfg-if 1.0.0", + "libc", + "miniz_oxide", + "object 0.36.5", + "rustc-demangle", + "windows-targets", +] + +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bindgen" +version = "0.54.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "66c0bb6167449588ff70803f4127f0684f9063097eca5016f37eb52b92c2cf36" +dependencies = [ + "bitflags 1.3.2", + "cexpr", + "cfg-if 0.1.10", + "clang-sys", + "clap", + "env_logger 0.7.1", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex 0.1.1", + "which 3.1.1", +] + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] +name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + +[[package]] +name = "bytecheck" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" +dependencies = [ + "bytecheck_derive", + "ptr_meta", + "simdutf8 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "bytecheck_derive" +version = "0.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" + +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + +[[package]] +name = "cc" +version = "1.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2e7962b54006dcfcc61cb72735f4d89bb97061dd6a7ed882ec6b8ee53714c6f" +dependencies = [ + "jobserver", + "libc", + "shlex 1.3.0", +] + +[[package]] +name = "cexpr" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clang-sys" +version = "0.29.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe6837df1d5cba2397b835c8530f51723267e16abbf83892e9e5af4f0e5dd10a" +dependencies = [ + "glob", + "libc", + "libloading 0.5.2", +] + +[[package]] +name = "clap" +version = "2.34.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" +dependencies = [ + "ansi_term", + "atty", + "bitflags 1.3.2", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "cmake" +version = "0.1.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb1e43aa7fd152b1f968787f7dbcdeb306d1867ff373c69955211876c053f91a" +dependencies = [ + "cc", +] + +[[package]] +name = "core_affinity" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f8a03115cc34fb0d7c321dd154a3914b3ca082ccc5c11d91bf7117dbbe7171f" +dependencies = [ + "kernel32-sys", + "libc", + "num_cpus", + "winapi 0.2.8", +] + +[[package]] +name = "corosensei" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80128832c58ea9cbd041d2a759ec449224487b2c1e400453d99d244eead87a8e" +dependencies = [ + "autocfg", + "cfg-if 1.0.0", + "libc", + "scopeguard", + "windows-sys 0.33.0", +] + +[[package]] +name = "cpp_demangle" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eeaa953eaad386a53111e47172c2fedba671e5684c8dd601a5f474f4f118710f" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "cpufeatures" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +dependencies = [ + "libc", +] + +[[package]] +name = "cranelift-bforest" +version = "0.79.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebddaa5d12cb299b0bc7c930aff12c5591d4ba9aa84eea637807e07283b900aa" +dependencies = [ + "cranelift-entity 0.79.1", +] + +[[package]] +name = "cranelift-bforest" +version = "0.82.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38faa2a16616c8e78a18d37b4726b98bfd2de192f2fdc8a39ddf568a408a0f75" +dependencies = [ + "cranelift-entity 0.82.3", +] + 
+[[package]] +name = "cranelift-codegen" +version = "0.79.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da1daf5641177162644b521b64418564b8ed5deb126275a4d91472d13e7c72df" +dependencies = [ + "cranelift-bforest 0.79.1", + "cranelift-codegen-meta 0.79.1", + "cranelift-codegen-shared 0.79.1", + "cranelift-entity 0.79.1", + "gimli 0.26.2", + "log", + "regalloc 0.0.33", + "smallvec", + "target-lexicon", +] + +[[package]] +name = "cranelift-codegen" +version = "0.82.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26f192472a3ba23860afd07d2b0217dc628f21fcc72617aa1336d98e1671f33b" +dependencies = [ + "cranelift-bforest 0.82.3", + "cranelift-codegen-meta 0.82.3", + "cranelift-codegen-shared 0.82.3", + "cranelift-entity 0.82.3", + "gimli 0.26.2", + "log", + "regalloc 0.0.34", + "smallvec", + "target-lexicon", +] + +[[package]] +name = "cranelift-codegen-meta" +version = "0.79.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "001c1e9e540940c81596e547e732f99c2146c21ea7e82da99be961a1e86feefa" +dependencies = [ + "cranelift-codegen-shared 0.79.1", +] + +[[package]] +name = "cranelift-codegen-meta" +version = "0.82.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f32ddb89e9b89d3d9b36a5b7d7ea3261c98235a76ac95ba46826b8ec40b1a24" +dependencies = [ + "cranelift-codegen-shared 0.82.3", +] + +[[package]] +name = "cranelift-codegen-shared" +version = "0.79.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ebaf07b5d7501cc606f41c81333bd63a5a17eb501362ccb10bc8ff5c03d0232" + +[[package]] +name = "cranelift-codegen-shared" +version = "0.82.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01fd0d9f288cc1b42d9333b7a776b17e278fc888c28e6a0f09b5573d45a150bc" + +[[package]] +name = "cranelift-entity" +version = "0.79.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a27ada0e3ffe5325179fc750252c18d614fa5470d595ce5c8a794c495434d80a" +dependencies = [ + "serde", +] + +[[package]] +name = "cranelift-entity" +version = "0.82.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e3bfe172b83167604601faf9dc60453e0d0a93415b57a9c4d1a7ae6849185cf" + +[[package]] +name = "cranelift-frontend" +version = "0.79.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2912c0eec9fd3df2dcf82b02b642caaa85d762b84ac5a3b27bc93a07eeeb64e2" +dependencies = [ + "cranelift-codegen 0.79.1", + "log", + "smallvec", + "target-lexicon", +] + +[[package]] +name = "cranelift-frontend" +version = "0.82.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a006e3e32d80ce0e4ba7f1f9ddf66066d052a8c884a110b91d05404d6ce26dce" +dependencies = [ + "cranelift-codegen 0.82.3", + "log", + "smallvec", + "target-lexicon", +] + +[[package]] +name = "cranelift-native" +version = "0.79.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fd20f78f378f55a70738a2eb9815dcd7e8455ff091b70701cfd086dd44927da" +dependencies = [ + "cranelift-codegen 0.79.1", + "libc", + "target-lexicon", +] + +[[package]] +name = "cranelift-wasm" +version = "0.79.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "353abcef10511d565b25bd00f3d7b1babcc040d9644c5259467c9a514dc945f0" +dependencies = [ + "cranelift-codegen 0.79.1", + "cranelift-entity 0.79.1", + "cranelift-frontend 0.79.1", + "itertools", + "log", + "smallvec", + "wasmparser 0.81.0", + "wasmtime-types", +] + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "criterion" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" + +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "darling" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" 
+dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "syn 2.0.82", +] + +[[package]] +name = "darling_macro" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.82", +] + +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array", +] + +[[package]] +name = "directories-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339ee130d97a610ea5a5872d2bbb130fdf68884ff09d3028b81bec8a1ac23bbc" +dependencies = [ + "cfg-if 1.0.0", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi 0.3.9", +] + +[[package]] +name = "either" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" + +[[package]] +name = "enum-iterator" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eeac5c5edb79e4e39fe8439ef35207780a11f69c52cbe424ce3dfad4cb78de6" +dependencies = [ + "enum-iterator-derive", +] + +[[package]] +name = "enum-iterator-derive" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c134c37760b27a871ba422106eedbb8247da973a09e82558bf26d619c882b159" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "enumset" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a4b049558765cef5f0c1a273c3fc57084d768b44d2f98127aef4cceb17293" +dependencies = [ + "enumset_derive", +] + +[[package]] +name = "enumset_derive" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59c3b24c345d8c314966bdc1832f6c2635bfcce8e7cf363bd115987bba2ee242" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.82", +] + +[[package]] +name = "env_logger" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" +dependencies = [ + "atty", + "humantime 1.3.0", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "env_logger" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" +dependencies = [ + "humantime 2.1.0", + "is-terminal", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" +dependencies = [ + "errno-dragonfly", + "libc", + "winapi 0.3.9", +] + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "errno-dragonfly" 
+version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + +[[package]] +name = "fastrand" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" + +[[package]] +name = "file-per-thread-logger" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84f2e425d9790201ba4af4630191feac6dcc98765b118d4d18e91d23c2353866" +dependencies = [ + "env_logger 0.10.2", + "log", +] + +[[package]] +name = "flexpect" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bcf7841d06c34459c13046b8f4917cd6132e2fcf0bb5216aa793070b016741d" +dependencies = [ + "rustversion", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if 1.0.0", + "libc", 
+ "wasi", +] + +[[package]] +name = "gimli" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22030e2c5a68ec659fde1e949a745124b48e6fa8b045b7ed5bd1fe4ccc5c4e5d" +dependencies = [ + "fallible-iterator", + "indexmap 1.9.3", + "stable_deref_trait", +] + +[[package]] +name = "gimli" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "half" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashbrown" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "hermit-abi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" + +[[package]] +name = 
"hermit-abi" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" + +[[package]] +name = "home" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "humantime" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f" +dependencies = [ + "quick-error", +] + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", + "serde", +] + +[[package]] +name = "indexmap" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +dependencies = [ + "equivalent", + "hashbrown 0.15.0", +] + +[[package]] +name = "inkwell" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbac11e485159a525867fb7e6aa61981453e6a72f625fde6a4ab3047b0c6dec9" +dependencies = [ + "either", + "inkwell_internals", + "libc", + "llvm-sys", + "once_cell", + "parking_lot", +] + +[[package]] +name = "inkwell_internals" +version = "0.7.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "87d00c17e264ce02be5bc23d7bff959188ec7137beddd06b8b6b05a7c680ea85" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "io-lifetimes" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "278e90d6f8a6c76a8334b336e306efa3c5f2b604048cbfd486d6f49878e3af14" +dependencies = [ + "rustc_version", + "winapi 0.3.9", +] + +[[package]] +name = "is-terminal" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "261f68e344040fbd0edea105bef17c66edf46f984ddb1115b775ce31be948f4b" +dependencies = [ + "hermit-abi 0.4.0", + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "jobserver" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +dependencies = [ + "winapi 0.2.8", + "winapi-build", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "leb128" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "884e2677b40cc8c339eaefcb701c32ef1fd2493d71118dc0ca4b6a736c93bd67" + +[[package]] +name = "libc" +version = "0.2.161" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1" + +[[package]] +name = "libloading" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b111a074963af1d37a139918ac6d49ad1d0d5e47f72fd55388619691a7d753" +dependencies = [ + "cc", + "winapi 0.3.9", +] + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if 1.0.0", + "winapi 0.3.9", +] + +[[package]] +name = "libredox" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" +dependencies = [ + "bitflags 2.6.0", + "libc", +] + +[[package]] +name = "linux-raw-sys" +version = "0.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a261afc61b7a5e323933b402ca6a1765183687c614789b1e4db7762ed4230bca" + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "llvm-sys" +version = "120.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "624f2692f436769c7eb85a13eeca3f6fb9705a4b2bd0473ac9577c90f19e21ef" +dependencies = [ + "cc", + "lazy_static", + "libc", + "regex", + "semver 0.11.0", +] + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "loupe" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6a72dfa44fe15b5e76b94307eeb2ff995a8c5b283b55008940c02e0c5b634d" +dependencies = [ + "indexmap 1.9.3", + "loupe-derive", + "rustversion", +] + +[[package]] +name = "loupe-derive" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fbfc88337168279f2e9ae06e157cfed4efd3316e14dc96ed074d4f2e6c5952" +dependencies = [ + "quote", + "syn 1.0.109", +] + +[[package]] +name = "mach" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" +dependencies = [ + "libc", +] + +[[package]] +name = "mach2" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b955cdeb2a02b9117f121ce63aa52d08ade45de53e48fe6a38b39c10f6f709" +dependencies = [ + "libc", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "memmap2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +dependencies = [ + "libc", +] + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +dependencies = [ + "adler2", +] + +[[package]] +name = "more-asserts" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7843ec2de400bcbc6a6328c958dc38e5359da6e93e72e37bc5246bf1ae776389" + +[[package]] +name = "nom" +version = "5.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08959a387a676302eebf4ddbcbc611da04285579f76f88ee0506c63b1a61dd4b" +dependencies = [ + "memchr", + "version_check", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi 0.3.9", + "libc", +] + +[[package]] +name = "object" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67ac1d3f9a1d3616fd9a60c8d74296f22406a238b6a72f5cc1e6f314df4ffbf9" +dependencies = [ + "crc32fast", + "indexmap 1.9.3", + "memchr", +] + +[[package]] +name = "object" +version = "0.28.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e42c982f2d955fac81dd7e1d0e1426a7d702acd9c98d19ab01083a6a0328c424" +dependencies = [ + "crc32fast", + 
"hashbrown 0.11.2", + "indexmap 1.9.3", + "memchr", +] + +[[package]] +name = "object" +version = "0.36.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "oorandom" +version = "11.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" + +[[package]] +name = "opaque-debug" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "pest" +version = "2.7.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" + +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.88" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "psm" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" +dependencies = [ + "cc", +] + +[[package]] +name = "ptr_meta" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0738ccf7ea06b608c10564b31debd4f5bc5e197fc8bfe088f68ae5ce81e7a4f1" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16b845dbfca988fa33db069c0e230574d15a3088f147a87b64c7589eb662c9ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quote" +version = "1.0.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 
+dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + +[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom", + "libredox", + "thiserror", +] + +[[package]] +name = "regalloc" +version = "0.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d808cff91dfca7b239d40b972ba628add94892b1d9e19a842aedc5cfae8ab1a" +dependencies = [ + "log", + "rustc-hash", + "smallvec", +] + +[[package]] +name = "regalloc" +version = "0.0.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62446b1d3ebf980bdc68837700af1d77b37bc430e524bf95319c6eada2a4cc02" +dependencies = [ + "log", + "rustc-hash", + "smallvec", +] + +[[package]] +name = "regex" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" + +[[package]] +name = "region" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877e54ea2adcd70d80e9179344c97f93ef0dffd6b03e1f4529e6e83ab2fa9ae0" +dependencies = [ + "bitflags 1.3.2", + "libc", + "mach", + "winapi 0.3.9", +] + +[[package]] +name = "region" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6b6ebd13bc009aef9cd476c1310d49ac354d36e240cf1bd753290f3dc7199a7" +dependencies = [ + "bitflags 1.3.2", + "libc", + "mach2", + "windows-sys 0.52.0", +] + +[[package]] +name = "rend" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" +dependencies = [ + "bytecheck", +] + +[[package]] +name = "rkyv" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9008cd6385b9e161d8229e1f6549dd23c3d022f132a2ea37ac3a10ac4935779b" +dependencies = [ + "bitvec", + "bytecheck", + "bytes", + "hashbrown 0.12.3", + "ptr_meta", + "rend", + "rkyv_derive", + "seahash", + "tinyvec", + "uuid", +] + +[[package]] +name = "rkyv_derive" +version = "0.7.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "503d1d27590a2b0a3a4ca4c94755aa2875657196ecbf401a42eff41d7de532c0" +dependencies = [ + 
"proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver 1.0.23", +] + +[[package]] +name = "rustix" +version = "0.26.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18c44018277ec7195538f5631b90def7ad975bb46370cb0f4eff4012de9333f8" +dependencies = [ + "bitflags 1.3.2", + "errno 0.2.8", + "io-lifetimes", + "libc", + "linux-raw-sys 0.0.36", + "rustc_version", + "winapi 0.3.9", +] + +[[package]] +name = "rustix" +version = "0.38.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +dependencies = [ + "bitflags 2.6.0", + "errno 0.3.9", + "libc", + "linux-raw-sys 0.4.14", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustversion" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + 
+[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + +[[package]] +name = "semver" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" + +[[package]] +name = "semver-parser" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" +dependencies = [ + "pest", +] + +[[package]] +name = "serde" +version = "1.0.210" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_bytes" +version = "0.11.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" +dependencies = [ + "serde", +] + +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half", + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.210" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +dependencies = [ 
+ "proc-macro2", + "quote", + "syn 2.0.82", +] + +[[package]] +name = "serde_json" +version = "1.0.132" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "sha2" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800" +dependencies = [ + "block-buffer", + "cfg-if 1.0.0", + "cpufeatures", + "digest", + "opaque-debug", +] + +[[package]] +name = "shlex" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2" + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "simdjson-utf8" +version = "0.1.0" +dependencies = [ + "bindgen", + "cmake", +] + +[[package]] +name = "simdutf8" +version = "0.1.5" +dependencies = [ + "flexpect", +] + +[[package]] +name = "simdutf8" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" + +[[package]] +name = "simdutf8-bench" +version = "0.0.1" +dependencies = [ + "core_affinity", + "criterion", + "simdjson-utf8", + "simdutf8 0.1.5", + "simdutf8-portable", + "wasmer", + "wasmtime", +] + +[[package]] +name = "simdutf8-portable" +version = "0.1.0" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + +[[package]] +name = "tempfile" +version = "3.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +dependencies = [ + "cfg-if 1.0.0", + "fastrand", + "once_cell", + "rustix 0.38.37", + "windows-sys 0.59.0", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = 
"thiserror" +version = "1.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.82", +] + +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "tinyvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "log", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.82", +] + +[[package]] +name = 
"tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971" + +[[package]] +name = "unicode-ident" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + +[[package]] +name = "uuid" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" + +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +dependencies = [ + "cfg-if 1.0.0", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.82", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.82", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" + +[[package]] +name = "wasm-encoder" +version = "0.219.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29cbbd772edcb8e7d524a82ee8cef8dd046fc14033796a754c3ad246d019fa54" +dependencies = [ + "leb128", + "wasmparser 0.219.1", +] + +[[package]] +name = "wasmer" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ea8d8361c9d006ea3d7797de7bd6b1492ffd0f91a22430cfda6c1658ad57bedf" +dependencies = [ + "cfg-if 1.0.0", + "indexmap 1.9.3", + "js-sys", + "loupe", + "more-asserts", + "target-lexicon", + "thiserror", + "wasm-bindgen", + "wasmer-artifact", + "wasmer-compiler", + "wasmer-compiler-cranelift", + "wasmer-compiler-llvm", + "wasmer-derive", + "wasmer-engine", + "wasmer-engine-dylib", + "wasmer-engine-universal", + "wasmer-types", + "wasmer-vm", + "winapi 0.3.9", +] + +[[package]] +name = "wasmer-artifact" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7aaf9428c29c1d8ad2ac0e45889ba8a568a835e33fd058964e5e500f2f7ce325" +dependencies = [ + "enumset", + "loupe", + "thiserror", + "wasmer-compiler", + "wasmer-types", +] + +[[package]] +name = "wasmer-compiler" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67a6cd866aed456656db2cfea96c18baabbd33f676578482b85c51e1ee19d2c" +dependencies = [ + "enumset", + "loupe", + "rkyv", + "serde", + "serde_bytes", + "smallvec", + "target-lexicon", + "thiserror", + "wasmer-types", + "wasmparser 0.83.0", +] + +[[package]] +name = "wasmer-compiler-cranelift" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48be2f9f6495f08649e4f8b946a2cbbe119faf5a654aa1457f9504a99d23dae0" +dependencies = [ + "cranelift-codegen 0.82.3", + "cranelift-entity 0.82.3", + "cranelift-frontend 0.82.3", + "gimli 0.26.2", + "loupe", + "more-asserts", + "rayon", + "smallvec", + "target-lexicon", + "tracing", + "wasmer-compiler", + "wasmer-types", +] + +[[package]] +name = "wasmer-compiler-llvm" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd69f50825c69be2efb71e3059a3222de6e5d06552da51907cac761f701bde83" +dependencies = [ + "byteorder", + "cc", + "inkwell", + "itertools", + "lazy_static", + "libc", + "loupe", + "object 0.28.4", + "rayon", + "regex", + "rustc_version", + 
"semver 1.0.23", + "smallvec", + "target-lexicon", + "wasmer-compiler", + "wasmer-types", + "wasmer-vm", +] + +[[package]] +name = "wasmer-derive" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00e50405cc2a2f74ff574584710a5f2c1d5c93744acce2ca0866084739284b51" +dependencies = [ + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "wasmer-engine" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f98f010978c244db431b392aeab0661df7ea0822343334f8f2a920763548e45" +dependencies = [ + "backtrace", + "enumset", + "lazy_static", + "loupe", + "memmap2", + "more-asserts", + "rustc-demangle", + "serde", + "serde_bytes", + "target-lexicon", + "thiserror", + "wasmer-artifact", + "wasmer-compiler", + "wasmer-types", + "wasmer-vm", +] + +[[package]] +name = "wasmer-engine-dylib" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0358af9c154724587731175553805648d9acb8f6657880d165e378672b7e53" +dependencies = [ + "cfg-if 1.0.0", + "enum-iterator", + "enumset", + "leb128", + "libloading 0.7.4", + "loupe", + "object 0.28.4", + "rkyv", + "serde", + "tempfile", + "tracing", + "wasmer-artifact", + "wasmer-compiler", + "wasmer-engine", + "wasmer-object", + "wasmer-types", + "wasmer-vm", + "which 4.4.2", +] + +[[package]] +name = "wasmer-engine-universal" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "440dc3d93c9ca47865a4f4edd037ea81bf983b5796b59b3d712d844b32dbef15" +dependencies = [ + "cfg-if 1.0.0", + "enumset", + "leb128", + "loupe", + "region 3.0.2", + "rkyv", + "wasmer-compiler", + "wasmer-engine", + "wasmer-engine-universal-artifact", + "wasmer-types", + "wasmer-vm", + "winapi 0.3.9", +] + +[[package]] +name = "wasmer-engine-universal-artifact" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"68f1db3f54152657eb6e86c44b66525ff7801dad8328fe677da48dd06af9ad41" +dependencies = [ + "enum-iterator", + "enumset", + "loupe", + "rkyv", + "thiserror", + "wasmer-artifact", + "wasmer-compiler", + "wasmer-types", +] + +[[package]] +name = "wasmer-object" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d831335ff3a44ecf451303f6f891175c642488036b92ceceb24ac8623a8fa8b" +dependencies = [ + "object 0.28.4", + "thiserror", + "wasmer-compiler", + "wasmer-types", +] + +[[package]] +name = "wasmer-types" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39df01ea05dc0a9bab67e054c7cb01521e53b35a7bb90bd02eca564ed0b2667f" +dependencies = [ + "backtrace", + "enum-iterator", + "indexmap 1.9.3", + "loupe", + "more-asserts", + "rkyv", + "serde", + "thiserror", +] + +[[package]] +name = "wasmer-vm" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30d965fa61f4dc4cdb35a54daaf7ecec3563fbb94154a6c35433f879466247dd" +dependencies = [ + "backtrace", + "cc", + "cfg-if 1.0.0", + "corosensei", + "enum-iterator", + "indexmap 1.9.3", + "lazy_static", + "libc", + "loupe", + "mach", + "memoffset", + "more-asserts", + "region 3.0.2", + "rkyv", + "scopeguard", + "serde", + "thiserror", + "wasmer-artifact", + "wasmer-types", + "winapi 0.3.9", +] + +[[package]] +name = "wasmparser" +version = "0.81.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98930446519f63d00a836efdc22f67766ceae8dbcc1571379f2bcabc6b2b9abc" + +[[package]] +name = "wasmparser" +version = "0.83.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "718ed7c55c2add6548cca3ddd6383d738cd73b892df400e96b9aa876f0141d7a" + +[[package]] +name = "wasmparser" +version = "0.219.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c771866898879073c53b565a6c7b49953795159836714ac56a5befb581227c5" +dependencies = 
[ + "bitflags 2.6.0", + "indexmap 2.6.0", +] + +[[package]] +name = "wasmtime" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d56ceaa60d3019887d6ba5768860fac99f5a6511453e183cb3ba2aaafd9411f3" +dependencies = [ + "anyhow", + "async-trait", + "backtrace", + "bincode", + "cfg-if 1.0.0", + "cpp_demangle", + "indexmap 1.9.3", + "lazy_static", + "libc", + "log", + "object 0.27.1", + "paste", + "psm", + "rayon", + "region 2.2.0", + "rustc-demangle", + "serde", + "target-lexicon", + "wasmparser 0.81.0", + "wasmtime-cache", + "wasmtime-cranelift", + "wasmtime-environ", + "wasmtime-fiber", + "wasmtime-jit", + "wasmtime-runtime", + "wat", + "winapi 0.3.9", +] + +[[package]] +name = "wasmtime-cache" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d45c2c7ed7a2700ff012e97e12324d2ba0bdd943e50c0d3a95b582ef2bfdca4" +dependencies = [ + "anyhow", + "base64", + "bincode", + "directories-next", + "file-per-thread-logger", + "log", + "rustix 0.26.2", + "serde", + "sha2", + "toml", + "winapi 0.3.9", + "zstd", +] + +[[package]] +name = "wasmtime-cranelift" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5793c2d14c7e2b962d1d79408df011190ec8f6214a01efd676f5e2266c44bc8" +dependencies = [ + "anyhow", + "cranelift-codegen 0.79.1", + "cranelift-entity 0.79.1", + "cranelift-frontend 0.79.1", + "cranelift-native", + "cranelift-wasm", + "gimli 0.26.2", + "log", + "more-asserts", + "object 0.27.1", + "target-lexicon", + "thiserror", + "wasmparser 0.81.0", + "wasmtime-environ", +] + +[[package]] +name = "wasmtime-environ" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79131537408f938501b4f540ae0f61b456d9962c2bb590edefb904cf7d1e5f54" +dependencies = [ + "anyhow", + "cranelift-entity 0.79.1", + "gimli 0.26.2", + "indexmap 1.9.3", + "log", + "more-asserts", + "object 0.27.1", + "serde", + 
"target-lexicon", + "thiserror", + "wasmparser 0.81.0", + "wasmtime-types", +] + +[[package]] +name = "wasmtime-fiber" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cee9bf33ebebf88a353be8961ed87cf2780091e0c166c3ab3a23a3d8304f964a" +dependencies = [ + "cc", + "rustix 0.26.2", + "winapi 0.3.9", +] + +[[package]] +name = "wasmtime-jit" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8031b6e83071b40b0139924024ee0d2e11f65f7677d7b028720df55610cbf994" +dependencies = [ + "addr2line 0.17.0", + "anyhow", + "bincode", + "cfg-if 1.0.0", + "gimli 0.26.2", + "object 0.27.1", + "region 2.2.0", + "rustix 0.26.2", + "serde", + "target-lexicon", + "thiserror", + "wasmtime-environ", + "wasmtime-runtime", + "winapi 0.3.9", +] + +[[package]] +name = "wasmtime-runtime" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c9412d752736938c2a57228fb95e13d40bdbc879ac741874e7f7b49c198ffa" +dependencies = [ + "anyhow", + "backtrace", + "cc", + "cfg-if 1.0.0", + "indexmap 1.9.3", + "lazy_static", + "libc", + "log", + "mach", + "memoffset", + "more-asserts", + "rand", + "region 2.2.0", + "rustix 0.26.2", + "thiserror", + "wasmtime-environ", + "wasmtime-fiber", + "winapi 0.3.9", +] + +[[package]] +name = "wasmtime-types" +version = "0.32.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1602b6ae8b901e60e8b9d51cadbf51a0421b7e67bf4cbe2e647695783fd9d45" +dependencies = [ + "cranelift-entity 0.79.1", + "serde", + "thiserror", + "wasmparser 0.81.0", +] + +[[package]] +name = "wast" +version = "219.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f79a9d9df79986a68689a6b40bcc8d5d40d807487b235bebc2ac69a242b54a1" +dependencies = [ + "bumpalo", + "leb128", + "memchr", + "unicode-width", + "wasm-encoder", +] + +[[package]] +name = "wat" +version = "1.219.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bc3cf014fb336883a411cd662f987abf6a1d2a27f2f0008616a0070bbf6bd0d" +dependencies = [ + "wast", +] + +[[package]] +name = "web-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "which" +version = "3.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724" +dependencies = [ + "libc", +] + +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix 0.38.37", +] + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43dbb096663629518eb1dfa72d80243ca5a6aca764cae62a2df70af760a9be75" +dependencies = [ + "windows_aarch64_msvc 0.33.0", + "windows_i686_gnu 0.33.0", + "windows_i686_msvc 0.33.0", + "windows_x86_64_gnu 0.33.0", + "windows_x86_64_msvc 0.33.0", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.33.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd761fd3eb9ab8cc1ed81e56e567f02dd82c4c837e48ac3b2181b9ffc5060807" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab0cf703a96bab2dc0c02c0fa748491294bf9b7feb27e1f4f96340f208ada0e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cfdbe89cc9ad7ce618ba34abc34bbb6c36d99e96cae2245b7943cd75ee773d0" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4dd9b0c0e9ece7bb22e84d70d01b71c6d6248b81a3c60d11869451b4cb24784" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff1e4aa646495048ec7f3ffddc411e1d829c026a2ec62b39da15c1055e406eaa" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.82", +] + +[[package]] +name = "zstd" +version = "0.9.2+zstd.1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2390ea1bf6c038c39674f22d95f0564725fc06034a47129179810b2fc58caa54" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "4.1.3+zstd.1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e99d81b99fb3c2c2c794e3fe56c305c63d5173a16a46b5850b07c935ffc7db79" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "1.6.2+zstd.1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2daf2f248d9ea44454bfcb2516534e8b8ad2fc91bf818a1885495fc42bc8ac9f" +dependencies = [ + "cc", + "libc", +] From 
465b29f9642de243dd511f048adfd222ff3a6387 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 06:36:43 +0200 Subject: [PATCH 11/83] update bench deps --- bench/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/Cargo.toml b/bench/Cargo.toml index 20dc7437..a6640796 100644 --- a/bench/Cargo.toml +++ b/bench/Cargo.toml @@ -27,8 +27,8 @@ simdutf8_wasmer_llvm = [ simdutf8_wasmtime = ["wasmtime"] [dependencies] -core_affinity = "0.5" -criterion = "0.3" +core_affinity = "0.8.1" +criterion = "0.5.1" simdutf8 = { version = "*", path = "..", features = ["aarch64_neon"] } simdutf8-portable = { version = "*", path = "../portable" } simdjson-utf8 = { version = "*", path = "simdjson-utf8", optional = true } From ed8e6e1ce55f538178980770efe8cb51bcddbaf6 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 11:03:29 +0200 Subject: [PATCH 12/83] portable basic tentative impl --- portable/src/implementation/mod.rs | 2 +- .../implementation/portable/algorithm_new.rs | 555 ++++++++++++++++++ portable/src/implementation/portable/mod.rs | 1 + portable/src/lib.rs | 4 +- 4 files changed, 559 insertions(+), 3 deletions(-) create mode 100644 portable/src/implementation/portable/algorithm_new.rs diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 89bd27a7..7fad700d 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -21,7 +21,7 @@ pub(crate) unsafe fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basi #[inline(never)] unsafe fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { #[cfg(not(feature = "simd256"))] - return portable::simd128::validate_utf8_basic(input); + return portable::algorithm_new::validate_utf8_basic(input); #[cfg(feature = "simd256")] return portable::simd256::validate_utf8_basic(input); } diff --git a/portable/src/implementation/portable/algorithm_new.rs 
b/portable/src/implementation/portable/algorithm_new.rs new file mode 100644 index 00000000..8ec89c8b --- /dev/null +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -0,0 +1,555 @@ +use std::simd::{ + cmp::SimdPartialOrd, + num::{SimdInt, SimdUint}, + simd_swizzle, u8x16, LaneCount, Simd, SupportedLaneCount, +}; + +use crate::{basic, implementation::helpers::SIMD_CHUNK_SIZE}; + +#[cfg(all( + any(target_arch = "aarch64", target_arch = "arm"), + target_feature = "neon" +))] +const HAS_FAST_REDUCE_MAX: bool = true; + +#[cfg(not(all( + any(target_arch = "aarch64", target_arch = "arm"), + target_feature = "neon" +)))] +const HAS_FAST_REDUCE_MAX: bool = false; + +#[repr(C, align(32))] +#[allow(dead_code)] // only used if a 128-bit SIMD implementation is used +pub(crate) struct TempSimdChunk(pub(crate) [u8; SIMD_CHUNK_SIZE]); + +#[allow(dead_code)] // only used if there is a SIMD implementation +impl TempSimdChunk { + #[expect(clippy::inline_always)] + #[inline(always)] // needs to be forced because otherwise it is not inlined on armv7 neo + pub(crate) const fn new() -> Self { + Self([0; SIMD_CHUNK_SIZE]) + } +} + +#[repr(C)] +struct SimdInput +where + LaneCount: SupportedLaneCount, +{ + vals: [Simd; O], +} + +trait SimdInputTrait { + unsafe fn new(ptr: *const u8) -> Self; + + unsafe fn is_ascii(&self) -> bool; +} + +impl SimdInputTrait for SimdInput<16, 4> { + #[inline] + unsafe fn new(ptr: *const u8) -> Self { + #[expect(clippy::cast_ptr_alignment)] + let ptr = ptr.cast::(); + Self { + vals: [ + ptr.read_unaligned(), + ptr.add(1).read_unaligned(), + ptr.add(2).read_unaligned(), + ptr.add(3).read_unaligned(), + ], + } + } + + #[inline] + unsafe fn is_ascii(&self) -> bool { + (self.vals[0] | self.vals[1]).is_ascii() + } +} + +struct Utf8CheckAlgorithm +where + LaneCount: SupportedLaneCount, +{ + pub(crate) prev: Simd, + pub(crate) incomplete: Simd, // FIXME: should be a mask? + pub(crate) error: Simd, // FIXME: should be a mask? 
+} + +trait SimdU8Value +where + LaneCount: SupportedLaneCount, +{ + #[expect(clippy::too_many_arguments)] + fn from_32_cut_off_leading( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + v16: u8, + v17: u8, + v18: u8, + v19: u8, + v20: u8, + v21: u8, + v22: u8, + v23: u8, + v24: u8, + v25: u8, + v26: u8, + v27: u8, + v28: u8, + v29: u8, + v30: u8, + v31: u8, + ) -> Self; + + #[expect(clippy::too_many_arguments)] + fn repeat_16( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self; + + #[expect(clippy::too_many_arguments)] + fn lookup_16( + self, + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self; + + fn prev1(self, prev: Self) -> Self; // FIXME: generic? 
+ fn prev2(self, prev: Self) -> Self; + fn prev3(self, prev: Self) -> Self; + + fn is_ascii(self) -> bool; +} + +impl SimdU8Value<16> for u8x16 { + #[inline] + fn from_32_cut_off_leading( + _v0: u8, + _v1: u8, + _v2: u8, + _v3: u8, + _v4: u8, + _v5: u8, + _v6: u8, + _v7: u8, + _v8: u8, + _v9: u8, + _v10: u8, + _v11: u8, + _v12: u8, + _v13: u8, + _v14: u8, + _v15: u8, + v16: u8, + v17: u8, + v18: u8, + v19: u8, + v20: u8, + v21: u8, + v22: u8, + v23: u8, + v24: u8, + v25: u8, + v26: u8, + v27: u8, + v28: u8, + v29: u8, + v30: u8, + v31: u8, + ) -> Self { + Self::from_array([ + v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + ]) + } + + #[inline] + fn repeat_16( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + Self::from_array([ + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ]) + } + + #[inline] + fn lookup_16( + self, + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + // We need to ensure that 'self' only contains the lower 4 bits, unlike the avx instruction + // this will otherwise lead to bad results + let src = Self::repeat_16( + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ); + src.swizzle_dyn(self) + } + + #[inline] + fn prev1(self, prev: Self) -> Self { + simd_swizzle!( + self, + prev, + [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,] + ) + } + + // ugly but prev requires const generics + #[inline] + fn prev2(self, prev: Self) -> Self { + simd_swizzle!( + self, + prev, + [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,] + ) + } + + // ugly but prev requires const generics + #[inline] + fn prev3(self, prev: Self) -> Self { + simd_swizzle!( + self, + prev, + [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12,] + ) + } + + #[inline] + fn is_ascii(self) -> bool { + if HAS_FAST_REDUCE_MAX { + self.reduce_max() < 0b1000_0000 + } else { + (self & Self::splat(0b1000_0000)) == Self::splat(0) + } + } +} + +impl Utf8CheckAlgorithm +where + LaneCount: SupportedLaneCount, + Simd: SimdU8Value, + SimdInput: SimdInputTrait, +{ + #[inline] + fn new() -> Self { + Self { + prev: Simd::::splat(0), + incomplete: Simd::::splat(0), + error: Simd::::splat(0), + } + } + + #[inline] + fn check_incomplete_pending(&mut self) { + self.error |= self.incomplete; + } + + #[inline] + unsafe fn is_incomplete(input: Simd) -> Simd { + input.saturating_sub(SimdU8Value::::from_32_cut_off_leading( + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0xff, + 0b1111_0000 - 1, + 0b1110_0000 - 1, + 0b1100_0000 - 1, + )) + } + + #[inline] + unsafe fn check_special_cases(input: Simd, prev1: Simd) -> Simd { + const TOO_SHORT: u8 = 1 << 0; + const TOO_LONG: u8 = 1 << 1; + const OVERLONG_3: u8 = 1 << 2; + const SURROGATE: u8 = 1 << 4; + const OVERLONG_2: u8 = 1 << 5; + const TWO_CONTS: u8 = 1 << 7; + const TOO_LARGE: u8 = 1 << 3; + const TOO_LARGE_1000: u8 = 1 << 6; + const OVERLONG_4: u8 = 1 << 6; + const CARRY: u8 = TOO_SHORT | TOO_LONG | TWO_CONTS; + + let byte_1_high = (prev1 >> 4).lookup_16( + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TOO_LONG, + TWO_CONTS, + TWO_CONTS, + TWO_CONTS, + TWO_CONTS, + TOO_SHORT | OVERLONG_2, + TOO_SHORT, + TOO_SHORT | OVERLONG_3 | SURROGATE, + TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4, + ); + + let byte_1_low = (prev1 & Simd::::splat(0x0F)).lookup_16( + CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, + CARRY | OVERLONG_2, + CARRY, + CARRY, + CARRY | TOO_LARGE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE 
| TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, + CARRY | TOO_LARGE | TOO_LARGE_1000, + CARRY | TOO_LARGE | TOO_LARGE_1000, + ); + + let byte_2_high = (input >> 4).lookup_16( + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, + TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + TOO_SHORT, + ); + + byte_1_high & byte_1_low & byte_2_high + } + + #[inline] + fn must_be_2_3_continuation(prev2: Simd, prev3: Simd) -> Simd { + let is_third_byte = prev2 + .simd_gt(Simd::::splat(0b1110_0000 - 1)) + .to_int(); + let is_fourth_byte = prev3 + .simd_gt(Simd::::splat(0b1111_0000 - 1)) + .to_int(); + + (is_third_byte | is_fourth_byte).cast() + } + + #[inline] + unsafe fn check_multibyte_lengths( + input: Simd, + prev: Simd, + special_cases: Simd, + ) -> Simd { + let prev2 = input.prev2(prev); + let prev3 = input.prev3(prev); + let must23 = Self::must_be_2_3_continuation(prev2, prev3); + let must23_80 = must23 & Simd::::splat(0x80); + must23_80 ^ special_cases + } + + #[inline] + unsafe fn has_error(&self) -> bool { + // FIXME: max workaround + if HAS_FAST_REDUCE_MAX { + self.error.reduce_max() != 0 + } else { + self.error != Simd::::splat(0) + } + } + + #[inline] + unsafe fn check_bytes(&mut self, input: Simd) { + let prev1 = input.prev1(self.prev); + let sc = Self::check_special_cases(input, prev1); + self.error |= Self::check_multibyte_lengths(input, self.prev, sc); + self.prev = input; + } + + #[inline] + unsafe fn check_utf8(&mut self, input: SimdInput) { + if 
input.is_ascii() { + self.check_incomplete_pending(); + } else { + self.check_block(input); + } + } + + #[inline] + unsafe fn check_block(&mut self, input: SimdInput) { + // WORKAROUND + // necessary because the for loop is not unrolled on ARM64 + if input.vals.len() == 2 { + self.check_bytes(*input.vals.as_ptr()); + self.check_bytes(*input.vals.as_ptr().add(1)); + self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(1)); + } else if input.vals.len() == 4 { + self.check_bytes(*input.vals.as_ptr()); + self.check_bytes(*input.vals.as_ptr().add(1)); + self.check_bytes(*input.vals.as_ptr().add(2)); + self.check_bytes(*input.vals.as_ptr().add(3)); + self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(3)); + } else { + panic!("Unsupported number of chunks"); + } + } + + /// Validation implementation for CPUs supporting the SIMD extension (see module). + /// + /// # Errors + /// Returns the zero-sized [`basic::Utf8Error`] on failure. + /// + /// # Safety + /// This function is inherently unsafe because it is compiled with SIMD extensions + /// enabled. Make sure that the CPU supports it before calling. 
+ /// + + #[inline] + pub unsafe fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + let len = input.len(); + let mut algorithm = Utf8CheckAlgorithm::::new(); + let mut idx: usize = 0; + let iter_lim = len - (len % SIMD_CHUNK_SIZE); + + while idx < iter_lim { + let simd_input = SimdInput::::new(input.as_ptr().add(idx as usize)); + idx += SIMD_CHUNK_SIZE; + if !simd_input.is_ascii() { + algorithm.check_block(simd_input); + break; + } + } + + while idx < iter_lim { + let input = SimdInput::::new(input.as_ptr().add(idx as usize)); + algorithm.check_utf8(input); + idx += SIMD_CHUNK_SIZE; + } + + if idx < len { + let mut tmpbuf = TempSimdChunk::new(); + crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64( + input.as_ptr().add(idx), + tmpbuf.0.as_mut_ptr(), + len - idx, + ); + let simd_input = SimdInput::new(tmpbuf.0.as_ptr()); + algorithm.check_utf8(simd_input); + } + algorithm.check_incomplete_pending(); + if algorithm.has_error() { + Err(basic::Utf8Error {}) + } else { + Ok(()) + } + } +} + +#[inline] +pub unsafe fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { + Utf8CheckAlgorithm::<16, 4>::validate_utf8_basic(input) +} diff --git a/portable/src/implementation/portable/mod.rs b/portable/src/implementation/portable/mod.rs index 8a1d49ec..31638b4f 100644 --- a/portable/src/implementation/portable/mod.rs +++ b/portable/src/implementation/portable/mod.rs @@ -1,3 +1,4 @@ +pub(crate) mod algorithm_new; #[cfg(any(not(feature = "simd256"), feature = "public_imp"))] pub(crate) mod simd128; #[cfg(any(feature = "simd256", feature = "public_imp"))] diff --git a/portable/src/lib.rs b/portable/src/lib.rs index 997615f2..dc585a20 100644 --- a/portable/src/lib.rs +++ b/portable/src/lib.rs @@ -26,7 +26,7 @@ //! Use [`basic::from_utf8()`] as a drop-in replacement for `std::str::from_utf8()`. //! //! ```rust -//! 
use simdutf8::basic::from_utf8; +//! use simdutf8_portable::basic::from_utf8; //! //! println!("{}", from_utf8(b"I \xE2\x9D\xA4\xEF\xB8\x8F UTF-8!").unwrap()); //! ``` @@ -35,7 +35,7 @@ //! instead. //! //! ```rust -//! use simdutf8::compat::from_utf8; +//! use simdutf8_portable::compat::from_utf8; //! //! let err = from_utf8(b"I \xE2\x9D\xA4\xEF\xB8 UTF-8!").unwrap_err(); //! assert_eq!(err.valid_up_to(), 5); From 865f675535adebdd3ec5173154b0f8276d878917 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 11:56:33 +0200 Subject: [PATCH 13/83] fix --- portable/src/implementation/portable/algorithm_new.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 8ec89c8b..85a83ca4 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -25,7 +25,7 @@ pub(crate) struct TempSimdChunk(pub(crate) [u8; SIMD_CHUNK_SIZE]); #[allow(dead_code)] // only used if there is a SIMD implementation impl TempSimdChunk { #[expect(clippy::inline_always)] - #[inline(always)] // needs to be forced because otherwise it is not inlined on armv7 neo + #[inline(always)] // FIXME needs to be forced because otherwise it is not inlined on armv7 neo pub(crate) const fn new() -> Self { Self([0; SIMD_CHUNK_SIZE]) } @@ -62,7 +62,7 @@ impl SimdInputTrait for SimdInput<16, 4> { #[inline] unsafe fn is_ascii(&self) -> bool { - (self.vals[0] | self.vals[1]).is_ascii() + (self.vals[0] | self.vals[1] | self.vals[2] | self.vals[3]).is_ascii() } } @@ -78,6 +78,7 @@ where trait SimdU8Value where LaneCount: SupportedLaneCount, + Self: Copy, { #[expect(clippy::too_many_arguments)] fn from_32_cut_off_leading( From ef96a12db0414d5da890dcf6a8b763c2f563f342 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 11:56:37 +0200 Subject: [PATCH 14/83] add tests --- portable/tests/tests.rs | 524 
++++++++++++++++++++++++++++++++++++++++ 1 file changed, 524 insertions(+) create mode 100644 portable/tests/tests.rs diff --git a/portable/tests/tests.rs b/portable/tests/tests.rs new file mode 100644 index 00000000..dc8ff222 --- /dev/null +++ b/portable/tests/tests.rs @@ -0,0 +1,524 @@ +#![allow(clippy::non_ascii_literal)] + +use simdutf8_portable::basic::from_utf8 as basic_from_utf8; +use simdutf8_portable::basic::from_utf8_mut as basic_from_utf8_mut; +use simdutf8_portable::compat::from_utf8 as compat_from_utf8; +use simdutf8_portable::compat::from_utf8_mut as compat_from_utf8_mut; + +#[cfg(not(feature = "std"))] +extern crate std; + +#[cfg(not(feature = "std"))] +use std::{borrow::ToOwned, format}; + +pub trait BStrExt { + fn repeat_x(&self, count: usize) -> Vec; +} + +/// b"a".repeat() is not implemented for Rust 1.38.0 (MSRV) +impl BStrExt for T +where + T: AsRef<[u8]>, +{ + #[expect(clippy::unwrap_used)] + fn repeat_x(&self, count: usize) -> Vec { + use std::io::Write; + + let x = self.as_ref(); + let mut res = Vec::with_capacity(x.len() * count); + for _ in 0..count { + res.write_all(x).unwrap(); + } + res + } +} + +fn test_valid(input: &[u8]) { + // std lib sanity check + assert!(std::str::from_utf8(input).is_ok()); + + assert!(basic_from_utf8(input).is_ok()); + assert!(compat_from_utf8(input).is_ok()); + + let mut mut_input = input.to_owned(); + assert!(basic_from_utf8_mut(mut_input.as_mut_slice()).is_ok()); + assert!(compat_from_utf8_mut(mut_input.as_mut_slice()).is_ok()); + + #[cfg(feature = "public_imp")] + public_imp::test_valid(input); +} + +fn test_invalid(input: &[u8], valid_up_to: usize, error_len: Option) { + // std lib sanity check + let err = std::str::from_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + assert!(basic_from_utf8(input).is_err()); + let err = compat_from_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), 
error_len); + + #[cfg(feature = "public_imp")] + public_imp::test_invalid(input, valid_up_to, error_len); +} + +#[cfg(feature = "public_imp")] +mod public_imp { + + #[allow(unused_variables)] // nothing to do if not SIMD implementation is available + pub(super) fn test_valid(input: &[u8]) { + if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { + #[cfg(target_feature = "avx2")] + unsafe { + assert!(simdutf8::basic::imp::x86::avx2::validate_utf8(input).is_ok()); + assert!(simdutf8::compat::imp::x86::avx2::validate_utf8(input).is_ok()); + + test_streaming::(input, true); + test_chunked_streaming::( + input, true, + ); + } + + #[cfg(target_feature = "sse4.2")] + unsafe { + assert!(simdutf8::basic::imp::x86::sse42::validate_utf8(input).is_ok()); + assert!(simdutf8::compat::imp::x86::sse42::validate_utf8(input).is_ok()); + + test_streaming::(input, true); + test_chunked_streaming::( + input, true, + ); + } + } + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + unsafe { + assert!(simdutf8::basic::imp::aarch64::neon::validate_utf8(input).is_ok()); + assert!(simdutf8::compat::imp::aarch64::neon::validate_utf8(input).is_ok()); + + test_streaming::(input, true); + test_chunked_streaming::( + input, true, + ); + } + #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] + unsafe { + assert!(simdutf8::basic::imp::wasm32::simd128::validate_utf8(input).is_ok()); + assert!(simdutf8::compat::imp::wasm32::simd128::validate_utf8(input).is_ok()); + + test_streaming::(input, true); + test_chunked_streaming::( + input, true, + ); + } + #[cfg(feature = "portable_public_imp")] + unsafe { + assert!(simdutf8::basic::imp::portable::simd128::validate_utf8(input).is_ok()); + assert!(simdutf8::compat::imp::portable::simd128::validate_utf8(input).is_ok()); + + test_streaming::( + input, true, + ); + test_chunked_streaming::< + simdutf8::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp, + >(input, true); + + 
assert!(simdutf8::basic::imp::portable::simd256::validate_utf8(input).is_ok()); + assert!(simdutf8::compat::imp::portable::simd256::validate_utf8(input).is_ok()); + + test_streaming::( + input, true, + ); + test_chunked_streaming::< + simdutf8::basic::imp::portable::simd256::ChunkedUtf8ValidatorImp, + >(input, true); + } + } + + #[allow(unused_variables)] // nothing to do if not SIMD implementation is available + pub(super) fn test_invalid(input: &[u8], valid_up_to: usize, error_len: Option) { + if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { + #[cfg(target_feature = "avx2")] + unsafe { + assert!(simdutf8::basic::imp::x86::avx2::validate_utf8(input).is_err()); + let err = simdutf8::compat::imp::x86::avx2::validate_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + test_streaming::(input, false); + test_chunked_streaming::( + input, false, + ); + } + #[cfg(target_feature = "sse4.2")] + unsafe { + assert!(simdutf8::basic::imp::x86::sse42::validate_utf8(input).is_err()); + let err = simdutf8::compat::imp::x86::sse42::validate_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + test_streaming::(input, false); + test_chunked_streaming::( + input, false, + ); + } + } + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + unsafe { + assert!(simdutf8::basic::imp::aarch64::neon::validate_utf8(input).is_err()); + let err = simdutf8::compat::imp::aarch64::neon::validate_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + test_streaming::(input, false); + test_chunked_streaming::( + input, false, + ); + } + #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] + unsafe { + assert!(simdutf8::basic::imp::wasm32::simd128::validate_utf8(input).is_err()); + let err = simdutf8::compat::imp::wasm32::simd128::validate_utf8(input).unwrap_err(); + 
assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + test_streaming::(input, false); + test_chunked_streaming::( + input, false, + ); + } + #[cfg(feature = "portable_public_imp")] + unsafe { + assert!(simdutf8::basic::imp::portable::simd128::validate_utf8(input).is_err()); + let err = simdutf8::compat::imp::portable::simd128::validate_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + test_streaming::( + input, false, + ); + test_chunked_streaming::< + simdutf8::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp, + >(input, false); + + assert!(simdutf8::basic::imp::portable::simd256::validate_utf8(input).is_err()); + let err = simdutf8::compat::imp::portable::simd256::validate_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + test_streaming::( + input, false, + ); + test_chunked_streaming::< + simdutf8::basic::imp::portable::simd256::ChunkedUtf8ValidatorImp, + >(input, false); + } + } + + #[allow(unused)] // not used if not SIMD implementation is available + fn test_streaming(input: &[u8], ok: bool) { + unsafe { + let mut validator = T::new(); + validator.update(input); + assert_eq!(validator.finalize().is_ok(), ok); + } + for i in [64, 128, 256, 1024, 65536, 1, 2, 3, 36, 99].iter() { + test_streaming_blocks::(input, *i, ok) + } + } + + #[allow(unused)] // not used if not SIMD implementation is available + fn test_streaming_blocks( + input: &[u8], + block_size: usize, + ok: bool, + ) { + unsafe { + let mut validator = T::new(); + for chunk in input.chunks(block_size) { + validator.update(chunk); + } + assert_eq!(validator.finalize().is_ok(), ok); + } + } + + #[allow(unused)] // not used if not SIMD implementation is available + fn test_chunked_streaming( + input: &[u8], + ok: bool, + ) { + for i in [64, 128, 256, 1024, 65536].iter() { + test_chunked_streaming_with_chunk_size::(input, *i, ok) 
+ } + } + + #[allow(unused)] // not used if not SIMD implementation is available + fn test_chunked_streaming_with_chunk_size( + input: &[u8], + chunk_size: usize, + ok: bool, + ) { + unsafe { + let mut validator = T::new(); + let mut chunks = input.chunks_exact(chunk_size); + for chunk in &mut chunks { + validator.update_from_chunks(chunk); + } + assert_eq!(validator.finalize(Some(chunks.remainder())).is_ok(), ok); + } + } + + #[test] + #[should_panic] + #[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "avx2" + ))] + fn test_avx2_chunked_panic() { + test_chunked_streaming_with_chunk_size::< + simdutf8::basic::imp::x86::avx2::ChunkedUtf8ValidatorImp, + >(b"abcd", 1, true); + } + + #[test] + #[should_panic] + #[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "sse4.2" + ))] + fn test_sse42_chunked_panic() { + test_chunked_streaming_with_chunk_size::< + simdutf8::basic::imp::x86::sse42::ChunkedUtf8ValidatorImp, + >(b"abcd", 1, true); + } + + #[test] + #[should_panic] + #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + fn test_neon_chunked_panic() { + test_chunked_streaming_with_chunk_size::< + simdutf8::basic::imp::aarch64::neon::ChunkedUtf8ValidatorImp, + >(b"abcd", 1, true); + } + + // the test runner will ignore this test probably due to limitations of panic handling/threading + // of that target--keeping this here so that when it can be tested properly, it will + // FIXME: remove this comment once this works properly. 
+ #[test] + #[should_panic] + #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] + fn test_simd128_chunked_panic() { + test_chunked_streaming_with_chunk_size::< + simdutf8::basic::imp::wasm32::simd128::ChunkedUtf8ValidatorImp, + >(b"abcd", 1, true); + } +} + +fn test_invalid_after_specific_prefix( + input: &[u8], + valid_up_to: usize, + error_len: Option, + with_suffix_error_len: Option, + repeat: usize, + prefix_bytes: &[u8], +) { + { + let mut prefixed_input = prefix_bytes.repeat_x(repeat); + let prefix_len = prefixed_input.len(); + prefixed_input.extend_from_slice(input); + test_invalid(prefixed_input.as_ref(), valid_up_to + prefix_len, error_len) + } + + if repeat != 0 { + let mut prefixed_input = prefix_bytes.repeat_x(repeat); + let prefix_len = prefixed_input.len(); + prefixed_input.extend_from_slice(input); + prefixed_input.extend_from_slice(prefix_bytes.repeat_x(repeat).as_slice()); + test_invalid( + prefixed_input.as_ref(), + valid_up_to + prefix_len, + with_suffix_error_len, + ) + } +} + +fn test_invalid_after_prefix( + input: &[u8], + valid_up_to: usize, + error_len: Option, + with_suffix_error_len: Option, + repeat: usize, +) { + for prefix in [ + "a", + "ö", + "😊", + "a".repeat(64).as_str(), + ("a".repeat(64) + "ö".repeat(32).as_str()).as_str(), + ] + .iter() + { + test_invalid_after_specific_prefix( + input, + valid_up_to, + error_len, + with_suffix_error_len, + repeat, + prefix.as_bytes(), + ); + } +} + +fn test_invalid_after_prefixes( + input: &[u8], + valid_up_to: usize, + error_len: Option, + with_suffix_error_len: Option, +) { + for repeat in [ + 0, 1, 2, 7, 8, 9, 15, 16, 16, 31, 32, 33, 63, 64, 65, 127, 128, 129, + ] + .iter() + { + test_invalid_after_prefix( + input, + valid_up_to, + error_len, + with_suffix_error_len, + *repeat, + ); + } +} + +#[test] +fn simple_valid() { + test_valid(b""); + + test_valid(b"\0"); + + test_valid(b"a".repeat_x(64).as_ref()); + + test_valid(b"a".repeat_x(128).as_ref()); + + test_valid(b"The quick 
brown fox jumps over the lazy dog"); + + // umlauts + test_valid("öäüÖÄÜß".as_bytes()); + + // emojis + test_valid("❤️✨🥺🔥😂😊✔️👍🥰".as_bytes()); + + // Chinese + test_valid("断用山昨屈内銀代意検瓶調像。情旗最投任留財夜隆年表高学送意功者。辺図掲記込真通第民国聞平。海帰傷芸記築世防橋整済歳権君注。選紙例並情夕破勢景移情誇進場豊読。景関有権米武野範随惑旬特覧刊野。相毎加共情面教地作減関絡。暖料児違歩致本感閉浦出楽赤何。時選権週邑針格事提一案質名投百定。止感右聞食三年外積文載者別。".as_bytes()); + + // Japanese + test_valid("意ざど禁23費サヒ車園オスミト規更ワエ異67事続トソキ音合岡治こ訪京ぴ日9稿がト明安イ抗的ウクロコ売一エコヨホ必噴塗ッ。索墓ー足議需レ応予ニ質県トぴン学市機だほせフ車捕コニ自校がこで極3力イい増娘汁表製ク。委セヤホネ作誌ミマクソ続新ほし月中報制どてびフ字78完りっせが村惹ヨサコ訳器りそ参受草ムタ大移ッけでつ番足ほこン質北ぽのよう応一ア輝労イ手人う再茨夕へしう。".as_bytes()); + + // Korean + test_valid("3인은 대법원장이 지명하는 자를 임명한다, 대통령은 제3항과 제4항의 사유를 지체없이 공포하여야 한다, 제한하는 경우에도 자유와 권리의 본질적인 내용을 침해할 수 없다, 국가는 전통문화의 계승·발전과 민족문화의 창달에 노력하여야 한다.".as_bytes()); +} + +#[test] +fn simple_invalid() { + test_invalid_after_prefixes(b"\xFF", 0, Some(1), Some(1)); + + // incomplete umlaut + test_invalid_after_prefixes(b"\xC3", 0, None, Some(1)); + + // incomplete emoji + test_invalid_after_prefixes(b"\xF0", 0, None, Some(1)); + test_invalid_after_prefixes(b"\xF0\x9F", 0, None, Some(2)); + test_invalid_after_prefixes(b"\xF0\x9F\x98", 0, None, Some(3)); +} + +#[test] +fn incomplete_on_32nd_byte() { + let mut invalid = b"a".repeat_x(31); + invalid.push(0xf0); + test_invalid(&invalid, 31, None) +} + +#[test] +fn incomplete_on_64th_byte() { + let mut invalid = b"a".repeat_x(63); + invalid.push(0xf0); + test_invalid(&invalid, 63, None) +} + +#[test] +fn incomplete_on_64th_byte_65_bytes_total() { + let mut invalid = b"a".repeat_x(63); + invalid.push(0xf0); + invalid.push(b'a'); + test_invalid(&invalid, 63, Some(1)) +} + +#[test] +fn error_display_basic() { + assert_eq!( + format!("{}", basic_from_utf8(b"\xF0").unwrap_err()), + "invalid utf-8 sequence" + ); + assert_eq!( + format!("{}", basic_from_utf8(b"a\xF0a").unwrap_err()), + "invalid utf-8 sequence" + ); +} + +#[test] +fn error_display_compat() { + assert_eq!( + format!("{}", compat_from_utf8(b"\xF0").unwrap_err()), + "incomplete utf-8 byte sequence from 
index 0" + ); + assert_eq!( + format!("{}", compat_from_utf8(b"a\xF0a").unwrap_err()), + "invalid utf-8 sequence of 1 bytes from index 1" + ); + assert_eq!( + format!("{}", compat_from_utf8(b"a\xF0\x9Fa").unwrap_err()), + "invalid utf-8 sequence of 2 bytes from index 1" + ); + assert_eq!( + format!("{}", compat_from_utf8(b"a\xF0\x9F\x98a").unwrap_err()), + "invalid utf-8 sequence of 3 bytes from index 1" + ); +} + +#[test] +fn error_debug_basic() { + assert_eq!( + format!("{:?}", basic_from_utf8(b"\xF0").unwrap_err()), + "Utf8Error" + ); +} + +#[test] +fn error_debug_compat() { + assert_eq!( + format!("{:?}", compat_from_utf8(b"\xF0").unwrap_err()), + "Utf8Error { valid_up_to: 0, error_len: None }" + ); + assert_eq!( + format!("{:?}", compat_from_utf8(b"a\xF0a").unwrap_err()), + "Utf8Error { valid_up_to: 1, error_len: Some(1) }" + ); +} + +#[test] +#[expect(clippy::clone_on_copy)] // used for coverage +fn error_derives_basic() { + let err = basic_from_utf8(b"\xF0").unwrap_err(); + let err2 = err.clone(); + assert_eq!(err, err2); + assert!(!(err != err2)); +} + +#[test] +#[expect(clippy::clone_on_copy)] // used for coverage +fn error_derives_compat() { + let err = compat_from_utf8(b"\xF0").unwrap_err(); + let err2 = err.clone(); + assert_eq!(err, err2); + assert!(!(err != err2)); +} From 47abea7dd81b6839107551d67379ef9cb8f9ae1c Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 11:58:01 +0200 Subject: [PATCH 15/83] clippy --- portable/src/implementation/portable/algorithm_new.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 85a83ca4..7feef38c 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -512,7 +512,7 @@ where pub unsafe fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { use 
crate::implementation::helpers::SIMD_CHUNK_SIZE; let len = input.len(); - let mut algorithm = Utf8CheckAlgorithm::::new(); + let mut algorithm = Self::new(); let mut idx: usize = 0; let iter_lim = len - (len % SIMD_CHUNK_SIZE); From 365d671b4674808d5cea34845d980905fc4231df Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 11:59:52 +0200 Subject: [PATCH 16/83] clippy --- portable/src/implementation/portable/algorithm_new.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 7feef38c..4f9f99d1 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -507,7 +507,6 @@ where /// This function is inherently unsafe because it is compiled with SIMD extensions /// enabled. Make sure that the CPU supports it before calling. /// - #[inline] pub unsafe fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { use crate::implementation::helpers::SIMD_CHUNK_SIZE; @@ -517,7 +516,7 @@ where let iter_lim = len - (len % SIMD_CHUNK_SIZE); while idx < iter_lim { - let simd_input = SimdInput::::new(input.as_ptr().add(idx as usize)); + let simd_input = SimdInput::::new(input.as_ptr().add(idx)); idx += SIMD_CHUNK_SIZE; if !simd_input.is_ascii() { algorithm.check_block(simd_input); @@ -526,7 +525,7 @@ where } while idx < iter_lim { - let input = SimdInput::::new(input.as_ptr().add(idx as usize)); + let input = SimdInput::::new(input.as_ptr().add(idx)); algorithm.check_utf8(input); idx += SIMD_CHUNK_SIZE; } From 0f6e5f6da1fe6be460ec8c0d19391ab7dbdad7e1 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 12:06:42 +0200 Subject: [PATCH 17/83] clippy --- portable/src/implementation/portable/algorithm_new.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git 
a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 4f9f99d1..3296984e 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -471,7 +471,7 @@ where } #[inline] - unsafe fn check_utf8(&mut self, input: SimdInput) { + unsafe fn check_utf8(&mut self, input: &SimdInput) { if input.is_ascii() { self.check_incomplete_pending(); } else { @@ -480,7 +480,7 @@ where } #[inline] - unsafe fn check_block(&mut self, input: SimdInput) { + unsafe fn check_block(&mut self, input: &SimdInput) { // WORKAROUND // necessary because the for loop is not unrolled on ARM64 if input.vals.len() == 2 { @@ -519,14 +519,14 @@ where let simd_input = SimdInput::::new(input.as_ptr().add(idx)); idx += SIMD_CHUNK_SIZE; if !simd_input.is_ascii() { - algorithm.check_block(simd_input); + algorithm.check_block(&simd_input); break; } } while idx < iter_lim { let input = SimdInput::::new(input.as_ptr().add(idx)); - algorithm.check_utf8(input); + algorithm.check_utf8(&input); idx += SIMD_CHUNK_SIZE; } @@ -538,7 +538,7 @@ where len - idx, ); let simd_input = SimdInput::new(tmpbuf.0.as_ptr()); - algorithm.check_utf8(simd_input); + algorithm.check_utf8(&simd_input); } algorithm.check_incomplete_pending(); if algorithm.has_error() { From 59dfaae07c26d4e11026d7f6f328c7de3d10fe63 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 12:14:41 +0200 Subject: [PATCH 18/83] move unsafe around --- .../implementation/portable/algorithm_new.rs | 82 ++++++++++--------- 1 file changed, 43 insertions(+), 39 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 3296984e..a2b18961 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -40,28 +40,30 @@ where } trait SimdInputTrait { - unsafe fn new(ptr: *const u8) 
-> Self; + fn new(ptr: *const u8) -> Self; - unsafe fn is_ascii(&self) -> bool; + fn is_ascii(&self) -> bool; } impl SimdInputTrait for SimdInput<16, 4> { #[inline] - unsafe fn new(ptr: *const u8) -> Self { + fn new(ptr: *const u8) -> Self { #[expect(clippy::cast_ptr_alignment)] let ptr = ptr.cast::(); - Self { - vals: [ - ptr.read_unaligned(), - ptr.add(1).read_unaligned(), - ptr.add(2).read_unaligned(), - ptr.add(3).read_unaligned(), - ], + unsafe { + Self { + vals: [ + ptr.read_unaligned(), + ptr.add(1).read_unaligned(), + ptr.add(2).read_unaligned(), + ptr.add(3).read_unaligned(), + ], + } } } #[inline] - unsafe fn is_ascii(&self) -> bool { + fn is_ascii(&self) -> bool { (self.vals[0] | self.vals[1] | self.vals[2] | self.vals[3]).is_ascii() } } @@ -317,7 +319,7 @@ where } #[inline] - unsafe fn is_incomplete(input: Simd) -> Simd { + fn is_incomplete(input: Simd) -> Simd { input.saturating_sub(SimdU8Value::::from_32_cut_off_leading( 0xff, 0xff, @@ -355,7 +357,7 @@ where } #[inline] - unsafe fn check_special_cases(input: Simd, prev1: Simd) -> Simd { + fn check_special_cases(input: Simd, prev1: Simd) -> Simd { const TOO_SHORT: u8 = 1 << 0; const TOO_LONG: u8 = 1 << 1; const OVERLONG_3: u8 = 1 << 2; @@ -440,7 +442,7 @@ where } #[inline] - unsafe fn check_multibyte_lengths( + fn check_multibyte_lengths( input: Simd, prev: Simd, special_cases: Simd, @@ -453,7 +455,7 @@ where } #[inline] - unsafe fn has_error(&self) -> bool { + fn has_error(&self) -> bool { // FIXME: max workaround if HAS_FAST_REDUCE_MAX { self.error.reduce_max() != 0 @@ -463,7 +465,7 @@ where } #[inline] - unsafe fn check_bytes(&mut self, input: Simd) { + fn check_bytes(&mut self, input: Simd) { let prev1 = input.prev1(self.prev); let sc = Self::check_special_cases(input, prev1); self.error |= Self::check_multibyte_lengths(input, self.prev, sc); @@ -471,7 +473,7 @@ where } #[inline] - unsafe fn check_utf8(&mut self, input: &SimdInput) { + fn check_utf8(&mut self, input: &SimdInput) { if 
input.is_ascii() { self.check_incomplete_pending(); } else { @@ -480,19 +482,23 @@ where } #[inline] - unsafe fn check_block(&mut self, input: &SimdInput) { + fn check_block(&mut self, input: &SimdInput) { // WORKAROUND // necessary because the for loop is not unrolled on ARM64 if input.vals.len() == 2 { - self.check_bytes(*input.vals.as_ptr()); - self.check_bytes(*input.vals.as_ptr().add(1)); - self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(1)); + unsafe { + self.check_bytes(*input.vals.as_ptr()); + self.check_bytes(*input.vals.as_ptr().add(1)); + self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(1)); + } } else if input.vals.len() == 4 { - self.check_bytes(*input.vals.as_ptr()); - self.check_bytes(*input.vals.as_ptr().add(1)); - self.check_bytes(*input.vals.as_ptr().add(2)); - self.check_bytes(*input.vals.as_ptr().add(3)); - self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(3)); + unsafe { + self.check_bytes(*input.vals.as_ptr()); + self.check_bytes(*input.vals.as_ptr().add(1)); + self.check_bytes(*input.vals.as_ptr().add(2)); + self.check_bytes(*input.vals.as_ptr().add(3)); + self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(3)); + } } else { panic!("Unsupported number of chunks"); } @@ -503,12 +509,8 @@ where /// # Errors /// Returns the zero-sized [`basic::Utf8Error`] on failure. /// - /// # Safety - /// This function is inherently unsafe because it is compiled with SIMD extensions - /// enabled. Make sure that the CPU supports it before calling. 
- /// #[inline] - pub unsafe fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { + pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { use crate::implementation::helpers::SIMD_CHUNK_SIZE; let len = input.len(); let mut algorithm = Self::new(); @@ -516,7 +518,7 @@ where let iter_lim = len - (len % SIMD_CHUNK_SIZE); while idx < iter_lim { - let simd_input = SimdInput::::new(input.as_ptr().add(idx)); + let simd_input = unsafe { SimdInput::::new(input.as_ptr().add(idx)) }; idx += SIMD_CHUNK_SIZE; if !simd_input.is_ascii() { algorithm.check_block(&simd_input); @@ -525,18 +527,20 @@ where } while idx < iter_lim { - let input = SimdInput::::new(input.as_ptr().add(idx)); + let input = unsafe { SimdInput::::new(input.as_ptr().add(idx)) }; algorithm.check_utf8(&input); idx += SIMD_CHUNK_SIZE; } if idx < len { let mut tmpbuf = TempSimdChunk::new(); - crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64( - input.as_ptr().add(idx), - tmpbuf.0.as_mut_ptr(), - len - idx, - ); + unsafe { + crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64( + input.as_ptr().add(idx), + tmpbuf.0.as_mut_ptr(), + len - idx, + ); + } let simd_input = SimdInput::new(tmpbuf.0.as_ptr()); algorithm.check_utf8(&simd_input); } @@ -550,6 +554,6 @@ where } #[inline] -pub unsafe fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { +pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { Utf8CheckAlgorithm::<16, 4>::validate_utf8_basic(input) } From a41d672fc2a7b1be9699936c8c648123bf62723d Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 15:20:06 +0200 Subject: [PATCH 19/83] updated bench lock file --- bench/Cargo.lock | 186 ++++++++++++++++++++++++++--------------------- 1 file changed, 103 insertions(+), 83 deletions(-) diff --git a/bench/Cargo.lock b/bench/Cargo.lock index 34062004..c3d5eee0 100644 --- 
a/bench/Cargo.lock +++ b/bench/Cargo.lock @@ -46,15 +46,27 @@ dependencies = [ "memchr", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "ansi_term" version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" dependencies = [ - "winapi 0.3.9", + "winapi", ] +[[package]] +name = "anstyle" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" + [[package]] name = "anyhow" version = "1.0.90" @@ -80,7 +92,7 @@ checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ "hermit-abi 0.1.19", "libc", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -129,7 +141,7 @@ dependencies = [ "cexpr", "cfg-if 0.1.10", "clang-sys", - "clap", + "clap 2.34.0", "env_logger 0.7.1", "lazy_static", "lazycell", @@ -254,6 +266,33 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", 
+] + [[package]] name = "clang-sys" version = "0.29.3" @@ -280,6 +319,31 @@ dependencies = [ "vec_map", ] +[[package]] +name = "clap" +version = "4.5.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.5.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" + [[package]] name = "cmake" version = "0.1.51" @@ -291,14 +355,13 @@ dependencies = [ [[package]] name = "core_affinity" -version = "0.5.10" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f8a03115cc34fb0d7c321dd154a3914b3ca082ccc5c11d91bf7117dbbe7171f" +checksum = "622892f5635ce1fc38c8f16dfc938553ed64af482edb5e150bf4caedbfcb2304" dependencies = [ - "kernel32-sys", "libc", "num_cpus", - "winapi 0.2.8", + "winapi", ] [[package]] @@ -491,24 +554,24 @@ dependencies = [ [[package]] name = "criterion" -version = "0.3.6" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b01d6de93b2b6c65e17c634a26653a29d107b3c98c607c765bf38d041531cd8f" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ - "atty", + "anes", "cast", - "clap", + "ciborium", + "clap 4.5.20", "criterion-plot", - "csv", + "is-terminal", "itertools", - "lazy_static", "num-traits", + "once_cell", "oorandom", "plotters", "rayon", "regex", "serde", - "serde_cbor", "serde_derive", "serde_json", "tinytemplate", @@ -517,9 +580,9 @@ dependencies = [ [[package]] name = "criterion-plot" -version = "0.4.5" 
+version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2673cc8207403546f45f5fd319a974b1e6983ad1a3ee7e6041650013be041876" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", "itertools", @@ -551,25 +614,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" [[package]] -name = "csv" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" -dependencies = [ - "csv-core", - "itoa", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.11" +name = "crunchy" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" -dependencies = [ - "memchr", -] +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" [[package]] name = "darling" @@ -632,7 +680,7 @@ checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ "libc", "redox_users", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -722,7 +770,7 @@ checksum = "f639046355ee4f37944e44f60642c6f3a7efa3cf6b78c78a0d989a8ce6c396a1" dependencies = [ "errno-dragonfly", "libc", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -834,9 +882,13 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "half" -version = "1.8.3" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" +checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +dependencies = [ + "cfg-if 1.0.0", + "crunchy", +] [[package]] name = "hashbrown" @@ -966,7 +1018,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "278e90d6f8a6c76a8334b336e306efa3c5f2b604048cbfd486d6f49878e3af14" dependencies = [ "rustc_version", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -1013,16 +1065,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "kernel32-sys" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" -dependencies = [ - "winapi 0.2.8", - "winapi-build", -] - [[package]] name = "lazy_static" version = "1.5.0" @@ -1054,7 +1096,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2b111a074963af1d37a139918ac6d49ad1d0d5e47f72fd55388619691a7d753" dependencies = [ "cc", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -1064,7 +1106,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" dependencies = [ "cfg-if 1.0.0", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -1577,7 +1619,7 @@ dependencies = [ "bitflags 1.3.2", "libc", "mach", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -1663,7 +1705,7 @@ dependencies = [ "libc", "linux-raw-sys 0.0.36", "rustc_version", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -1754,16 +1796,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_cbor" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" -dependencies = [ - "half", - "serde", -] - [[package]] name = "serde_derive" version = "1.0.210" @@ -2166,7 +2198,7 @@ dependencies = [ "wasmer-engine-universal", "wasmer-types", "wasmer-vm", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -2323,7 +2355,7 @@ dependencies = [ "wasmer-engine-universal-artifact", "wasmer-types", "wasmer-vm", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -2395,7 +2427,7 @@ dependencies = [ "thiserror", "wasmer-artifact", "wasmer-types", - "winapi 0.3.9", + "winapi", 
] [[package]] @@ -2452,7 +2484,7 @@ dependencies = [ "wasmtime-jit", "wasmtime-runtime", "wat", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -2471,7 +2503,7 @@ dependencies = [ "serde", "sha2", "toml", - "winapi 0.3.9", + "winapi", "zstd", ] @@ -2525,7 +2557,7 @@ checksum = "cee9bf33ebebf88a353be8961ed87cf2780091e0c166c3ab3a23a3d8304f964a" dependencies = [ "cc", "rustix 0.26.2", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -2547,7 +2579,7 @@ dependencies = [ "thiserror", "wasmtime-environ", "wasmtime-runtime", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -2573,7 +2605,7 @@ dependencies = [ "thiserror", "wasmtime-environ", "wasmtime-fiber", - "winapi 0.3.9", + "winapi", ] [[package]] @@ -2641,12 +2673,6 @@ dependencies = [ "rustix 0.38.37", ] -[[package]] -name = "winapi" -version = "0.2.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" - [[package]] name = "winapi" version = "0.3.9" @@ -2657,12 +2683,6 @@ dependencies = [ "winapi-x86_64-pc-windows-gnu", ] -[[package]] -name = "winapi-build" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" - [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" From 3e4b66f9b81f01bada45fc17f047213a9685b0bd Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 15:22:22 +0200 Subject: [PATCH 20/83] don't use custom memcpy --- portable/src/implementation/helpers.rs | 41 ++++---------------------- 1 file changed, 5 insertions(+), 36 deletions(-) diff --git a/portable/src/implementation/helpers.rs b/portable/src/implementation/helpers.rs index 8d50e08d..34b886af 100644 --- a/portable/src/implementation/helpers.rs +++ b/portable/src/implementation/helpers.rs @@ -38,44 +38,13 @@ pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8Er } #[allow(dead_code)] // only used if there 
is a SIMD implementation -#[inline(always)] // needs to be forced because otherwise it is not inlined on armv7 neon +#[inline] pub(crate) unsafe fn memcpy_unaligned_nonoverlapping_inline_opt_lt_64( - mut src: *const u8, - mut dest: *mut u8, - mut len: usize, + src: *const u8, + dest: *mut u8, + len: usize, ) { - // This gets properly auto-vectorized on AVX 2 and SSE 4.2. - // Needs to be forced because otherwise it is not inlined on armv7 neon. - #[inline(always)] - #[expect(clippy::inline_always)] - unsafe fn memcpy_u64(src: &mut *const u8, dest: &mut *mut u8) { - dest.cast::() - .write_unaligned(src.cast::().read_unaligned()); - *src = src.offset(8); - *dest = dest.offset(8); - } - if len >= 32 { - memcpy_u64(&mut src, &mut dest); - memcpy_u64(&mut src, &mut dest); - memcpy_u64(&mut src, &mut dest); - memcpy_u64(&mut src, &mut dest); - len -= 32; - } - if len >= 16 { - memcpy_u64(&mut src, &mut dest); - memcpy_u64(&mut src, &mut dest); - len -= 16; - } - if len >= 8 { - memcpy_u64(&mut src, &mut dest); - len -= 8; - } - while len > 0 { - *dest = *src; - src = src.offset(1); - dest = dest.offset(1); - len -= 1; - } + src.copy_to_nonoverlapping(dest, len); } pub(crate) const SIMD_CHUNK_SIZE: usize = 64; From c5117f91e5eba326632bf5851ee83947f2773b3c Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 15:22:48 +0200 Subject: [PATCH 21/83] upd --- portable/baseline/basic.s | 265 +++++++++++++++++--------------------- 1 file changed, 121 insertions(+), 144 deletions(-) diff --git a/portable/baseline/basic.s b/portable/baseline/basic.s index 77aaec02..aa2899a1 100644 --- a/portable/baseline/basic.s +++ b/portable/baseline/basic.s @@ -4,49 +4,52 @@ simdutf8_portable::implementation::validate_utf8_basic_simd: Lfunc_begin4: .cfi_startproc - sub sp, sp, #128 - .cfi_def_cfa_offset 128 - stp x29, x30, [sp, #112] - add x29, sp, #112 + stp x29, x30, [sp, #-16]! 
+ .cfi_def_cfa_offset 16 + mov x29, sp .cfi_def_cfa w29, 16 .cfi_offset w30, -8 .cfi_offset w29, -16 .cfi_remember_state - mov x10, #0 - and x8, x1, #0xffffffffffffffc0 - mov x9, x8 - cmp x10, x8 + sub x9, sp, #144 + and sp, x9, #0xffffffffffffffe0 + mov x8, x0 + mov x11, #0 + and x9, x1, #0xffffffffffffffc0 + mov x10, x9 +LBB4_1: + cmp x11, x9 b.hs LBB4_8 - add x11, x0, x10 - ldp q18, q17, [x11] - ldp q16, q7, [x11, #32] - add x10, x10, #64 + add x12, x8, x11 + ldp q18, q17, [x12] + ldp q16, q7, [x12, #32] + add x11, x11, #64 orr.16b v0, v17, v18 orr.16b v1, v16, v7 orr.16b v0, v0, v1 umaxv.16b b0, v0 - fmov w11, s0 - tbz w11, #7, LBB4_1 + fmov w12, s0 + tbz w12, #7, LBB4_1 movi.2d v4, #0000000000000000 ext.16b v2, v4, v18, #15 ushr.16b v1, v2, #4 Lloh24: - adrp x9, lCPI4_0@PAGE + adrp x10, lCPI4_0@PAGE Lloh25: - ldr q0, [x9, lCPI4_0@PAGEOFF] + ldr q0, [x10, lCPI4_0@PAGEOFF] tbl.16b v5, { v0 }, v1 movi.16b v1, #15 and.16b v3, v2, v1 Lloh26: - adrp x9, lCPI4_1@PAGE + adrp x10, lCPI4_1@PAGE Lloh27: - ldr q2, [x9, lCPI4_1@PAGEOFF] + ldr q2, [x10, lCPI4_1@PAGEOFF] tbl.16b v6, { v2 }, v3 ushr.16b v19, v18, #4 Lloh28: - adrp x9, lCPI4_2@PAGE + adrp x10, lCPI4_2@PAGE Lloh29: - ldr q3, [x9, lCPI4_2@PAGEOFF] + ldr q3, [x10, lCPI4_2@PAGEOFF] tbl.16b v19, { v3 }, v19 and.16b v5, v6, v5 and.16b v19, v5, v19 @@ -112,14 +115,15 @@ Lloh29: orr.16b v16, v17, v16 orr.16b v23, v18, v16 Lloh30: - adrp x9, lCPI4_3@PAGE + adrp x10, lCPI4_3@PAGE Lloh31: - ldr q16, [x9, lCPI4_3@PAGEOFF] + ldr q16, [x10, lCPI4_3@PAGEOFF] uqsub.16b v19, v7, v16 - cmp x10, x8 - b.hs LBB4_22 - mov x9, x10 + cmp x11, x9 + b.hs LBB4_14 + mov x10, x11 b LBB4_6 +LBB4_5: ext.16b v19, v7, v20, #15 ushr.16b v21, v19, #4 tbl.16b v21, { v0 }, v21 @@ -152,64 +156,64 @@ Lloh31: orr.16b v20, v20, v21 and.16b v20, v20, v6 eor.16b v19, v19, v20 - orr.16b v7, v19, v7 - ext.16b v19, v18, v17, #15 - ushr.16b v20, v19, #4 - tbl.16b v20, { v0 }, v20 - and.16b v19, v19, v1 - tbl.16b v19, { v2 }, v19 - ushr.16b v21, v17, #4 - 
tbl.16b v21, { v3 }, v21 - and.16b v19, v19, v20 - and.16b v19, v19, v21 - ext.16b v20, v18, v17, #14 + ext.16b v20, v18, v17, #15 + ushr.16b v21, v20, #4 + tbl.16b v21, { v0 }, v21 + and.16b v20, v20, v1 + tbl.16b v20, { v2 }, v20 + ushr.16b v22, v17, #4 + tbl.16b v22, { v3 }, v22 + and.16b v20, v20, v21 + and.16b v20, v20, v22 + ext.16b v21, v18, v17, #14 ext.16b v18, v18, v17, #13 - cmhi.16b v20, v20, v4 + cmhi.16b v21, v21, v4 cmhi.16b v18, v18, v5 - orr.16b v18, v18, v20 + orr.16b v18, v18, v21 and.16b v18, v18, v6 - eor.16b v18, v19, v18 - ext.16b v19, v17, v24, #15 - ushr.16b v20, v19, #4 - tbl.16b v20, { v0 }, v20 - and.16b v19, v19, v1 - tbl.16b v19, { v2 }, v19 - ushr.16b v21, v24, #4 - tbl.16b v21, { v3 }, v21 - and.16b v19, v19, v20 - and.16b v19, v19, v21 - ext.16b v20, v17, v24, #14 + eor.16b v18, v20, v18 + ext.16b v20, v17, v24, #15 + ushr.16b v21, v20, #4 + tbl.16b v21, { v0 }, v21 + and.16b v20, v20, v1 + tbl.16b v20, { v2 }, v20 + ushr.16b v22, v24, #4 + tbl.16b v22, { v3 }, v22 + and.16b v20, v20, v21 + and.16b v20, v20, v22 + ext.16b v21, v17, v24, #14 ext.16b v17, v17, v24, #13 - cmhi.16b v20, v20, v4 + cmhi.16b v21, v21, v4 cmhi.16b v17, v17, v5 - orr.16b v17, v17, v20 + orr.16b v17, v17, v21 and.16b v17, v17, v6 - eor.16b v17, v19, v17 + eor.16b v17, v20, v17 + orr.16b v7, v19, v7 orr.16b v17, v18, v17 orr.16b v19, v7, v17 mov.16b v7, v24 uqsub.16b v17, v24, v16 orr.16b v23, v19, v23 - add x9, x9, #64 + add x10, x10, #64 mov.16b v19, v17 - cmp x9, x8 + cmp x10, x9 b.hs LBB4_9 LBB4_6: - add x10, x0, x9 - ldp q20, q18, [x10] - ldp q17, q24, [x10, #32] + add x11, x8, x10 + ldp q20, q18, [x11] + ldp q17, q24, [x11, #32] orr.16b v21, v18, v20 orr.16b v22, v17, v24 orr.16b v21, v21, v22 umaxv.16b b21, v21 - fmov w10, s21 - tbnz w10, #7, LBB4_5 + fmov w11, s21 + tbnz w11, #7, LBB4_5 mov.16b v17, v19 mov.16b v24, v7 orr.16b v23, v19, v23 - add x9, x9, #64 + add x10, x10, #64 mov.16b v19, v17 - cmp x9, x8 + cmp x10, x9 b.lo LBB4_6 b LBB4_9 LBB4_8: @@ 
-217,39 +221,29 @@ LBB4_8: movi.2d v17, #0000000000000000 movi.2d v24, #0000000000000000 LBB4_9: - subs x2, x1, x9 - b.ls LBB4_18 + subs x2, x1, x10 + b.ls LBB4_13 LBB4_10: + stp q24, q17, [sp, #16] movi.2d v0, #0000000000000000 - stp q0, q0, [sp, #80] - stp q0, q0, [sp, #48] - add x1, x0, x9 - add x0, sp, #48 - subs x8, x2, #32 - b.hs LBB4_19 - subs x8, x2, #16 - b.hs LBB4_20 -LBB4_12: - subs x8, x2, #8 - b.hs LBB4_21 -LBB4_13: - cbz x2, LBB4_15 - stp q24, q23, [sp, #16] - str q17, [sp] + stp q0, q0, [sp, #96] + str q0, [sp, #80] + stp q23, q0, [sp, #48] + add x0, sp, #64 + add x1, x8, x10 bl _memcpy - ldp q17, q24, [sp] - ldr q23, [sp, #32] -LBB4_15: - ldp q3, q2, [sp, #48] - ldp q1, q0, [sp, #80] + ldr q6, [sp, #32] + ldp q3, q2, [sp, #64] + ldp q1, q0, [sp, #96] orr.16b v4, v2, v3 orr.16b v5, v1, v0 orr.16b v4, v4, v5 umaxv.16b b4, v4 fmov w8, s4 - mov.16b v4, v17 - tbz w8, #7, LBB4_17 - ext.16b v4, v24, v3, #15 + mov.16b v4, v6 + tbz w8, #7, LBB4_12 + ldr q19, [sp, #16] + ext.16b v4, v19, v3, #15 ushr.16b v5, v4, #4 Lloh32: adrp x8, lCPI4_0@PAGE @@ -271,8 +265,8 @@ Lloh37: tbl.16b v17, { v18 }, v17 and.16b v4, v4, v5 and.16b v4, v4, v17 - ext.16b v5, v24, v3, #14 - ext.16b v17, v24, v3, #13 + ext.16b v5, v19, v3, #14 + ext.16b v17, v19, v3, #13 movi.16b v19, #223 cmhi.16b v5, v5, v19 movi.16b v20, #239 @@ -297,88 +291,71 @@ Lloh37: orr.16b v3, v3, v21 and.16b v3, v3, v17 eor.16b v3, v5, v3 - orr.16b v3, v3, v4 - ext.16b v4, v2, v1, #15 - ushr.16b v5, v4, #4 - tbl.16b v5, { v6 }, v5 - and.16b v4, v4, v7 - tbl.16b v4, { v16 }, v4 - ushr.16b v21, v1, #4 - tbl.16b v21, { v18 }, v21 - and.16b v4, v4, v5 - and.16b v4, v4, v21 - ext.16b v5, v2, v1, #14 + ext.16b v5, v2, v1, #15 + ushr.16b v21, v5, #4 + tbl.16b v21, { v6 }, v21 + and.16b v5, v5, v7 + tbl.16b v5, { v16 }, v5 + ushr.16b v22, v1, #4 + tbl.16b v22, { v18 }, v22 + and.16b v5, v5, v21 + and.16b v5, v5, v22 + ext.16b v21, v2, v1, #14 ext.16b v2, v2, v1, #13 - cmhi.16b v5, v5, v19 + cmhi.16b v21, v21, v19 
cmhi.16b v2, v2, v20 - orr.16b v2, v2, v5 + orr.16b v2, v2, v21 and.16b v2, v2, v17 - eor.16b v2, v4, v2 - ext.16b v4, v1, v0, #15 - ushr.16b v5, v4, #4 - tbl.16b v5, { v6 }, v5 - and.16b v4, v4, v7 - tbl.16b v4, { v16 }, v4 - ushr.16b v6, v0, #4 - tbl.16b v6, { v18 }, v6 - and.16b v4, v4, v5 - and.16b v4, v4, v6 - ext.16b v5, v1, v0, #14 + eor.16b v2, v5, v2 + ext.16b v5, v1, v0, #15 + ushr.16b v21, v5, #4 + tbl.16b v6, { v6 }, v21 + and.16b v5, v5, v7 + tbl.16b v5, { v16 }, v5 + ushr.16b v7, v0, #4 + tbl.16b v7, { v18 }, v7 + and.16b v5, v5, v6 + and.16b v5, v5, v7 + ext.16b v6, v1, v0, #14 ext.16b v1, v1, v0, #13 - cmhi.16b v5, v5, v19 + cmhi.16b v6, v6, v19 cmhi.16b v1, v1, v20 - orr.16b v1, v1, v5 + orr.16b v1, v1, v6 and.16b v1, v1, v17 - eor.16b v1, v4, v1 + eor.16b v1, v5, v1 + orr.16b v3, v3, v4 orr.16b v1, v2, v1 - orr.16b v17, v3, v1 + orr.16b v6, v3, v1 Lloh38: adrp x8, lCPI4_3@PAGE Lloh39: ldr q1, [x8, lCPI4_3@PAGEOFF] uqsub.16b v4, v0, v1 - orr.16b v23, v17, v23 +LBB4_12: + ldr q23, [sp, #48] + orr.16b v23, v6, v23 mov.16b v17, v4 -LBB4_18: +LBB4_13: orr.16b v0, v17, v23 umaxv.16b b0, v0 fmov w8, s0 tst w8, #0xff cset w0, ne - .cfi_def_cfa wsp, 128 - ldp x29, x30, [sp, #112] - add sp, sp, #128 + mov sp, x29 + .cfi_def_cfa wsp, 16 + ldp x29, x30, [sp], #16 .cfi_def_cfa_offset 0 .cfi_restore w30 .cfi_restore w29 ret -LBB4_19: +LBB4_14: .cfi_restore_state - ldp q0, q1, [x1], #32 - stp q0, q1, [sp, #48] - add x0, x0, #32 - mov x2, x8 - subs x8, x8, #16 - b.lo LBB4_12 -LBB4_20: - ldr q0, [x1], #16 - str q0, [x0], #16 - mov x2, x8 - subs x8, x8, #8 - b.lo LBB4_13 -LBB4_21: - ldr x9, [x1], #8 - str x9, [x0], #8 - mov x2, x8 - cbnz x8, LBB4_14 - b LBB4_15 -LBB4_22: mov.16b v17, v19 mov.16b v24, v7 - mov x9, x10 - subs x2, x1, x10 + mov x10, x11 + subs x2, x1, x11 b.hi LBB4_10 - b LBB4_18 + b LBB4_13 .loh AdrpLdr Lloh30, Lloh31 .loh AdrpAdrp Lloh28, Lloh30 .loh AdrpLdr Lloh28, Lloh29 From b2b3314709d4ad3a31dda0e2fbbd85d76af767d4 Mon Sep 17 00:00:00 2001 From: 
Hans Kratz Date: Tue, 22 Oct 2024 15:27:22 +0200 Subject: [PATCH 22/83] simplify --- portable/src/implementation/portable/algorithm_new.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index a2b18961..58e4a56b 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -535,11 +535,10 @@ where if idx < len { let mut tmpbuf = TempSimdChunk::new(); unsafe { - crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64( - input.as_ptr().add(idx), - tmpbuf.0.as_mut_ptr(), - len - idx, - ); + input + .as_ptr() + .add(idx) + .copy_to_nonoverlapping(tmpbuf.0.as_mut_ptr(), len - idx); } let simd_input = SimdInput::new(tmpbuf.0.as_ptr()); algorithm.check_utf8(&simd_input); From de5f06b8ff747b34671d35423d04375c1a1e44af Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 15:35:43 +0200 Subject: [PATCH 23/83] simplify --- .../implementation/portable/algorithm_new.rs | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 58e4a56b..4f67e10c 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -41,7 +41,7 @@ where trait SimdInputTrait { fn new(ptr: *const u8) -> Self; - + fn new_partial(ptr: *const u8, len: usize) -> Self; fn is_ascii(&self) -> bool; } @@ -62,6 +62,15 @@ impl SimdInputTrait for SimdInput<16, 4> { } } + #[inline] + fn new_partial(ptr: *const u8, len: usize) -> Self { + let mut tmpbuf = TempSimdChunk::new(); + unsafe { + ptr.copy_to_nonoverlapping(tmpbuf.0.as_mut_ptr(), len); + } + Self::new(tmpbuf.0.as_ptr()) + } + #[inline] fn is_ascii(&self) -> bool { (self.vals[0] | self.vals[1] | self.vals[2] | 
self.vals[3]).is_ascii() @@ -533,14 +542,7 @@ where } if idx < len { - let mut tmpbuf = TempSimdChunk::new(); - unsafe { - input - .as_ptr() - .add(idx) - .copy_to_nonoverlapping(tmpbuf.0.as_mut_ptr(), len - idx); - } - let simd_input = SimdInput::new(tmpbuf.0.as_ptr()); + let simd_input = unsafe { SimdInput::new_partial(input.as_ptr().add(idx), len - idx) }; algorithm.check_utf8(&simd_input); } algorithm.check_incomplete_pending(); From 437304706628e1031b396394936cac6e5bda93f3 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Tue, 22 Oct 2024 15:36:13 +0200 Subject: [PATCH 24/83] upd --- portable/baseline/basic.s | 1 - 1 file changed, 1 deletion(-) diff --git a/portable/baseline/basic.s b/portable/baseline/basic.s index aa2899a1..21e1d5d9 100644 --- a/portable/baseline/basic.s +++ b/portable/baseline/basic.s @@ -10,7 +10,6 @@ Lfunc_begin4: .cfi_def_cfa w29, 16 .cfi_offset w30, -8 .cfi_offset w29, -16 - .cfi_remember_state sub x9, sp, #144 and sp, x9, #0xffffffffffffffe0 mov x8, x0 From fbd42070fec74998653454fec42b8b5657dd1860 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 23 Oct 2024 10:06:50 +0200 Subject: [PATCH 25/83] masked load experimentation --- .../implementation/portable/algorithm_new.rs | 97 ++++++++++++++++++- 1 file changed, 92 insertions(+), 5 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 4f67e10c..179fcc12 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -1,7 +1,8 @@ +use core::{hint::assert_unchecked, ptr::slice_from_raw_parts, slice}; use std::simd::{ cmp::SimdPartialOrd, num::{SimdInt, SimdUint}, - simd_swizzle, u8x16, LaneCount, Simd, SupportedLaneCount, + simd_swizzle, u8x16, LaneCount, Mask, Simd, SupportedLaneCount, }; use crate::{basic, implementation::helpers::SIMD_CHUNK_SIZE}; @@ -64,11 +65,29 @@ impl SimdInputTrait for SimdInput<16, 4> { #[inline] fn 
new_partial(ptr: *const u8, len: usize) -> Self { - let mut tmpbuf = TempSimdChunk::new(); - unsafe { - ptr.copy_to_nonoverlapping(tmpbuf.0.as_mut_ptr(), len); + let mut slice = unsafe { slice::from_raw_parts(ptr, len) }; + let val0 = load_masked(slice); + let val1 = if slice.len() > 16 { + slice = &slice[16..]; + load_masked(slice) + } else { + u8x16::default() + }; + let val2 = if slice.len() > 16 { + slice = &slice[16..]; + load_masked(slice) + } else { + u8x16::default() + }; + let val3 = if slice.len() > 16 { + slice = &slice[16..]; + load_masked(slice) + } else { + u8x16::default() + }; + Self { + vals: [val0, val1, val2, val3], } - Self::new(tmpbuf.0.as_ptr()) } #[inline] @@ -77,6 +96,74 @@ impl SimdInputTrait for SimdInput<16, 4> { } } +#[inline(never)] +fn load_masked(slice: &[u8]) -> Simd { + let mut val = u8x16::default(); + if slice.len() > 0 { + val[0] = slice[0]; + if slice.len() > 1 { + val[1] = slice[1]; + if slice.len() > 2 { + val[2] = slice[2]; + if slice.len() > 3 { + val[3] = slice[3]; + if slice.len() > 4 { + val[4] = slice[4]; + if slice.len() > 5 { + val[5] = slice[5]; + if slice.len() > 6 { + val[6] = slice[6]; + if slice.len() > 7 { + val[7] = slice[7]; + if slice.len() > 8 { + val[8] = slice[8]; + if slice.len() > 9 { + val[9] = slice[9]; + if slice.len() > 10 { + val[10] = slice[10]; + if slice.len() > 11 { + val[11] = slice[11]; + if slice.len() > 12 { + val[12] = slice[12]; + if slice.len() > 13 { + val[13] = slice[13]; + if slice.len() > 14 { + val[14] = slice[14]; + if slice.len() > 15 { + val[15] = slice[15]; + } + } + } + } + } + } + } + } + } + } + } + } + } + } + } + } + val + // for i in 0..slice.len().min(16) { + // val[i] = slice[i]; + // } + // val + // + // unsafe { + // u8x16::load_select_unchecked( + // slice, + // Mask::from_bitmask((1u64 << slice.len()) - 1), + // u8x16::default(), + // ) + // } + // + // u8x16::load_or_default(slice) +} + struct Utf8CheckAlgorithm where LaneCount: SupportedLaneCount, From 
69dad0f81939b78cb200132ef268634a7b3acc83 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 23 Oct 2024 12:49:51 +0200 Subject: [PATCH 26/83] more experiments --- .../implementation/portable/algorithm_new.rs | 80 ++++++++++++++----- 1 file changed, 59 insertions(+), 21 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 179fcc12..182fdf18 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -64,30 +64,56 @@ impl SimdInputTrait for SimdInput<16, 4> { } #[inline] - fn new_partial(ptr: *const u8, len: usize) -> Self { + fn new_partial(ptr: *const u8, mut len: usize) -> Self { + unsafe { + assert_unchecked(len < 64); + } let mut slice = unsafe { slice::from_raw_parts(ptr, len) }; - let val0 = load_masked(slice); - let val1 = if slice.len() > 16 { - slice = &slice[16..]; - load_masked(slice) - } else { - u8x16::default() - }; - let val2 = if slice.len() > 16 { - slice = &slice[16..]; - load_masked(slice) - } else { - u8x16::default() - }; - let val3 = if slice.len() > 16 { - slice = &slice[16..]; - load_masked(slice) - } else { - u8x16::default() - }; + let val0 = load_masked_opt(slice); + slice = &slice[slice.len().min(16)..]; + let val1 = load_masked_opt(slice); + slice = &slice[slice.len().min(16)..]; + let val2 = load_masked_opt(slice); + slice = &slice[slice.len().min(16)..]; + let val3 = load_masked_opt(slice); Self { vals: [val0, val1, val2, val3], } + + // let mut vals = [Simd::::default(); 4]; + // let mut i = 0; + // while len > 16 { + // vals[i] = unsafe { ptr.cast::().read_unaligned() }; + // i += 1; + // len -= 16; + // } + // if len > 0 { + // vals[i] = u8x16::load_or_default(unsafe { slice::from_raw_parts(ptr, len) }); + // } + // Self { vals } + // let mut slice = unsafe { slice::from_raw_parts(ptr, len) }; + // let val0 = load_masked(slice); + // let val1 = if slice.len() > 16 { + // 
slice = &slice[16..]; + // load_masked(slice) + // } else { + // u8x16::default() + // }; + // let val2 = if slice.len() > 16 { + // slice = &slice[16..]; + // load_masked(slice) + // } else { + // u8x16::default() + // }; + // let val3 = if slice.len() > 16 { + // slice = &slice[16..]; + // load_masked(slice) + // } else { + // u8x16::default() + // }; + // Self { + // vals: [val0, val1, val2, val3], + // } } #[inline] @@ -96,7 +122,17 @@ impl SimdInputTrait for SimdInput<16, 4> { } } -#[inline(never)] +fn load_masked_opt(slice: &[u8]) -> Simd { + if slice.is_empty() { + u8x16::splat(0) + } else if slice.len() > 15 { + unsafe { slice.as_ptr().cast::().read_unaligned() } + } else { + load_masked(slice) + } +} + +#[inline] fn load_masked(slice: &[u8]) -> Simd { let mut val = u8x16::default(); if slice.len() > 0 { @@ -148,6 +184,8 @@ fn load_masked(slice: &[u8]) -> Simd { } } val + // + // let mut val = u8x16::default(); // for i in 0..slice.len().min(16) { // val[i] = slice[i]; // } From 7eb9ab0262468eb08c24cb715ecd16ecded4fd4e Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 24 Oct 2024 06:07:02 +0200 Subject: [PATCH 27/83] wip --- .../implementation/portable/algorithm_new.rs | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 182fdf18..3f5b4050 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -66,15 +66,31 @@ impl SimdInputTrait for SimdInput<16, 4> { #[inline] fn new_partial(ptr: *const u8, mut len: usize) -> Self { unsafe { + assert_unchecked(len > 0); assert_unchecked(len < 64); } let mut slice = unsafe { slice::from_raw_parts(ptr, len) }; let val0 = load_masked_opt(slice); slice = &slice[slice.len().min(16)..]; + if slice.is_empty() { + return Self { + vals: [val0, u8x16::default(), u8x16::default(), u8x16::default()], + }; + } 
let val1 = load_masked_opt(slice); slice = &slice[slice.len().min(16)..]; + if slice.is_empty() { + return Self { + vals: [val0, val1, u8x16::default(), u8x16::default()], + }; + } let val2 = load_masked_opt(slice); slice = &slice[slice.len().min(16)..]; + if slice.is_empty() { + return Self { + vals: [val0, val1, val2, u8x16::default()], + }; + } let val3 = load_masked_opt(slice); Self { vals: [val0, val1, val2, val3], @@ -123,9 +139,7 @@ impl SimdInputTrait for SimdInput<16, 4> { } fn load_masked_opt(slice: &[u8]) -> Simd { - if slice.is_empty() { - u8x16::splat(0) - } else if slice.len() > 15 { + if slice.len() > 15 { unsafe { slice.as_ptr().cast::().read_unaligned() } } else { load_masked(slice) From 898d1244b204225e3027b48487ec1bdcf0268ded Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 24 Oct 2024 06:07:20 +0200 Subject: [PATCH 28/83] new baseline asm --- portable/baseline/basic.s | 41 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/portable/baseline/basic.s b/portable/baseline/basic.s index 21e1d5d9..8ebeaa21 100644 --- a/portable/baseline/basic.s +++ b/portable/baseline/basic.s @@ -119,7 +119,7 @@ Lloh31: ldr q16, [x10, lCPI4_3@PAGEOFF] uqsub.16b v19, v7, v16 cmp x11, x9 - b.hs LBB4_14 + b.hs LBB4_12 mov x10, x11 b LBB4_6 LBB4_5: @@ -221,7 +221,7 @@ LBB4_8: movi.2d v24, #0000000000000000 LBB4_9: subs x2, x1, x10 - b.ls LBB4_13 + b.ls LBB4_14 LBB4_10: stp q24, q17, [sp, #16] movi.2d v0, #0000000000000000 @@ -231,7 +231,6 @@ LBB4_10: add x0, sp, #64 add x1, x8, x10 bl _memcpy - ldr q6, [sp, #32] ldp q3, q2, [sp, #64] ldp q1, q0, [sp, #96] orr.16b v4, v2, v3 @@ -239,8 +238,18 @@ LBB4_10: orr.16b v4, v4, v5 umaxv.16b b4, v4 fmov w8, s4 - mov.16b v4, v6 - tbz w8, #7, LBB4_12 + tbnz w8, #7, LBB4_13 + ldp q17, q23, [sp, #32] + orr.16b v23, v17, v23 + b LBB4_14 +LBB4_12: + mov.16b v17, v19 + mov.16b v24, v7 + mov x10, x11 + subs x2, x1, x11 + b.hi LBB4_10 + b LBB4_14 +LBB4_13: ldr q19, [sp, #16] ext.16b 
v4, v19, v3, #15 ushr.16b v5, v4, #4 @@ -322,19 +331,17 @@ Lloh37: orr.16b v1, v1, v6 and.16b v1, v1, v17 eor.16b v1, v5, v1 - orr.16b v3, v3, v4 + orr.16b v3, v4, v3 orr.16b v1, v2, v1 - orr.16b v6, v3, v1 + orr.16b v1, v3, v1 + ldr q23, [sp, #48] + orr.16b v23, v1, v23 Lloh38: adrp x8, lCPI4_3@PAGE Lloh39: ldr q1, [x8, lCPI4_3@PAGEOFF] - uqsub.16b v4, v0, v1 -LBB4_12: - ldr q23, [sp, #48] - orr.16b v23, v6, v23 - mov.16b v17, v4 -LBB4_13: + uqsub.16b v17, v0, v1 +LBB4_14: orr.16b v0, v17, v23 umaxv.16b b0, v0 fmov w8, s0 @@ -347,14 +354,6 @@ LBB4_13: .cfi_restore w30 .cfi_restore w29 ret -LBB4_14: - .cfi_restore_state - mov.16b v17, v19 - mov.16b v24, v7 - mov x10, x11 - subs x2, x1, x11 - b.hi LBB4_10 - b LBB4_13 .loh AdrpLdr Lloh30, Lloh31 .loh AdrpAdrp Lloh28, Lloh30 .loh AdrpLdr Lloh28, Lloh29 From c3c24a00468623c7ea7ac78008c0e006c06b1982 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 24 Oct 2024 10:49:02 +0200 Subject: [PATCH 29/83] only used masked loads if fast (avx512 later) --- .../implementation/portable/algorithm_new.rs | 130 ++++-------------- 1 file changed, 23 insertions(+), 107 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 3f5b4050..9fddbb90 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -19,6 +19,8 @@ const HAS_FAST_REDUCE_MAX: bool = true; )))] const HAS_FAST_REDUCE_MAX: bool = false; +const HAS_FAST_MASKED_LOAD: bool = false; + #[repr(C, align(32))] #[allow(dead_code)] // only used if a 128-bit SIMD implementation is used pub(crate) struct TempSimdChunk(pub(crate) [u8; SIMD_CHUNK_SIZE]); @@ -42,8 +44,19 @@ where trait SimdInputTrait { fn new(ptr: *const u8) -> Self; - fn new_partial(ptr: *const u8, len: usize) -> Self; fn is_ascii(&self) -> bool; + fn new_partial_masked_load(ptr: *const u8, len: usize) -> Self; + fn new_partial_copy(ptr: *const u8, len: usize) -> 
Self; + fn new_partial(ptr: *const u8, len: usize) -> Self + where + Self: Sized, + { + if HAS_FAST_MASKED_LOAD { + Self::new_partial_masked_load(ptr, len) + } else { + Self::new_partial_copy(ptr, len) + } + } } impl SimdInputTrait for SimdInput<16, 4> { @@ -64,7 +77,7 @@ impl SimdInputTrait for SimdInput<16, 4> { } #[inline] - fn new_partial(ptr: *const u8, mut len: usize) -> Self { + fn new_partial_masked_load(ptr: *const u8, len: usize) -> Self { unsafe { assert_unchecked(len > 0); assert_unchecked(len < 64); @@ -95,41 +108,14 @@ impl SimdInputTrait for SimdInput<16, 4> { Self { vals: [val0, val1, val2, val3], } + } - // let mut vals = [Simd::::default(); 4]; - // let mut i = 0; - // while len > 16 { - // vals[i] = unsafe { ptr.cast::().read_unaligned() }; - // i += 1; - // len -= 16; - // } - // if len > 0 { - // vals[i] = u8x16::load_or_default(unsafe { slice::from_raw_parts(ptr, len) }); - // } - // Self { vals } - // let mut slice = unsafe { slice::from_raw_parts(ptr, len) }; - // let val0 = load_masked(slice); - // let val1 = if slice.len() > 16 { - // slice = &slice[16..]; - // load_masked(slice) - // } else { - // u8x16::default() - // }; - // let val2 = if slice.len() > 16 { - // slice = &slice[16..]; - // load_masked(slice) - // } else { - // u8x16::default() - // }; - // let val3 = if slice.len() > 16 { - // slice = &slice[16..]; - // load_masked(slice) - // } else { - // u8x16::default() - // }; - // Self { - // vals: [val0, val1, val2, val3], - // } + fn new_partial_copy(ptr: *const u8, len: usize) -> Self { + let mut buf = [0; 64]; + unsafe { + ptr.copy_to_nonoverlapping(buf.as_mut_ptr(), len); + } + Self::new(buf.as_ptr()) } #[inline] @@ -142,78 +128,8 @@ fn load_masked_opt(slice: &[u8]) -> Simd { if slice.len() > 15 { unsafe { slice.as_ptr().cast::().read_unaligned() } } else { - load_masked(slice) - } -} - -#[inline] -fn load_masked(slice: &[u8]) -> Simd { - let mut val = u8x16::default(); - if slice.len() > 0 { - val[0] = slice[0]; - if 
slice.len() > 1 { - val[1] = slice[1]; - if slice.len() > 2 { - val[2] = slice[2]; - if slice.len() > 3 { - val[3] = slice[3]; - if slice.len() > 4 { - val[4] = slice[4]; - if slice.len() > 5 { - val[5] = slice[5]; - if slice.len() > 6 { - val[6] = slice[6]; - if slice.len() > 7 { - val[7] = slice[7]; - if slice.len() > 8 { - val[8] = slice[8]; - if slice.len() > 9 { - val[9] = slice[9]; - if slice.len() > 10 { - val[10] = slice[10]; - if slice.len() > 11 { - val[11] = slice[11]; - if slice.len() > 12 { - val[12] = slice[12]; - if slice.len() > 13 { - val[13] = slice[13]; - if slice.len() > 14 { - val[14] = slice[14]; - if slice.len() > 15 { - val[15] = slice[15]; - } - } - } - } - } - } - } - } - } - } - } - } - } - } - } + u8x16::load_or_default(slice) } - val - // - // let mut val = u8x16::default(); - // for i in 0..slice.len().min(16) { - // val[i] = slice[i]; - // } - // val - // - // unsafe { - // u8x16::load_select_unchecked( - // slice, - // Mask::from_bitmask((1u64 << slice.len()) - 1), - // u8x16::default(), - // ) - // } - // - // u8x16::load_or_default(slice) } struct Utf8CheckAlgorithm From df997871e61594515538a928e8644f33421a9f4e Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 24 Oct 2024 10:52:42 +0200 Subject: [PATCH 30/83] inlining --- portable/src/implementation/portable/algorithm_new.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 9fddbb90..88e476fe 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -110,6 +110,7 @@ impl SimdInputTrait for SimdInput<16, 4> { } } + #[inline] fn new_partial_copy(ptr: *const u8, len: usize) -> Self { let mut buf = [0; 64]; unsafe { @@ -124,6 +125,7 @@ impl SimdInputTrait for SimdInput<16, 4> { } } +#[inline] fn load_masked_opt(slice: &[u8]) -> Simd { if slice.len() > 15 { unsafe { 
slice.as_ptr().cast::().read_unaligned() } From b7d0bc8e0a2ea69bb30e2891ba0299b4588a998f Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 24 Oct 2024 10:53:44 +0200 Subject: [PATCH 31/83] cleanup --- portable/src/implementation/portable/algorithm_new.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 88e476fe..e1444cc1 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -1,8 +1,8 @@ -use core::{hint::assert_unchecked, ptr::slice_from_raw_parts, slice}; +use core::{hint::assert_unchecked, slice}; use std::simd::{ cmp::SimdPartialOrd, num::{SimdInt, SimdUint}, - simd_swizzle, u8x16, LaneCount, Mask, Simd, SupportedLaneCount, + simd_swizzle, u8x16, LaneCount, Simd, SupportedLaneCount, }; use crate::{basic, implementation::helpers::SIMD_CHUNK_SIZE}; From c0c728914ed21dd46fd829b234f90eadf8dee6e8 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 24 Oct 2024 11:40:56 +0200 Subject: [PATCH 32/83] rm unsafe --- .../implementation/portable/algorithm_new.rs | 96 +++++++------------ 1 file changed, 37 insertions(+), 59 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index e1444cc1..33ac9535 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -1,4 +1,3 @@ -use core::{hint::assert_unchecked, slice}; use std::simd::{ cmp::SimdPartialOrd, num::{SimdInt, SimdUint}, @@ -43,46 +42,37 @@ where } trait SimdInputTrait { - fn new(ptr: *const u8) -> Self; - fn is_ascii(&self) -> bool; - fn new_partial_masked_load(ptr: *const u8, len: usize) -> Self; - fn new_partial_copy(ptr: *const u8, len: usize) -> Self; - fn new_partial(ptr: *const u8, len: usize) -> Self + fn new(ptr: &[u8]) -> Self; + fn 
new_partial_masked_load(slice: &[u8]) -> Self; + fn new_partial_copy(slice: &[u8]) -> Self; + fn new_partial(slice: &[u8]) -> Self where Self: Sized, { if HAS_FAST_MASKED_LOAD { - Self::new_partial_masked_load(ptr, len) + Self::new_partial_masked_load(slice) } else { - Self::new_partial_copy(ptr, len) + Self::new_partial_copy(slice) } } + fn is_ascii(&self) -> bool; } impl SimdInputTrait for SimdInput<16, 4> { #[inline] - fn new(ptr: *const u8) -> Self { - #[expect(clippy::cast_ptr_alignment)] - let ptr = ptr.cast::(); - unsafe { - Self { - vals: [ - ptr.read_unaligned(), - ptr.add(1).read_unaligned(), - ptr.add(2).read_unaligned(), - ptr.add(3).read_unaligned(), - ], - } + fn new(s: &[u8]) -> Self { + Self { + vals: [ + u8x16::from_slice(&s[..16]), + u8x16::from_slice(&s[16..32]), + u8x16::from_slice(&s[32..48]), + u8x16::from_slice(&s[48..64]), + ], } } #[inline] - fn new_partial_masked_load(ptr: *const u8, len: usize) -> Self { - unsafe { - assert_unchecked(len > 0); - assert_unchecked(len < 64); - } - let mut slice = unsafe { slice::from_raw_parts(ptr, len) }; + fn new_partial_masked_load(mut slice: &[u8]) -> Self { let val0 = load_masked_opt(slice); slice = &slice[slice.len().min(16)..]; if slice.is_empty() { @@ -111,12 +101,10 @@ impl SimdInputTrait for SimdInput<16, 4> { } #[inline] - fn new_partial_copy(ptr: *const u8, len: usize) -> Self { + fn new_partial_copy(slice: &[u8]) -> Self { let mut buf = [0; 64]; - unsafe { - ptr.copy_to_nonoverlapping(buf.as_mut_ptr(), len); - } - Self::new(buf.as_ptr()) + buf[..slice.len()].copy_from_slice(slice); + Self::new(&buf) } #[inline] @@ -128,7 +116,7 @@ impl SimdInputTrait for SimdInput<16, 4> { #[inline] fn load_masked_opt(slice: &[u8]) -> Simd { if slice.len() > 15 { - unsafe { slice.as_ptr().cast::().read_unaligned() } + u8x16::from_slice(&slice[..16]) } else { u8x16::load_or_default(slice) } @@ -552,19 +540,15 @@ where // WORKAROUND // necessary because the for loop is not unrolled on ARM64 if input.vals.len() == 
2 { - unsafe { - self.check_bytes(*input.vals.as_ptr()); - self.check_bytes(*input.vals.as_ptr().add(1)); - self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(1)); - } + self.check_bytes(input.vals[0]); + self.check_bytes(input.vals[1]); + self.incomplete = Self::is_incomplete(input.vals[1]); } else if input.vals.len() == 4 { - unsafe { - self.check_bytes(*input.vals.as_ptr()); - self.check_bytes(*input.vals.as_ptr().add(1)); - self.check_bytes(*input.vals.as_ptr().add(2)); - self.check_bytes(*input.vals.as_ptr().add(3)); - self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(3)); - } + self.check_bytes(input.vals[0]); + self.check_bytes(input.vals[1]); + self.check_bytes(input.vals[2]); + self.check_bytes(input.vals[3]); + self.incomplete = Self::is_incomplete(input.vals[3]); } else { panic!("Unsupported number of chunks"); } @@ -578,28 +562,22 @@ where #[inline] pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { use crate::implementation::helpers::SIMD_CHUNK_SIZE; - let len = input.len(); let mut algorithm = Self::new(); - let mut idx: usize = 0; - let iter_lim = len - (len % SIMD_CHUNK_SIZE); - - while idx < iter_lim { - let simd_input = unsafe { SimdInput::::new(input.as_ptr().add(idx)) }; - idx += SIMD_CHUNK_SIZE; + let mut chunks = input.chunks_exact(SIMD_CHUNK_SIZE); + for chunk in chunks.by_ref() { + let simd_input = SimdInput::::new(chunk); if !simd_input.is_ascii() { algorithm.check_block(&simd_input); break; } } - - while idx < iter_lim { - let input = unsafe { SimdInput::::new(input.as_ptr().add(idx)) }; - algorithm.check_utf8(&input); - idx += SIMD_CHUNK_SIZE; + for chunk in chunks.by_ref() { + let simd_input = SimdInput::::new(chunk); + algorithm.check_utf8(&simd_input); } - - if idx < len { - let simd_input = unsafe { SimdInput::new_partial(input.as_ptr().add(idx), len - idx) }; + let rem = chunks.remainder(); + if !rem.is_ascii() { + let simd_input = SimdInput::::new_partial(rem); 
algorithm.check_utf8(&simd_input); } algorithm.check_incomplete_pending(); From e14fd378fda4abc17a85d579092ecdb689114aed Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 24 Oct 2024 11:49:44 +0200 Subject: [PATCH 33/83] upd --- portable/src/implementation/portable/algorithm_new.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 33ac9535..e29b83d2 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -18,7 +18,7 @@ const HAS_FAST_REDUCE_MAX: bool = true; )))] const HAS_FAST_REDUCE_MAX: bool = false; -const HAS_FAST_MASKED_LOAD: bool = false; +const HAS_FAST_MASKED_LOAD: bool = false; // FIXME avx512, avx2 (?) #[repr(C, align(32))] #[allow(dead_code)] // only used if a 128-bit SIMD implementation is used @@ -61,6 +61,7 @@ trait SimdInputTrait { impl SimdInputTrait for SimdInput<16, 4> { #[inline] fn new(s: &[u8]) -> Self { + assert!(s.len() == 64); Self { vals: [ u8x16::from_slice(&s[..16]), From 556d85ea6d8422c98d702bcb1fad864a591d05bb Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 05:48:51 +0200 Subject: [PATCH 34/83] doc --- portable/src/implementation/portable/algorithm_new.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index e29b83d2..34fc9fad 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -214,7 +214,9 @@ where v15: u8, ) -> Self; - fn prev1(self, prev: Self) -> Self; // FIXME: generic? + // const generics would be more awkward and verbose with the current + // portable SIMD swizzle implementation and compiler limitations. 
+ fn prev1(self, prev: Self) -> Self; fn prev2(self, prev: Self) -> Self; fn prev3(self, prev: Self) -> Self; @@ -323,7 +325,6 @@ impl SimdU8Value<16> for u8x16 { ) } - // ugly but prev requires const generics #[inline] fn prev2(self, prev: Self) -> Self { simd_swizzle!( @@ -333,7 +334,6 @@ impl SimdU8Value<16> for u8x16 { ) } - // ugly but prev requires const generics #[inline] fn prev3(self, prev: Self) -> Self { simd_swizzle!( From 17fb65ccd5bafcb3f9f0746923ac601539eb2c84 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 06:07:00 +0200 Subject: [PATCH 35/83] compat impl --- .../implementation/portable/algorithm_new.rs | 81 ++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_new.rs index 34fc9fad..e4e92f68 100644 --- a/portable/src/implementation/portable/algorithm_new.rs +++ b/portable/src/implementation/portable/algorithm_new.rs @@ -4,7 +4,7 @@ use std::simd::{ simd_swizzle, u8x16, LaneCount, Simd, SupportedLaneCount, }; -use crate::{basic, implementation::helpers::SIMD_CHUNK_SIZE}; +use crate::{basic, compat, implementation::helpers::SIMD_CHUNK_SIZE}; #[cfg(all( any(target_arch = "aarch64", target_arch = "arm"), @@ -578,6 +578,7 @@ where } let rem = chunks.remainder(); if !rem.is_ascii() { + // FIXME: simd??? 
let simd_input = SimdInput::::new_partial(rem); algorithm.check_utf8(&simd_input); } @@ -588,9 +589,87 @@ where Ok(()) } } + + #[inline] + #[expect(clippy::redundant_else)] // more readable + fn validate_utf8_compat_simd0(input: &[u8]) -> core::result::Result<(), usize> { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + let mut algorithm = Self::new(); + let mut idx = 0; + let mut chunks = input.chunks_exact(SIMD_CHUNK_SIZE); + let mut only_ascii = true; + + 'outer: loop { + if only_ascii { + while let Some(chunk) = chunks.next() { + let simd_input = SimdInput::new(chunk); + if !simd_input.is_ascii() { + algorithm.check_block(&simd_input); + if algorithm.has_error() { + return Err(idx); + } else { + only_ascii = false; + idx += SIMD_CHUNK_SIZE; + continue 'outer; + } + } + idx += SIMD_CHUNK_SIZE; + } + } else { + while let Some(chunk) = chunks.next() { + let simd_input = SimdInput::new(chunk); + if simd_input.is_ascii() { + algorithm.check_incomplete_pending(); + if algorithm.has_error() { + return Err(idx); + } else { + // we are in pure ASCII territory again + only_ascii = true; + idx += SIMD_CHUNK_SIZE; + continue 'outer; + } + } else { + algorithm.check_block(&simd_input); + if algorithm.has_error() { + return Err(idx); + } + } + idx += SIMD_CHUNK_SIZE; + } + } + break; + } + let rem = chunks.remainder(); + if !rem.is_ascii() { + // FIXME: simd??? + let simd_input = SimdInput::::new_partial(rem); + algorithm.check_utf8(&simd_input); + } + algorithm.check_incomplete_pending(); + if algorithm.has_error() { + Err(idx) + } else { + Ok(()) + } + } } #[inline] pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { Utf8CheckAlgorithm::<16, 4>::validate_utf8_basic(input) } + +/// Validation implementation for CPUs supporting the SIMD extension (see module). +/// +/// # Errors +/// Returns [`compat::Utf8Error`] with detailed error information on failure. 
+/// +/// # Safety +/// This function is inherently unsafe because it is compiled with SIMD extensions +/// enabled. Make sure that the CPU supports it before calling. +/// +#[inline] +pub fn validate_utf8_compat(input: &[u8]) -> core::result::Result<(), compat::Utf8Error> { + Utf8CheckAlgorithm::<16, 4>::validate_utf8_compat_simd0(input) + .map_err(|idx| crate::implementation::helpers::get_compat_error(input, idx)) +} From 723462ec6e34f6f6553d3fe151634cf662774b12 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 06:07:54 +0200 Subject: [PATCH 36/83] wip --- portable/src/implementation/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 7fad700d..648e0d51 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -37,7 +37,7 @@ pub(crate) unsafe fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::com unsafe fn validate_utf8_compat_simd(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { #[cfg(not(feature = "simd256"))] - return portable::simd128::validate_utf8_compat(input); + return portable::algorithm_new::validate_utf8_compat(input); #[cfg(feature = "simd256")] return portable::simd256::validate_utf8_compat(input); } From c66398863fb8079a0d9d48ef44a35ab9eb116ee2 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 06:09:29 +0200 Subject: [PATCH 37/83] rename --- portable/src/implementation/mod.rs | 4 ++-- .../portable/{algorithm_new.rs => algorithm_portable.rs} | 0 portable/src/implementation/portable/mod.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename portable/src/implementation/portable/{algorithm_new.rs => algorithm_portable.rs} (100%) diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 648e0d51..e7108e53 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -21,7 +21,7 @@ pub(crate) unsafe fn 
validate_utf8_basic(input: &[u8]) -> Result<(), crate::basi #[inline(never)] unsafe fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { #[cfg(not(feature = "simd256"))] - return portable::algorithm_new::validate_utf8_basic(input); + return portable::algorithm_portable::validate_utf8_basic(input); #[cfg(feature = "simd256")] return portable::simd256::validate_utf8_basic(input); } @@ -37,7 +37,7 @@ pub(crate) unsafe fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::com unsafe fn validate_utf8_compat_simd(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { #[cfg(not(feature = "simd256"))] - return portable::algorithm_new::validate_utf8_compat(input); + return portable::algorithm_portable::validate_utf8_compat(input); #[cfg(feature = "simd256")] return portable::simd256::validate_utf8_compat(input); } diff --git a/portable/src/implementation/portable/algorithm_new.rs b/portable/src/implementation/portable/algorithm_portable.rs similarity index 100% rename from portable/src/implementation/portable/algorithm_new.rs rename to portable/src/implementation/portable/algorithm_portable.rs diff --git a/portable/src/implementation/portable/mod.rs b/portable/src/implementation/portable/mod.rs index 31638b4f..83c41f11 100644 --- a/portable/src/implementation/portable/mod.rs +++ b/portable/src/implementation/portable/mod.rs @@ -1,4 +1,4 @@ -pub(crate) mod algorithm_new; +pub(crate) mod algorithm_portable; #[cfg(any(not(feature = "simd256"), feature = "public_imp"))] pub(crate) mod simd128; #[cfg(any(feature = "simd256", feature = "public_imp"))] From 8fe083dbad5be68c02e235879823a4e3ba1dbcbb Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 06:10:31 +0200 Subject: [PATCH 38/83] rename --- portable/src/implementation/mod.rs | 4 ++-- .../portable/{algorithm_portable.rs => algorithm_safe.rs} | 0 portable/src/implementation/portable/mod.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename 
portable/src/implementation/portable/{algorithm_portable.rs => algorithm_safe.rs} (100%) diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index e7108e53..fb8ac5e8 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -21,7 +21,7 @@ pub(crate) unsafe fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basi #[inline(never)] unsafe fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { #[cfg(not(feature = "simd256"))] - return portable::algorithm_portable::validate_utf8_basic(input); + return portable::algorithm_safe::validate_utf8_basic(input); #[cfg(feature = "simd256")] return portable::simd256::validate_utf8_basic(input); } @@ -37,7 +37,7 @@ pub(crate) unsafe fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::com unsafe fn validate_utf8_compat_simd(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { #[cfg(not(feature = "simd256"))] - return portable::algorithm_portable::validate_utf8_compat(input); + return portable::algorithm_safe::validate_utf8_compat(input); #[cfg(feature = "simd256")] return portable::simd256::validate_utf8_compat(input); } diff --git a/portable/src/implementation/portable/algorithm_portable.rs b/portable/src/implementation/portable/algorithm_safe.rs similarity index 100% rename from portable/src/implementation/portable/algorithm_portable.rs rename to portable/src/implementation/portable/algorithm_safe.rs diff --git a/portable/src/implementation/portable/mod.rs b/portable/src/implementation/portable/mod.rs index 83c41f11..ec6c7b5a 100644 --- a/portable/src/implementation/portable/mod.rs +++ b/portable/src/implementation/portable/mod.rs @@ -1,4 +1,4 @@ -pub(crate) mod algorithm_portable; +pub(crate) mod algorithm_safe; #[cfg(any(not(feature = "simd256"), feature = "public_imp"))] pub(crate) mod simd128; #[cfg(any(feature = "simd256", feature = "public_imp"))] From 6f47f130cafb17bf8adc268d8806d81ef7b410f6 Mon Sep 17 00:00:00 
2001 From: Hans Kratz Date: Fri, 25 Oct 2024 06:11:59 +0200 Subject: [PATCH 39/83] clippy --- portable/src/implementation/portable/algorithm_safe.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/portable/src/implementation/portable/algorithm_safe.rs b/portable/src/implementation/portable/algorithm_safe.rs index e4e92f68..126ec45c 100644 --- a/portable/src/implementation/portable/algorithm_safe.rs +++ b/portable/src/implementation/portable/algorithm_safe.rs @@ -601,7 +601,7 @@ where 'outer: loop { if only_ascii { - while let Some(chunk) = chunks.next() { + for chunk in chunks.by_ref() { let simd_input = SimdInput::new(chunk); if !simd_input.is_ascii() { algorithm.check_block(&simd_input); @@ -616,7 +616,7 @@ where idx += SIMD_CHUNK_SIZE; } } else { - while let Some(chunk) = chunks.next() { + for chunk in chunks.by_ref() { let simd_input = SimdInput::new(chunk); if simd_input.is_ascii() { algorithm.check_incomplete_pending(); From a562a7021e92bd3bcd51c7bb0b104c77a473ca9f Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 06:27:43 +0200 Subject: [PATCH 40/83] public imp --- portable/src/basic.rs | 6 +- portable/src/compat.rs | 1 + .../implementation/portable/algorithm_safe.rs | 70 +++++++ portable/tests/tests.rs | 184 ++++-------------- 4 files changed, 107 insertions(+), 154 deletions(-) diff --git a/portable/src/basic.rs b/portable/src/basic.rs index 1187e625..7f167e12 100644 --- a/portable/src/basic.rs +++ b/portable/src/basic.rs @@ -74,7 +74,7 @@ pub mod imp { /// /// General usage: /// ```rust - /// use simdutf8::basic::imp::Utf8Validator; + /// use simdutf8_portable::basic::imp::Utf8Validator; /// use std::io::{stdin, Read, Result}; /// /// # #[cfg(target_arch = "x86_64")] @@ -198,16 +198,16 @@ pub mod imp { pub mod portable { /// Includes the validation implementation using 128-bit portable SIMD. 
pub mod simd128 { + pub use crate::implementation::portable::simd128::validate_utf8_basic as validate_utf8; pub use crate::implementation::portable::simd128::ChunkedUtf8ValidatorImp; pub use crate::implementation::portable::simd128::Utf8ValidatorImp; - pub use crate::implementation::portable::simd128::validate_utf8_basic as validate_utf8; } /// Includes the validation implementation using 256-bit portable SIMD. pub mod simd256 { + pub use crate::implementation::portable::simd256::validate_utf8_basic as validate_utf8; pub use crate::implementation::portable::simd256::ChunkedUtf8ValidatorImp; pub use crate::implementation::portable::simd256::Utf8ValidatorImp; - pub use crate::implementation::portable::simd256::validate_utf8_basic as validate_utf8; } } } diff --git a/portable/src/compat.rs b/portable/src/compat.rs index ecea303c..9a043a7e 100644 --- a/portable/src/compat.rs +++ b/portable/src/compat.rs @@ -102,6 +102,7 @@ pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> { /// Allows direct access to the platform-specific unsafe validation implementations. #[cfg(feature = "public_imp")] pub mod imp { + /// FIXME: add docs pub mod portable { /// Includes the validation implementation for 128-bit portable SIMD. pub mod simd128 { diff --git a/portable/src/implementation/portable/algorithm_safe.rs b/portable/src/implementation/portable/algorithm_safe.rs index 126ec45c..384a41a3 100644 --- a/portable/src/implementation/portable/algorithm_safe.rs +++ b/portable/src/implementation/portable/algorithm_safe.rs @@ -673,3 +673,73 @@ pub fn validate_utf8_compat(input: &[u8]) -> core::result::Result<(), compat::Ut Utf8CheckAlgorithm::<16, 4>::validate_utf8_compat_simd0(input) .map_err(|idx| crate::implementation::helpers::get_compat_error(input, idx)) } + +/// Low-level implementation of the [`basic::imp::ChunkedUtf8Validator`] trait. +/// +/// This is implementation requires CPU SIMD features specified by the module it resides in. 
+/// It is undefined behavior to call it if the required CPU features are not +/// available. +#[cfg(feature = "public_imp")] +pub struct ChunkedUtf8ValidatorImp { + algorithm: Utf8CheckAlgorithm<16, 4>, +} + +#[cfg(feature = "public_imp")] +impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { + #[inline] + #[must_use] + unsafe fn new() -> Self { + Self { + algorithm: Utf8CheckAlgorithm::<16, 4>::new(), + } + } + + #[inline] + unsafe fn update_from_chunks(&mut self, input: &[u8]) { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + + assert!( + input.len() % SIMD_CHUNK_SIZE == 0, + "Input size must be a multiple of 64." + ); + for chunk in input.chunks_exact(SIMD_CHUNK_SIZE) { + let input = SimdInput::new(chunk); + self.algorithm.check_utf8(&input); + } + } + + #[inline] + unsafe fn finalize( + mut self, + remaining_input: core::option::Option<&[u8]>, + ) -> core::result::Result<(), basic::Utf8Error> { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + + if let Some(mut remaining_input) = remaining_input { + if !remaining_input.is_empty() { + let len = remaining_input.len(); + let chunks_lim = len - (len % SIMD_CHUNK_SIZE); + if chunks_lim > 0 { + self.update_from_chunks(&remaining_input[..chunks_lim]); + } + let rem = len - chunks_lim; + if rem > 0 { + remaining_input = &remaining_input[chunks_lim..]; + let mut tmpbuf = TempSimdChunk::new(); + tmpbuf + .0 + .as_mut_ptr() + .copy_from_nonoverlapping(remaining_input.as_ptr(), remaining_input.len()); + let simd_input = SimdInput::new(&tmpbuf.0); + self.algorithm.check_utf8(&simd_input); + } + } + } + self.algorithm.check_incomplete_pending(); + if self.algorithm.has_error() { + Err(basic::Utf8Error {}) + } else { + Ok(()) + } + } +} diff --git a/portable/tests/tests.rs b/portable/tests/tests.rs index dc8ff222..40a6d850 100644 --- a/portable/tests/tests.rs +++ b/portable/tests/tests.rs @@ -68,155 +68,72 @@ mod public_imp { #[allow(unused_variables)] // nothing to do if not SIMD 
implementation is available pub(super) fn test_valid(input: &[u8]) { - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - #[cfg(target_feature = "avx2")] - unsafe { - assert!(simdutf8::basic::imp::x86::avx2::validate_utf8(input).is_ok()); - assert!(simdutf8::compat::imp::x86::avx2::validate_utf8(input).is_ok()); - - test_streaming::(input, true); - test_chunked_streaming::( - input, true, - ); - } - - #[cfg(target_feature = "sse4.2")] - unsafe { - assert!(simdutf8::basic::imp::x86::sse42::validate_utf8(input).is_ok()); - assert!(simdutf8::compat::imp::x86::sse42::validate_utf8(input).is_ok()); - - test_streaming::(input, true); - test_chunked_streaming::( - input, true, - ); - } - } - #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] + #[cfg(feature = "public_imp")] unsafe { - assert!(simdutf8::basic::imp::aarch64::neon::validate_utf8(input).is_ok()); - assert!(simdutf8::compat::imp::aarch64::neon::validate_utf8(input).is_ok()); - - test_streaming::(input, true); - test_chunked_streaming::( - input, true, + assert!(simdutf8_portable::basic::imp::portable::simd128::validate_utf8(input).is_ok()); + assert!( + simdutf8_portable::compat::imp::portable::simd128::validate_utf8(input).is_ok() ); - } - #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] - unsafe { - assert!(simdutf8::basic::imp::wasm32::simd128::validate_utf8(input).is_ok()); - assert!(simdutf8::compat::imp::wasm32::simd128::validate_utf8(input).is_ok()); - test_streaming::(input, true); - test_chunked_streaming::( - input, true, - ); - } - #[cfg(feature = "portable_public_imp")] - unsafe { - assert!(simdutf8::basic::imp::portable::simd128::validate_utf8(input).is_ok()); - assert!(simdutf8::compat::imp::portable::simd128::validate_utf8(input).is_ok()); - - test_streaming::( + test_streaming::( input, true, ); test_chunked_streaming::< - simdutf8::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp, + 
simdutf8_portable::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp, >(input, true); - assert!(simdutf8::basic::imp::portable::simd256::validate_utf8(input).is_ok()); - assert!(simdutf8::compat::imp::portable::simd256::validate_utf8(input).is_ok()); + assert!(simdutf8_portable::basic::imp::portable::simd256::validate_utf8(input).is_ok()); + assert!( + simdutf8_portable::compat::imp::portable::simd256::validate_utf8(input).is_ok() + ); - test_streaming::( + test_streaming::( input, true, ); test_chunked_streaming::< - simdutf8::basic::imp::portable::simd256::ChunkedUtf8ValidatorImp, + simdutf8_portable::basic::imp::portable::simd256::ChunkedUtf8ValidatorImp, >(input, true); } } #[allow(unused_variables)] // nothing to do if not SIMD implementation is available pub(super) fn test_invalid(input: &[u8], valid_up_to: usize, error_len: Option) { - if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { - #[cfg(target_feature = "avx2")] - unsafe { - assert!(simdutf8::basic::imp::x86::avx2::validate_utf8(input).is_err()); - let err = simdutf8::compat::imp::x86::avx2::validate_utf8(input).unwrap_err(); - assert_eq!(err.valid_up_to(), valid_up_to); - assert_eq!(err.error_len(), error_len); - - test_streaming::(input, false); - test_chunked_streaming::( - input, false, - ); - } - #[cfg(target_feature = "sse4.2")] - unsafe { - assert!(simdutf8::basic::imp::x86::sse42::validate_utf8(input).is_err()); - let err = simdutf8::compat::imp::x86::sse42::validate_utf8(input).unwrap_err(); - assert_eq!(err.valid_up_to(), valid_up_to); - assert_eq!(err.error_len(), error_len); - - test_streaming::(input, false); - test_chunked_streaming::( - input, false, - ); - } - } - #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] - unsafe { - assert!(simdutf8::basic::imp::aarch64::neon::validate_utf8(input).is_err()); - let err = simdutf8::compat::imp::aarch64::neon::validate_utf8(input).unwrap_err(); - assert_eq!(err.valid_up_to(), valid_up_to); - assert_eq!(err.error_len(), 
error_len); - - test_streaming::(input, false); - test_chunked_streaming::( - input, false, - ); - } - #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] + #[cfg(feature = "public_imp")] unsafe { - assert!(simdutf8::basic::imp::wasm32::simd128::validate_utf8(input).is_err()); - let err = simdutf8::compat::imp::wasm32::simd128::validate_utf8(input).unwrap_err(); - assert_eq!(err.valid_up_to(), valid_up_to); - assert_eq!(err.error_len(), error_len); - - test_streaming::(input, false); - test_chunked_streaming::( - input, false, + assert!( + simdutf8_portable::basic::imp::portable::simd128::validate_utf8(input).is_err() ); - } - #[cfg(feature = "portable_public_imp")] - unsafe { - assert!(simdutf8::basic::imp::portable::simd128::validate_utf8(input).is_err()); - let err = simdutf8::compat::imp::portable::simd128::validate_utf8(input).unwrap_err(); + let err = simdutf8_portable::compat::imp::portable::simd128::validate_utf8(input) + .unwrap_err(); assert_eq!(err.valid_up_to(), valid_up_to); assert_eq!(err.error_len(), error_len); - test_streaming::( + test_streaming::( input, false, ); test_chunked_streaming::< - simdutf8::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp, + simdutf8_portable::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp, >(input, false); - assert!(simdutf8::basic::imp::portable::simd256::validate_utf8(input).is_err()); - let err = simdutf8::compat::imp::portable::simd256::validate_utf8(input).unwrap_err(); + assert!( + simdutf8_portable::basic::imp::portable::simd256::validate_utf8(input).is_err() + ); + let err = simdutf8_portable::compat::imp::portable::simd256::validate_utf8(input) + .unwrap_err(); assert_eq!(err.valid_up_to(), valid_up_to); assert_eq!(err.error_len(), error_len); - test_streaming::( + test_streaming::( input, false, ); test_chunked_streaming::< - simdutf8::basic::imp::portable::simd256::ChunkedUtf8ValidatorImp, + simdutf8_portable::basic::imp::portable::simd256::ChunkedUtf8ValidatorImp, >(input, false); 
} } #[allow(unused)] // not used if not SIMD implementation is available - fn test_streaming(input: &[u8], ok: bool) { + fn test_streaming(input: &[u8], ok: bool) { unsafe { let mut validator = T::new(); validator.update(input); @@ -228,7 +145,7 @@ mod public_imp { } #[allow(unused)] // not used if not SIMD implementation is available - fn test_streaming_blocks( + fn test_streaming_blocks( input: &[u8], block_size: usize, ok: bool, @@ -243,7 +160,7 @@ mod public_imp { } #[allow(unused)] // not used if not SIMD implementation is available - fn test_chunked_streaming( + fn test_chunked_streaming( input: &[u8], ok: bool, ) { @@ -253,7 +170,9 @@ mod public_imp { } #[allow(unused)] // not used if not SIMD implementation is available - fn test_chunked_streaming_with_chunk_size( + fn test_chunked_streaming_with_chunk_size< + T: simdutf8_portable::basic::imp::ChunkedUtf8Validator, + >( input: &[u8], chunk_size: usize, ok: bool, @@ -270,46 +189,9 @@ mod public_imp { #[test] #[should_panic] - #[cfg(all( - any(target_arch = "x86", target_arch = "x86_64"), - target_feature = "avx2" - ))] - fn test_avx2_chunked_panic() { - test_chunked_streaming_with_chunk_size::< - simdutf8::basic::imp::x86::avx2::ChunkedUtf8ValidatorImp, - >(b"abcd", 1, true); - } - - #[test] - #[should_panic] - #[cfg(all( - any(target_arch = "x86", target_arch = "x86_64"), - target_feature = "sse4.2" - ))] - fn test_sse42_chunked_panic() { - test_chunked_streaming_with_chunk_size::< - simdutf8::basic::imp::x86::sse42::ChunkedUtf8ValidatorImp, - >(b"abcd", 1, true); - } - - #[test] - #[should_panic] - #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] fn test_neon_chunked_panic() { test_chunked_streaming_with_chunk_size::< - simdutf8::basic::imp::aarch64::neon::ChunkedUtf8ValidatorImp, - >(b"abcd", 1, true); - } - - // the test runner will ignore this test probably due to limitations of panic handling/threading - // of that target--keeping this here so that when it can be tested properly, it will - 
// FIXME: remove this comment once this works properly. - #[test] - #[should_panic] - #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] - fn test_simd128_chunked_panic() { - test_chunked_streaming_with_chunk_size::< - simdutf8::basic::imp::wasm32::simd128::ChunkedUtf8ValidatorImp, + simdutf8_portable::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp, >(b"abcd", 1, true); } } From f088015270aa15253286e80eef98bf1aab28d0b7 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 06:39:07 +0200 Subject: [PATCH 41/83] wip --- portable/src/implementation/algorithm.rs | 569 ------------------ portable/src/implementation/mod.rs | 4 - .../implementation/portable/algorithm_safe.rs | 97 +++ .../src/implementation/portable/simd128.rs | 239 +------- .../src/implementation/portable/simd256.rs | 235 +------- 5 files changed, 106 insertions(+), 1038 deletions(-) delete mode 100644 portable/src/implementation/algorithm.rs diff --git a/portable/src/implementation/algorithm.rs b/portable/src/implementation/algorithm.rs deleted file mode 100644 index 3804e6d1..00000000 --- a/portable/src/implementation/algorithm.rs +++ /dev/null @@ -1,569 +0,0 @@ -/// Macros requires newtypes in scope: -/// `SimdU8Value` - implementation of SIMD primitives -/// `SimdInput` - which holds 64 bytes of SIMD input -/// `TempSimdChunk` - correctly aligned `TempSimdChunk`, either `TempSimdChunkA16` or `TempSimdChunkA32` -macro_rules! 
algorithm_simd { - ($(#[$feat:meta])*) => { - use crate::{basic, compat}; - - impl Utf8CheckAlgorithm { - $(#[$feat])* - #[inline] - unsafe fn default() -> Self { - Self { - prev: SimdU8Value::splat0(), - incomplete: SimdU8Value::splat0(), - error: SimdU8Value::splat0(), - } - } - - $(#[$feat])* - #[inline] - unsafe fn check_incomplete_pending(&mut self) { - self.error = self.error.or(self.incomplete); - } - - $(#[$feat])* - #[inline] - unsafe fn is_incomplete(input: SimdU8Value) -> SimdU8Value { - input.saturating_sub(SimdU8Value::from_32_cut_off_leading( - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0xff, - 0b1111_0000 - 1, - 0b1110_0000 - 1, - 0b1100_0000 - 1, - )) - } - - $(#[$feat])* - #[inline] - unsafe fn check_special_cases(input: SimdU8Value, prev1: SimdU8Value) -> SimdU8Value { - const TOO_SHORT: u8 = 1 << 0; - const TOO_LONG: u8 = 1 << 1; - const OVERLONG_3: u8 = 1 << 2; - const SURROGATE: u8 = 1 << 4; - const OVERLONG_2: u8 = 1 << 5; - const TWO_CONTS: u8 = 1 << 7; - const TOO_LARGE: u8 = 1 << 3; - const TOO_LARGE_1000: u8 = 1 << 6; - const OVERLONG_4: u8 = 1 << 6; - const CARRY: u8 = TOO_SHORT | TOO_LONG | TWO_CONTS; - - let byte_1_high = prev1.shr4().lookup_16( - TOO_LONG, - TOO_LONG, - TOO_LONG, - TOO_LONG, - TOO_LONG, - TOO_LONG, - TOO_LONG, - TOO_LONG, - TWO_CONTS, - TWO_CONTS, - TWO_CONTS, - TWO_CONTS, - TOO_SHORT | OVERLONG_2, - TOO_SHORT, - TOO_SHORT | OVERLONG_3 | SURROGATE, - TOO_SHORT | TOO_LARGE | TOO_LARGE_1000 | OVERLONG_4, - ); - - let byte_1_low = prev1.and(SimdU8Value::splat(0x0F)).lookup_16( - CARRY | OVERLONG_3 | OVERLONG_2 | OVERLONG_4, - CARRY | OVERLONG_2, - CARRY, - CARRY, - CARRY | TOO_LARGE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | 
TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000 | SURROGATE, - CARRY | TOO_LARGE | TOO_LARGE_1000, - CARRY | TOO_LARGE | TOO_LARGE_1000, - ); - - let byte_2_high = input.shr4().lookup_16( - TOO_SHORT, - TOO_SHORT, - TOO_SHORT, - TOO_SHORT, - TOO_SHORT, - TOO_SHORT, - TOO_SHORT, - TOO_SHORT, - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE_1000 | OVERLONG_4, - TOO_LONG | OVERLONG_2 | TWO_CONTS | OVERLONG_3 | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_LONG | OVERLONG_2 | TWO_CONTS | SURROGATE | TOO_LARGE, - TOO_SHORT, - TOO_SHORT, - TOO_SHORT, - TOO_SHORT, - ); - - byte_1_high.and(byte_1_low).and(byte_2_high) - } - - $(#[$feat])* - #[inline] - unsafe fn check_multibyte_lengths( - input: SimdU8Value, - prev: SimdU8Value, - special_cases: SimdU8Value, - ) -> SimdU8Value { - let prev2 = input.prev2(prev); - let prev3 = input.prev3(prev); - let must23 = Self::must_be_2_3_continuation(prev2, prev3); - let must23_80 = must23.and(SimdU8Value::splat(0x80)); - must23_80.xor(special_cases) - } - - $(#[$feat])* - #[inline] - unsafe fn has_error(&self) -> bool { - self.error.any_bit_set() - } - - $(#[$feat])* - #[inline] - unsafe fn check_bytes(&mut self, input: SimdU8Value) { - let prev1 = input.prev1(self.prev); - let sc = Self::check_special_cases(input, prev1); - self.error = self - .error - .or(Self::check_multibyte_lengths(input, self.prev, sc)); - self.prev = input; - } - - $(#[$feat])* - #[inline] - unsafe fn check_utf8(&mut self, input: SimdInput) { - if input.is_ascii() { - self.check_incomplete_pending(); - } else { - self.check_block(input); - } - } - - $(#[$feat])* - #[inline] - unsafe fn check_block(&mut self, input: SimdInput) { - // WORKAROUND - // necessary because the for loop is not unrolled on ARM64 - if input.vals.len() == 2 { - self.check_bytes(*input.vals.as_ptr()); - 
self.check_bytes(*input.vals.as_ptr().add(1)); - self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(1)); - } else if input.vals.len() == 4 { - self.check_bytes(*input.vals.as_ptr()); - self.check_bytes(*input.vals.as_ptr().add(1)); - self.check_bytes(*input.vals.as_ptr().add(2)); - self.check_bytes(*input.vals.as_ptr().add(3)); - self.incomplete = Self::is_incomplete(*input.vals.as_ptr().add(3)); - } else { - panic!("Unsupported number of chunks"); - } - } - } - - /// Validation implementation for CPUs supporting the SIMD extension (see module). - /// - /// # Errors - /// Returns the zero-sized [`basic::Utf8Error`] on failure. - /// - /// # Safety - /// This function is inherently unsafe because it is compiled with SIMD extensions - /// enabled. Make sure that the CPU supports it before calling. - /// - $(#[$feat])* - #[inline] - pub unsafe fn validate_utf8_basic( - input: &[u8], - ) -> core::result::Result<(), basic::Utf8Error> { - use crate::implementation::helpers::SIMD_CHUNK_SIZE; - let len = input.len(); - let mut algorithm = Utf8CheckAlgorithm::::default(); - let mut idx: usize = 0; - let iter_lim = len - (len % SIMD_CHUNK_SIZE); - - while idx < iter_lim { - let simd_input = SimdInput::new(input.as_ptr().add(idx as usize)); - idx += SIMD_CHUNK_SIZE; - if !simd_input.is_ascii() { - algorithm.check_block(simd_input); - break; - } - } - - while idx < iter_lim { - let input = SimdInput::new(input.as_ptr().add(idx as usize)); - algorithm.check_utf8(input); - idx += SIMD_CHUNK_SIZE; - } - - if idx < len { - let mut tmpbuf = TempSimdChunk::new(); - crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64( - input.as_ptr().add(idx), - tmpbuf.0.as_mut_ptr(), - len - idx, - ); - let simd_input = SimdInput::new(tmpbuf.0.as_ptr()); - algorithm.check_utf8(simd_input); - } - algorithm.check_incomplete_pending(); - if algorithm.has_error() { - Err(basic::Utf8Error {}) - } else { - Ok(()) - } - } - - /// Validation implementation for CPUs 
supporting the SIMD extension (see module). - /// - /// # Errors - /// Returns [`compat::Utf8Error`] with detailed error information on failure. - /// - /// # Safety - /// This function is inherently unsafe because it is compiled with SIMD extensions - /// enabled. Make sure that the CPU supports it before calling. - /// - $(#[$feat])* - #[inline] - pub unsafe fn validate_utf8_compat( - input: &[u8], - ) -> core::result::Result<(), compat::Utf8Error> { - validate_utf8_compat_simd0(input) - .map_err(|idx| crate::implementation::helpers::get_compat_error(input, idx)) - } - - $(#[$feat])* - #[inline] - #[expect(clippy::redundant_else)] // more readable - unsafe fn validate_utf8_compat_simd0(input: &[u8]) -> core::result::Result<(), usize> { - use crate::implementation::helpers::SIMD_CHUNK_SIZE; - let len = input.len(); - let mut algorithm = Utf8CheckAlgorithm::::default(); - let mut idx: usize = 0; - let mut only_ascii = true; - let iter_lim = len - (len % SIMD_CHUNK_SIZE); - - 'outer: loop { - if only_ascii { - while idx < iter_lim { - let simd_input = SimdInput::new(input.as_ptr().add(idx as usize)); - if !simd_input.is_ascii() { - algorithm.check_block(simd_input); - if algorithm.has_error() { - return Err(idx); - } else { - only_ascii = false; - idx += SIMD_CHUNK_SIZE; - continue 'outer; - } - } - idx += SIMD_CHUNK_SIZE; - } - } else { - while idx < iter_lim { - let simd_input = SimdInput::new(input.as_ptr().add(idx as usize)); - if simd_input.is_ascii() { - algorithm.check_incomplete_pending(); - if algorithm.has_error() { - return Err(idx); - } else { - // we are in pure ASCII territory again - only_ascii = true; - idx += SIMD_CHUNK_SIZE; - continue 'outer; - } - } else { - algorithm.check_block(simd_input); - if algorithm.has_error() { - return Err(idx); - } - } - idx += SIMD_CHUNK_SIZE; - } - } - break; - } - if idx < len { - let mut tmpbuf = TempSimdChunk::new(); - crate::implementation::helpers::memcpy_unaligned_nonoverlapping_inline_opt_lt_64( - 
input.as_ptr().add(idx), - tmpbuf.0.as_mut_ptr(), - len - idx, - ); - let simd_input = SimdInput::new(tmpbuf.0.as_ptr()); - - algorithm.check_utf8(simd_input); - } - algorithm.check_incomplete_pending(); - if algorithm.has_error() { - Err(idx) - } else { - Ok(()) - } - } - - /// Low-level implementation of the [`basic::imp::Utf8Validator`] trait. - /// - /// This is implementation requires CPU SIMD features specified by the module it resides in. - /// It is undefined behavior to call it if the required CPU features are not - /// available. - #[cfg(feature = "public_imp")] - pub struct Utf8ValidatorImp { - algorithm: Utf8CheckAlgorithm, - incomplete_data: [u8; 64], - incomplete_len: usize, - } - - #[cfg(feature = "public_imp")] - impl Utf8ValidatorImp { - $(#[$feat])* - #[inline] - unsafe fn update_from_incomplete_data(&mut self) { - let simd_input = SimdInput::new(self.incomplete_data.as_ptr()); - self.algorithm.check_utf8(simd_input); - self.incomplete_len = 0; - } - } - - #[cfg(feature = "public_imp")] - impl basic::imp::Utf8Validator for Utf8ValidatorImp { - $(#[$feat])* - #[inline] - #[must_use] - unsafe fn new() -> Self { - Self { - algorithm: Utf8CheckAlgorithm::::default(), - incomplete_data: [0; 64], - incomplete_len: 0, - } - } - - $(#[$feat])* - #[inline] - unsafe fn update(&mut self, mut input: &[u8]) { - use crate::implementation::helpers::SIMD_CHUNK_SIZE; - if input.is_empty() { - return; - } - if self.incomplete_len != 0 { - let to_copy = - core::cmp::min(SIMD_CHUNK_SIZE - self.incomplete_len, input.len()); - self.incomplete_data - .as_mut_ptr() - .add(self.incomplete_len) - .copy_from_nonoverlapping(input.as_ptr(), to_copy); - if self.incomplete_len + to_copy == SIMD_CHUNK_SIZE { - self.update_from_incomplete_data(); - input = &input[to_copy..]; - } else { - self.incomplete_len += to_copy; - return; - } - } - let len = input.len(); - let mut idx: usize = 0; - let iter_lim = len - (len % SIMD_CHUNK_SIZE); - while idx < iter_lim { - let input = 
SimdInput::new(input.as_ptr().add(idx as usize)); - self.algorithm.check_utf8(input); - idx += SIMD_CHUNK_SIZE; - } - if idx < len { - let to_copy = len - idx; - self.incomplete_data - .as_mut_ptr() - .copy_from_nonoverlapping(input.as_ptr().add(idx), to_copy); - self.incomplete_len = to_copy; - } - } - - $(#[$feat])* - #[inline] - unsafe fn finalize(mut self) -> core::result::Result<(), basic::Utf8Error> { - if self.incomplete_len != 0 { - for i in &mut self.incomplete_data[self.incomplete_len..] { - *i = 0; - } - self.update_from_incomplete_data(); - } - self.algorithm.check_incomplete_pending(); - if self.algorithm.has_error() { - Err(basic::Utf8Error {}) - } else { - Ok(()) - } - } - } - - /// Low-level implementation of the [`basic::imp::ChunkedUtf8Validator`] trait. - /// - /// This is implementation requires CPU SIMD features specified by the module it resides in. - /// It is undefined behavior to call it if the required CPU features are not - /// available. - #[cfg(feature = "public_imp")] - pub struct ChunkedUtf8ValidatorImp { - algorithm: Utf8CheckAlgorithm, - } - - #[cfg(feature = "public_imp")] - impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { - $(#[$feat])* - #[inline] - #[must_use] - unsafe fn new() -> Self { - Self { - algorithm: Utf8CheckAlgorithm::::default(), - } - } - - $(#[$feat])* - #[inline] - unsafe fn update_from_chunks(&mut self, input: &[u8]) { - use crate::implementation::helpers::SIMD_CHUNK_SIZE; - - assert!( - input.len() % SIMD_CHUNK_SIZE == 0, - "Input size must be a multiple of 64." 
- ); - for chunk in input.chunks_exact(SIMD_CHUNK_SIZE) { - let input = SimdInput::new(chunk.as_ptr()); - self.algorithm.check_utf8(input); - } - } - - $(#[$feat])* - #[inline] - unsafe fn finalize( - mut self, - remaining_input: core::option::Option<&[u8]>, - ) -> core::result::Result<(), basic::Utf8Error> { - use crate::implementation::helpers::SIMD_CHUNK_SIZE; - - if let Some(mut remaining_input) = remaining_input { - if !remaining_input.is_empty() { - let len = remaining_input.len(); - let chunks_lim = len - (len % SIMD_CHUNK_SIZE); - if chunks_lim > 0 { - self.update_from_chunks(&remaining_input[..chunks_lim]); - } - let rem = len - chunks_lim; - if rem > 0 { - remaining_input = &remaining_input[chunks_lim..]; - let mut tmpbuf = TempSimdChunk::new(); - tmpbuf.0.as_mut_ptr().copy_from_nonoverlapping( - remaining_input.as_ptr(), - remaining_input.len(), - ); - let simd_input = SimdInput::new(tmpbuf.0.as_ptr()); - self.algorithm.check_utf8(simd_input); - } - } - } - self.algorithm.check_incomplete_pending(); - if self.algorithm.has_error() { - Err(basic::Utf8Error {}) - } else { - Ok(()) - } - } - } - }; -} - -macro_rules! simd_input_128_bit { - ($(#[$feat:meta])*) => { - #[repr(C)] - struct SimdInput { - vals: [SimdU8Value; 4], - } - - impl SimdInput { - $(#[$feat])* - #[inline] - unsafe fn new(ptr: *const u8) -> Self { - Self { - vals: [ - SimdU8Value::load_from(ptr), - SimdU8Value::load_from(ptr.add(16)), - SimdU8Value::load_from(ptr.add(32)), - SimdU8Value::load_from(ptr.add(48)), - ], - } - } - - $(#[$feat])* - #[inline] - unsafe fn is_ascii(&self) -> bool { - let r1 = self.vals[0].or(self.vals[1]); - let r2 = self.vals[2].or(self.vals[3]); - let r = r1.or(r2); - r.is_ascii() - } - } - }; -} - -macro_rules! 
simd_input_256_bit { - ($(#[$feat:meta])*) => { - #[repr(C)] - struct SimdInput { - vals: [SimdU8Value; 2], - } - - impl SimdInput { - $(#[$feat])* - #[inline] - unsafe fn new(ptr: *const u8) -> Self { - Self { - vals: [ - SimdU8Value::load_from(ptr), - SimdU8Value::load_from(ptr.add(32)), - ], - } - } - - $(#[$feat])* - #[inline] - unsafe fn is_ascii(&self) -> bool { - self.vals[0].or(self.vals[1]).is_ascii() - } - } - }; -} diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index fb8ac5e8..c8345730 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -1,9 +1,5 @@ //! Contains UTF-8 validation implementations. -#[macro_use] -#[allow(unused_macros)] // only used if there is a SIMD implementation -mod algorithm; - pub(crate) mod helpers; // UTF-8 validation function types diff --git a/portable/src/implementation/portable/algorithm_safe.rs b/portable/src/implementation/portable/algorithm_safe.rs index 384a41a3..a34b4f9d 100644 --- a/portable/src/implementation/portable/algorithm_safe.rs +++ b/portable/src/implementation/portable/algorithm_safe.rs @@ -654,6 +654,15 @@ where } } +/// Validation implementation for CPUs supporting the SIMD extension (see module). +/// +/// # Errors +/// Returns the zero-sized [`basic::Utf8Error`] on failure. +/// +/// # Safety +/// This function is inherently unsafe because it is compiled with SIMD extensions +/// enabled. Make sure that the CPU supports it before calling. +/// #[inline] pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { Utf8CheckAlgorithm::<16, 4>::validate_utf8_basic(input) @@ -674,6 +683,94 @@ pub fn validate_utf8_compat(input: &[u8]) -> core::result::Result<(), compat::Ut .map_err(|idx| crate::implementation::helpers::get_compat_error(input, idx)) } +/// Low-level implementation of the [`basic::imp::Utf8Validator`] trait. 
+/// +/// This is implementation requires CPU SIMD features specified by the module it resides in. +/// It is undefined behavior to call it if the required CPU features are not +/// available. +#[cfg(feature = "public_imp")] +pub struct Utf8ValidatorImp { + algorithm: Utf8CheckAlgorithm<16, 4>, + incomplete_data: [u8; 64], + incomplete_len: usize, +} + +#[cfg(feature = "public_imp")] +impl Utf8ValidatorImp { + #[inline] + unsafe fn update_from_incomplete_data(&mut self) { + let simd_input = SimdInput::new(&self.incomplete_data); + self.algorithm.check_utf8(&simd_input); + self.incomplete_len = 0; + } +} + +#[cfg(feature = "public_imp")] +impl basic::imp::Utf8Validator for Utf8ValidatorImp { + #[inline] + #[must_use] + unsafe fn new() -> Self { + Self { + algorithm: Utf8CheckAlgorithm::<16, 4>::new(), + incomplete_data: [0; 64], + incomplete_len: 0, + } + } + + #[inline] + unsafe fn update(&mut self, mut input: &[u8]) { + use crate::implementation::helpers::SIMD_CHUNK_SIZE; + if input.is_empty() { + return; + } + if self.incomplete_len != 0 { + let to_copy = core::cmp::min(SIMD_CHUNK_SIZE - self.incomplete_len, input.len()); + self.incomplete_data + .as_mut_ptr() + .add(self.incomplete_len) + .copy_from_nonoverlapping(input.as_ptr(), to_copy); + if self.incomplete_len + to_copy == SIMD_CHUNK_SIZE { + self.update_from_incomplete_data(); + input = &input[to_copy..]; + } else { + self.incomplete_len += to_copy; + return; + } + } + let len = input.len(); + let mut idx: usize = 0; + let iter_lim = len - (len % SIMD_CHUNK_SIZE); + while idx < iter_lim { + let input = SimdInput::new(&input[idx..idx + SIMD_CHUNK_SIZE]); + self.algorithm.check_utf8(&input); + idx += SIMD_CHUNK_SIZE; + } + if idx < len { + let to_copy = len - idx; + self.incomplete_data + .as_mut_ptr() + .copy_from_nonoverlapping(input.as_ptr().add(idx), to_copy); + self.incomplete_len = to_copy; + } + } + + #[inline] + unsafe fn finalize(mut self) -> core::result::Result<(), basic::Utf8Error> { + if 
self.incomplete_len != 0 { + for i in &mut self.incomplete_data[self.incomplete_len..] { + *i = 0; + } + self.update_from_incomplete_data(); + } + self.algorithm.check_incomplete_pending(); + if self.algorithm.has_error() { + Err(basic::Utf8Error {}) + } else { + Ok(()) + } + } +} + /// Low-level implementation of the [`basic::imp::ChunkedUtf8Validator`] trait. /// /// This is implementation requires CPU SIMD features specified by the module it resides in. diff --git a/portable/src/implementation/portable/simd128.rs b/portable/src/implementation/portable/simd128.rs index 2f0da488..fbd4b4e9 100644 --- a/portable/src/implementation/portable/simd128.rs +++ b/portable/src/implementation/portable/simd128.rs @@ -1,235 +1,4 @@ -//! Contains the portable SIMD UTF-8 validation implementation. - -#![allow(clippy::too_many_arguments)] -#![allow(clippy::needless_pass_by_value, clippy::pedantic, clippy::all)] -use crate::implementation::helpers::Utf8CheckAlgorithm; -use core::simd::prelude::*; -use core::simd::{simd_swizzle, u8x16}; - -// Portable SIMD primitives -type SimdU8Value = crate::implementation::helpers::SimdU8Value; - -impl SimdU8Value { - #[inline] - fn from_32_cut_off_leading( - _v0: u8, - _v1: u8, - _v2: u8, - _v3: u8, - _v4: u8, - _v5: u8, - _v6: u8, - _v7: u8, - _v8: u8, - _v9: u8, - _v10: u8, - _v11: u8, - _v12: u8, - _v13: u8, - _v14: u8, - _v15: u8, - v16: u8, - v17: u8, - v18: u8, - v19: u8, - v20: u8, - v21: u8, - v22: u8, - v23: u8, - v24: u8, - v25: u8, - v26: u8, - v27: u8, - v28: u8, - v29: u8, - v30: u8, - v31: u8, - ) -> Self { - Self::from(u8x16::from_array([ - v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, - ])) - } - - #[inline] - fn repeat_16( - v0: u8, - v1: u8, - v2: u8, - v3: u8, - v4: u8, - v5: u8, - v6: u8, - v7: u8, - v8: u8, - v9: u8, - v10: u8, - v11: u8, - v12: u8, - v13: u8, - v14: u8, - v15: u8, - ) -> Self { - Self::from(u8x16::from_array([ - v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, 
v14, v15, - ])) - } - - #[inline] - unsafe fn load_from(ptr: *const u8) -> Self { - Self::from(ptr.cast::().read_unaligned()) - } - - #[inline] - fn lookup_16( - self, - v0: u8, - v1: u8, - v2: u8, - v3: u8, - v4: u8, - v5: u8, - v6: u8, - v7: u8, - v8: u8, - v9: u8, - v10: u8, - v11: u8, - v12: u8, - v13: u8, - v14: u8, - v15: u8, - ) -> Self { - // We need to ensure that 'self' only contains the lower 4 bits, unlike the avx instruction - // this will otherwise lead to bad results - let idx: u8x16 = self.0; - let src: u8x16 = Self::repeat_16( - v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, - ) - .0; - let res = src.swizzle_dyn(idx); - Self::from(res) - } - - #[inline] - fn splat(val: u8) -> Self { - #[allow(clippy::cast_possible_wrap)] - Self::from(u8x16::splat(val)) - } - - #[inline] - fn splat0() -> Self { - Self::from(u8x16::splat(0)) - } - - #[inline] - fn or(self, b: Self) -> Self { - Self::from(self.0 | b.0) - } - - #[inline] - fn and(self, b: Self) -> Self { - Self::from(self.0 & b.0) - } - - #[inline] - fn xor(self, b: Self) -> Self { - Self::from(self.0 ^ b.0) - } - - #[inline] - fn saturating_sub(self, b: Self) -> Self { - Self::from(self.0.saturating_sub(b.0)) - } - - // ugly but shr requires const generics - #[inline] - fn shr4(self) -> Self { - Self::from(self.0 >> 4) - } - - #[inline] - fn prev1(self, prev: Self) -> Self { - Self::from(simd_swizzle!( - self.0, - prev.0, - [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,] - )) - } - - // ugly but prev requires const generics - #[inline] - fn prev2(self, prev: Self) -> Self { - Self::from(simd_swizzle!( - self.0, - prev.0, - [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,] - )) - } - - // ugly but prev requires const generics - #[inline] - fn prev3(self, prev: Self) -> Self { - Self::from(simd_swizzle!( - self.0, - prev.0, - [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,] - )) - } - - #[inline] - fn unsigned_gt(self, other: Self) -> Self { - let gt = 
self.0.simd_gt(other.0).to_int(); - Self::from(gt.cast()) - } - - #[inline] - fn any_bit_set(self) -> bool { - if HAS_FAST_REDUCE_MAX { - self.0.reduce_max() != 0 - } else { - self.0 != u8x16::splat(0) - } - } - - #[inline] - fn is_ascii(self) -> bool { - if HAS_FAST_REDUCE_MAX { - self.0.reduce_max() < 0b1000_0000 - } else { - (self.0 & u8x16::splat(0b1000_0000)) == u8x16::splat(0) - } - } -} - -impl From for SimdU8Value { - #[inline] - fn from(val: u8x16) -> Self { - Self(val) - } -} - -impl Utf8CheckAlgorithm { - #[inline] - fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value { - let is_third_byte = prev2.unsigned_gt(SimdU8Value::splat(0b1110_0000 - 1)); - let is_fourth_byte = prev3.unsigned_gt(SimdU8Value::splat(0b1111_0000 - 1)); - - is_third_byte.or(is_fourth_byte) - } -} - -#[cfg(all( - any(target_arch = "aarch64", target_arch = "arm"), - target_feature = "neon" -))] -const HAS_FAST_REDUCE_MAX: bool = true; - -#[cfg(not(all( - any(target_arch = "aarch64", target_arch = "arm"), - target_feature = "neon" -)))] -const HAS_FAST_REDUCE_MAX: bool = false; - -use crate::implementation::helpers::TempSimdChunkA16 as TempSimdChunk; -simd_input_128_bit!(); -algorithm_simd!(); +pub use super::algorithm_safe::validate_utf8_basic; +pub use super::algorithm_safe::validate_utf8_compat; +pub use super::algorithm_safe::ChunkedUtf8ValidatorImp; +pub use super::algorithm_safe::Utf8ValidatorImp; diff --git a/portable/src/implementation/portable/simd256.rs b/portable/src/implementation/portable/simd256.rs index 3dc7c0f0..8d4986e7 100644 --- a/portable/src/implementation/portable/simd256.rs +++ b/portable/src/implementation/portable/simd256.rs @@ -1,230 +1,5 @@ -//! Contains the portable SIMD UTF-8 validation implementation. 
- -#![allow(clippy::too_many_arguments)] -#![allow(clippy::needless_pass_by_value, clippy::pedantic, clippy::all)] -use crate::implementation::helpers::Utf8CheckAlgorithm; -use core::simd::prelude::*; -use core::simd::{simd_swizzle, u8x32}; - -// Portable SIMD primitives -type SimdU8Value = crate::implementation::helpers::SimdU8Value; - -impl SimdU8Value { - #[inline] - fn from_32_cut_off_leading( - v0: u8, - v1: u8, - v2: u8, - v3: u8, - v4: u8, - v5: u8, - v6: u8, - v7: u8, - v8: u8, - v9: u8, - v10: u8, - v11: u8, - v12: u8, - v13: u8, - v14: u8, - v15: u8, - v16: u8, - v17: u8, - v18: u8, - v19: u8, - v20: u8, - v21: u8, - v22: u8, - v23: u8, - v24: u8, - v25: u8, - v26: u8, - v27: u8, - v28: u8, - v29: u8, - v30: u8, - v31: u8, - ) -> Self { - Self::from(u8x32::from_array([ - v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, - v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, - ])) - } - - #[inline] - fn repeat_16( - v0: u8, - v1: u8, - v2: u8, - v3: u8, - v4: u8, - v5: u8, - v6: u8, - v7: u8, - v8: u8, - v9: u8, - v10: u8, - v11: u8, - v12: u8, - v13: u8, - v14: u8, - v15: u8, - ) -> Self { - Self::from_32_cut_off_leading( - v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, v3, - v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, - ) - } - - #[inline] - unsafe fn load_from(ptr: *const u8) -> Self { - Self::from(ptr.cast::().read_unaligned()) - } - - #[inline] - fn lookup_16( - self, - v0: u8, - v1: u8, - v2: u8, - v3: u8, - v4: u8, - v5: u8, - v6: u8, - v7: u8, - v8: u8, - v9: u8, - v10: u8, - v11: u8, - v12: u8, - v13: u8, - v14: u8, - v15: u8, - ) -> Self { - // We need to ensure that 'self' only contains the lower 4 bits, unlike the avx instruction - // this will otherwise lead to bad results - let idx: u8x32 = self.0.cast(); - let src: u8x32 = Self::repeat_16( - v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, - ) - .0 - .cast(); - let res = 
src.swizzle_dyn(idx); - Self::from(res.cast()) - } - - #[inline] - fn splat(val: u8) -> Self { - #[allow(clippy::cast_possible_wrap)] - Self::from(u8x32::splat(val)) - } - - #[inline] - fn splat0() -> Self { - Self::from(u8x32::splat(0)) - } - - #[inline] - fn or(self, b: Self) -> Self { - Self::from(self.0 | b.0) - } - - #[inline] - fn and(self, b: Self) -> Self { - Self::from(self.0 & b.0) - } - - #[inline] - fn xor(self, b: Self) -> Self { - Self::from(self.0 ^ b.0) - } - - #[inline] - fn saturating_sub(self, b: Self) -> Self { - Self::from(self.0.saturating_sub(b.0)) - } - - // ugly but shr requires const generics - #[inline] - fn shr4(self) -> Self { - Self::from(self.0 >> 4) - } - - #[inline] - fn prev1(self, prev: Self) -> Self { - Self::from(simd_swizzle!( - self.0, - prev.0, - [ - 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, - 22, 23, 24, 25, 26, 27, 28, 29, 30, - ] - )) - } - - // ugly but prev requires const generics - #[inline] - fn prev2(self, prev: Self) -> Self { - Self::from(simd_swizzle!( - self.0, - prev.0, - [ - 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, - ] - )) - } - - // ugly but prev requires const generics - #[inline] - fn prev3(self, prev: Self) -> Self { - Self::from(simd_swizzle!( - self.0, - prev.0, - [ - 61, 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, - ] - )) - } - - #[inline] - fn unsigned_gt(self, other: Self) -> Self { - let gt = self.0.simd_gt(other.0).to_int(); - Self::from(gt.cast()) - } - - #[inline] - fn any_bit_set(self) -> bool { - self.0 != u8x32::splat(0) - } - - #[inline] - fn is_ascii(self) -> bool { - let significan_bits = self.0 & u8x32::from_array([0b1000_0000; 32]); - significan_bits == u8x32::from_array([0; 32]) - } -} - -impl From for SimdU8Value { - #[inline] - fn from(val: u8x32) -> Self { - Self(val) - } -} - -impl 
Utf8CheckAlgorithm { - #[inline] - fn must_be_2_3_continuation(prev2: SimdU8Value, prev3: SimdU8Value) -> SimdU8Value { - let is_third_byte = prev2.saturating_sub(SimdU8Value::splat(0b1110_0000 - 1)); - let is_fourth_byte = prev3.saturating_sub(SimdU8Value::splat(0b1111_0000 - 1)); - - is_third_byte - .or(is_fourth_byte) - .unsigned_gt(SimdU8Value::splat0()) - } -} - -use crate::implementation::helpers::TempSimdChunkA32 as TempSimdChunk; -simd_input_256_bit!(); -algorithm_simd!(); +// FIXME: 256 bit +pub use super::algorithm_safe::validate_utf8_basic; +pub use super::algorithm_safe::validate_utf8_compat; +pub use super::algorithm_safe::ChunkedUtf8ValidatorImp; +pub use super::algorithm_safe::Utf8ValidatorImp; From d5ed0642c111b5352e66b85f1bddee0576f43869 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 07:25:10 +0200 Subject: [PATCH 42/83] forbid unsafe impl, restructure --- portable/src/basic.rs | 42 ++++----- portable/src/compat.rs | 20 ++--- portable/src/implementation/helpers.rs | 10 --- portable/src/implementation/mod.rs | 22 ++--- portable/src/implementation/portable/mod.rs | 5 -- .../src/implementation/portable/simd128.rs | 4 - .../src/implementation/portable/simd256.rs | 5 -- .../{portable/algorithm_safe.rs => simd.rs} | 57 ++++++------ portable/tests/tests.rs | 86 +++++++------------ 9 files changed, 102 insertions(+), 149 deletions(-) delete mode 100644 portable/src/implementation/portable/mod.rs delete mode 100644 portable/src/implementation/portable/simd128.rs delete mode 100644 portable/src/implementation/portable/simd256.rs rename portable/src/implementation/{portable/algorithm_safe.rs => simd.rs} (95%) diff --git a/portable/src/basic.rs b/portable/src/basic.rs index 7f167e12..ec4d4275 100644 --- a/portable/src/basic.rs +++ b/portable/src/basic.rs @@ -115,7 +115,7 @@ pub mod imp { /// This implementation requires CPU SIMD features specified by the module it resides in. 
/// It is undefined behavior to call it if the required CPU features are not available. #[must_use] - unsafe fn new() -> Self + fn new() -> Self where Self: Sized; @@ -124,7 +124,7 @@ pub mod imp { /// # Safety /// This implementation requires CPU SIMD features specified by the module it resides in. /// It is undefined behavior to call it if the required CPU features are not available. - unsafe fn update(&mut self, input: &[u8]); + fn update(&mut self, input: &[u8]); /// Finishes the validation and returns `Ok(())` if the input was valid UTF-8. /// @@ -135,7 +135,7 @@ pub mod imp { /// # Safety /// This implementation requires CPU SIMD features specified by the module it resides in. /// It is undefined behavior to call it if the required CPU features are not available. - unsafe fn finalize(self) -> core::result::Result<(), basic::Utf8Error>; + fn finalize(self) -> core::result::Result<(), basic::Utf8Error>; } /// Like [`Utf8Validator`] this low-level API is for streaming validation of UTF-8 data. @@ -162,7 +162,7 @@ pub mod imp { /// This implementation requires CPU SIMD features specified by the module it resides in. /// It is undefined behavior to call it if the required CPU features are not available. #[must_use] - unsafe fn new() -> Self + fn new() -> Self where Self: Sized; @@ -174,7 +174,7 @@ pub mod imp { /// # Safety /// This implementation requires CPU SIMD features specified by the module it resides in. /// It is undefined behavior to call it if the required CPU features are not available. - unsafe fn update_from_chunks(&mut self, input: &[u8]); + fn update_from_chunks(&mut self, input: &[u8]); /// Updates the validator with remaining input if any. There is no restriction on the /// data provided. @@ -188,26 +188,26 @@ pub mod imp { /// # Safety /// This implementation requires CPU SIMD features specified by the module it resides in. /// It is undefined behavior to call it if the required CPU features are not available. 
- unsafe fn finalize( + fn finalize( self, remaining_input: core::option::Option<&[u8]>, ) -> core::result::Result<(), basic::Utf8Error>; } - /// Includes the portable SIMD implementations. - pub mod portable { - /// Includes the validation implementation using 128-bit portable SIMD. - pub mod simd128 { - pub use crate::implementation::portable::simd128::validate_utf8_basic as validate_utf8; - pub use crate::implementation::portable::simd128::ChunkedUtf8ValidatorImp; - pub use crate::implementation::portable::simd128::Utf8ValidatorImp; - } - - /// Includes the validation implementation using 256-bit portable SIMD. - pub mod simd256 { - pub use crate::implementation::portable::simd256::validate_utf8_basic as validate_utf8; - pub use crate::implementation::portable::simd256::ChunkedUtf8ValidatorImp; - pub use crate::implementation::portable::simd256::Utf8ValidatorImp; - } + /// Best for current target + pub use v128 as auto; + + /// Includes the validation implementation using 128-bit portable SIMD. + pub mod v128 { + pub use crate::implementation::simd::v128::validate_utf8_basic as validate_utf8; + pub use crate::implementation::simd::v128::ChunkedUtf8ValidatorImp; + pub use crate::implementation::simd::v128::Utf8ValidatorImp; + } + + /// Includes the validation implementation using 256-bit portable SIMD. + pub mod v256 { + pub use crate::implementation::simd::v256::validate_utf8_basic as validate_utf8; + pub use crate::implementation::simd::v256::ChunkedUtf8ValidatorImp; + pub use crate::implementation::simd::v256::Utf8ValidatorImp; } } diff --git a/portable/src/compat.rs b/portable/src/compat.rs index 9a043a7e..4d09998f 100644 --- a/portable/src/compat.rs +++ b/portable/src/compat.rs @@ -102,16 +102,16 @@ pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> { /// Allows direct access to the platform-specific unsafe validation implementations. 
#[cfg(feature = "public_imp")] pub mod imp { - /// FIXME: add docs - pub mod portable { - /// Includes the validation implementation for 128-bit portable SIMD. - pub mod simd128 { - pub use crate::implementation::portable::simd128::validate_utf8_compat as validate_utf8; - } + /// Best for current target FIXME: 256-bit support + pub use v128 as auto; - /// Includes the validation implementation for 256-bit portable SIMD. - pub mod simd256 { - pub use crate::implementation::portable::simd256::validate_utf8_compat as validate_utf8; - } + /// Includes the validation implementation for 128-bit portable SIMD. + pub mod v128 { + pub use crate::implementation::simd::v128::validate_utf8_compat as validate_utf8; + } + + /// Includes the validation implementation for 256-bit portable SIMD. + pub mod v256 { + pub use crate::implementation::simd::v256::validate_utf8_compat as validate_utf8; } } diff --git a/portable/src/implementation/helpers.rs b/portable/src/implementation/helpers.rs index 34b886af..42b132d3 100644 --- a/portable/src/implementation/helpers.rs +++ b/portable/src/implementation/helpers.rs @@ -37,16 +37,6 @@ pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8Er validate_utf8_at_offset(input, offset).unwrap_err() } -#[allow(dead_code)] // only used if there is a SIMD implementation -#[inline] -pub(crate) unsafe fn memcpy_unaligned_nonoverlapping_inline_opt_lt_64( - src: *const u8, - dest: *mut u8, - len: usize, -) { - src.copy_to_nonoverlapping(dest, len); -} - pub(crate) const SIMD_CHUNK_SIZE: usize = 64; #[repr(C, align(32))] diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index c8345730..38fe56eb 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -1,12 +1,12 @@ //! Contains UTF-8 validation implementations. 
-pub(crate) mod helpers; +#![forbid(unsafe_code)] -// UTF-8 validation function types -pub(crate) mod portable; +pub(crate) mod helpers; +pub(crate) mod simd; #[inline] -pub(crate) unsafe fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { +pub(crate) fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { if input.len() < helpers::SIMD_CHUNK_SIZE { return validate_utf8_basic_fallback(input); } @@ -15,15 +15,15 @@ pub(crate) unsafe fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basi } #[inline(never)] -unsafe fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { +fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { #[cfg(not(feature = "simd256"))] - return portable::algorithm_safe::validate_utf8_basic(input); + return simd::v128::validate_utf8_basic(input); #[cfg(feature = "simd256")] - return portable::simd256::validate_utf8_basic(input); + return simd::v256::validate_utf8_basic(input); } #[inline] -pub(crate) unsafe fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { +pub(crate) fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { if input.len() < helpers::SIMD_CHUNK_SIZE { return validate_utf8_compat_fallback(input); } @@ -31,11 +31,11 @@ pub(crate) unsafe fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::com validate_utf8_compat_simd(input) } -unsafe fn validate_utf8_compat_simd(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { +fn validate_utf8_compat_simd(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { #[cfg(not(feature = "simd256"))] - return portable::algorithm_safe::validate_utf8_compat(input); + return simd::v128::validate_utf8_compat(input); #[cfg(feature = "simd256")] - return portable::simd256::validate_utf8_compat(input); + return simd::v256::validate_utf8_compat(input); } // fallback method implementations diff --git 
a/portable/src/implementation/portable/mod.rs b/portable/src/implementation/portable/mod.rs deleted file mode 100644 index ec6c7b5a..00000000 --- a/portable/src/implementation/portable/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -pub(crate) mod algorithm_safe; -#[cfg(any(not(feature = "simd256"), feature = "public_imp"))] -pub(crate) mod simd128; -#[cfg(any(feature = "simd256", feature = "public_imp"))] -pub(crate) mod simd256; diff --git a/portable/src/implementation/portable/simd128.rs b/portable/src/implementation/portable/simd128.rs deleted file mode 100644 index fbd4b4e9..00000000 --- a/portable/src/implementation/portable/simd128.rs +++ /dev/null @@ -1,4 +0,0 @@ -pub use super::algorithm_safe::validate_utf8_basic; -pub use super::algorithm_safe::validate_utf8_compat; -pub use super::algorithm_safe::ChunkedUtf8ValidatorImp; -pub use super::algorithm_safe::Utf8ValidatorImp; diff --git a/portable/src/implementation/portable/simd256.rs b/portable/src/implementation/portable/simd256.rs deleted file mode 100644 index 8d4986e7..00000000 --- a/portable/src/implementation/portable/simd256.rs +++ /dev/null @@ -1,5 +0,0 @@ -// FIXME: 256 bit -pub use super::algorithm_safe::validate_utf8_basic; -pub use super::algorithm_safe::validate_utf8_compat; -pub use super::algorithm_safe::ChunkedUtf8ValidatorImp; -pub use super::algorithm_safe::Utf8ValidatorImp; diff --git a/portable/src/implementation/portable/algorithm_safe.rs b/portable/src/implementation/simd.rs similarity index 95% rename from portable/src/implementation/portable/algorithm_safe.rs rename to portable/src/implementation/simd.rs index a34b4f9d..4fc1bf81 100644 --- a/portable/src/implementation/portable/algorithm_safe.rs +++ b/portable/src/implementation/simd.rs @@ -658,11 +658,6 @@ where /// /// # Errors /// Returns the zero-sized [`basic::Utf8Error`] on failure. -/// -/// # Safety -/// This function is inherently unsafe because it is compiled with SIMD extensions -/// enabled. 
Make sure that the CPU supports it before calling. -/// #[inline] pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { Utf8CheckAlgorithm::<16, 4>::validate_utf8_basic(input) @@ -672,11 +667,6 @@ pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8 /// /// # Errors /// Returns [`compat::Utf8Error`] with detailed error information on failure. -/// -/// # Safety -/// This function is inherently unsafe because it is compiled with SIMD extensions -/// enabled. Make sure that the CPU supports it before calling. -/// #[inline] pub fn validate_utf8_compat(input: &[u8]) -> core::result::Result<(), compat::Utf8Error> { Utf8CheckAlgorithm::<16, 4>::validate_utf8_compat_simd0(input) @@ -698,7 +688,7 @@ pub struct Utf8ValidatorImp { #[cfg(feature = "public_imp")] impl Utf8ValidatorImp { #[inline] - unsafe fn update_from_incomplete_data(&mut self) { + fn update_from_incomplete_data(&mut self) { let simd_input = SimdInput::new(&self.incomplete_data); self.algorithm.check_utf8(&simd_input); self.incomplete_len = 0; @@ -709,7 +699,7 @@ impl Utf8ValidatorImp { impl basic::imp::Utf8Validator for Utf8ValidatorImp { #[inline] #[must_use] - unsafe fn new() -> Self { + fn new() -> Self { Self { algorithm: Utf8CheckAlgorithm::<16, 4>::new(), incomplete_data: [0; 64], @@ -718,17 +708,15 @@ impl basic::imp::Utf8Validator for Utf8ValidatorImp { } #[inline] - unsafe fn update(&mut self, mut input: &[u8]) { + fn update(&mut self, mut input: &[u8]) { use crate::implementation::helpers::SIMD_CHUNK_SIZE; if input.is_empty() { return; } if self.incomplete_len != 0 { let to_copy = core::cmp::min(SIMD_CHUNK_SIZE - self.incomplete_len, input.len()); - self.incomplete_data - .as_mut_ptr() - .add(self.incomplete_len) - .copy_from_nonoverlapping(input.as_ptr(), to_copy); + self.incomplete_data[self.incomplete_len..self.incomplete_len + to_copy] + .copy_from_slice(&input[..to_copy]); if self.incomplete_len + to_copy == SIMD_CHUNK_SIZE { 
self.update_from_incomplete_data(); input = &input[to_copy..]; @@ -747,15 +735,13 @@ impl basic::imp::Utf8Validator for Utf8ValidatorImp { } if idx < len { let to_copy = len - idx; - self.incomplete_data - .as_mut_ptr() - .copy_from_nonoverlapping(input.as_ptr().add(idx), to_copy); + self.incomplete_data[..to_copy].copy_from_slice(&input[idx..idx + to_copy]); self.incomplete_len = to_copy; } } #[inline] - unsafe fn finalize(mut self) -> core::result::Result<(), basic::Utf8Error> { + fn finalize(mut self) -> core::result::Result<(), basic::Utf8Error> { if self.incomplete_len != 0 { for i in &mut self.incomplete_data[self.incomplete_len..] { *i = 0; @@ -785,14 +771,14 @@ pub struct ChunkedUtf8ValidatorImp { impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { #[inline] #[must_use] - unsafe fn new() -> Self { + fn new() -> Self { Self { algorithm: Utf8CheckAlgorithm::<16, 4>::new(), } } #[inline] - unsafe fn update_from_chunks(&mut self, input: &[u8]) { + fn update_from_chunks(&mut self, input: &[u8]) { use crate::implementation::helpers::SIMD_CHUNK_SIZE; assert!( @@ -806,7 +792,7 @@ impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { } #[inline] - unsafe fn finalize( + fn finalize( mut self, remaining_input: core::option::Option<&[u8]>, ) -> core::result::Result<(), basic::Utf8Error> { @@ -823,10 +809,7 @@ impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { if rem > 0 { remaining_input = &remaining_input[chunks_lim..]; let mut tmpbuf = TempSimdChunk::new(); - tmpbuf - .0 - .as_mut_ptr() - .copy_from_nonoverlapping(remaining_input.as_ptr(), remaining_input.len()); + tmpbuf.0[..remaining_input.len()].copy_from_slice(remaining_input); let simd_input = SimdInput::new(&tmpbuf.0); self.algorithm.check_utf8(&simd_input); } @@ -840,3 +823,21 @@ impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { } } } + +pub(crate) mod v128 { + pub use super::validate_utf8_basic; + pub use super::validate_utf8_compat; + 
#[cfg(feature = "public_imp")] + pub use super::ChunkedUtf8ValidatorImp; + #[cfg(feature = "public_imp")] + pub use super::Utf8ValidatorImp; +} + +pub(crate) mod v256 { + pub use super::validate_utf8_basic; + pub use super::validate_utf8_compat; + #[cfg(feature = "public_imp")] + pub use super::ChunkedUtf8ValidatorImp; + #[cfg(feature = "public_imp")] + pub use super::Utf8ValidatorImp; +} diff --git a/portable/tests/tests.rs b/portable/tests/tests.rs index 40a6d850..96cfa9ee 100644 --- a/portable/tests/tests.rs +++ b/portable/tests/tests.rs @@ -69,76 +69,56 @@ mod public_imp { #[allow(unused_variables)] // nothing to do if not SIMD implementation is available pub(super) fn test_valid(input: &[u8]) { #[cfg(feature = "public_imp")] - unsafe { - assert!(simdutf8_portable::basic::imp::portable::simd128::validate_utf8(input).is_ok()); - assert!( - simdutf8_portable::compat::imp::portable::simd128::validate_utf8(input).is_ok() - ); + { + assert!(simdutf8_portable::basic::imp::v128::validate_utf8(input).is_ok()); + assert!(simdutf8_portable::compat::imp::v128::validate_utf8(input).is_ok()); - test_streaming::( + test_streaming::(input, true); + test_chunked_streaming::( input, true, ); - test_chunked_streaming::< - simdutf8_portable::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp, - >(input, true); - assert!(simdutf8_portable::basic::imp::portable::simd256::validate_utf8(input).is_ok()); - assert!( - simdutf8_portable::compat::imp::portable::simd256::validate_utf8(input).is_ok() - ); + assert!(simdutf8_portable::basic::imp::v256::validate_utf8(input).is_ok()); + assert!(simdutf8_portable::compat::imp::v256::validate_utf8(input).is_ok()); - test_streaming::( + test_streaming::(input, true); + test_chunked_streaming::( input, true, ); - test_chunked_streaming::< - simdutf8_portable::basic::imp::portable::simd256::ChunkedUtf8ValidatorImp, - >(input, true); } } #[allow(unused_variables)] // nothing to do if not SIMD implementation is available pub(super) fn 
test_invalid(input: &[u8], valid_up_to: usize, error_len: Option) { #[cfg(feature = "public_imp")] - unsafe { - assert!( - simdutf8_portable::basic::imp::portable::simd128::validate_utf8(input).is_err() - ); - let err = simdutf8_portable::compat::imp::portable::simd128::validate_utf8(input) - .unwrap_err(); + { + assert!(simdutf8_portable::basic::imp::v128::validate_utf8(input).is_err()); + let err = simdutf8_portable::compat::imp::v128::validate_utf8(input).unwrap_err(); assert_eq!(err.valid_up_to(), valid_up_to); assert_eq!(err.error_len(), error_len); - test_streaming::( + test_streaming::(input, false); + test_chunked_streaming::( input, false, ); - test_chunked_streaming::< - simdutf8_portable::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp, - >(input, false); - assert!( - simdutf8_portable::basic::imp::portable::simd256::validate_utf8(input).is_err() - ); - let err = simdutf8_portable::compat::imp::portable::simd256::validate_utf8(input) - .unwrap_err(); + assert!(simdutf8_portable::basic::imp::v256::validate_utf8(input).is_err()); + let err = simdutf8_portable::compat::imp::v256::validate_utf8(input).unwrap_err(); assert_eq!(err.valid_up_to(), valid_up_to); assert_eq!(err.error_len(), error_len); - test_streaming::( + test_streaming::(input, false); + test_chunked_streaming::( input, false, ); - test_chunked_streaming::< - simdutf8_portable::basic::imp::portable::simd256::ChunkedUtf8ValidatorImp, - >(input, false); } } #[allow(unused)] // not used if not SIMD implementation is available fn test_streaming(input: &[u8], ok: bool) { - unsafe { - let mut validator = T::new(); - validator.update(input); - assert_eq!(validator.finalize().is_ok(), ok); - } + let mut validator = T::new(); + validator.update(input); + assert_eq!(validator.finalize().is_ok(), ok); for i in [64, 128, 256, 1024, 65536, 1, 2, 3, 36, 99].iter() { test_streaming_blocks::(input, *i, ok) } @@ -150,13 +130,11 @@ mod public_imp { block_size: usize, ok: bool, ) { - unsafe { - let mut 
validator = T::new(); - for chunk in input.chunks(block_size) { - validator.update(chunk); - } - assert_eq!(validator.finalize().is_ok(), ok); + let mut validator = T::new(); + for chunk in input.chunks(block_size) { + validator.update(chunk); } + assert_eq!(validator.finalize().is_ok(), ok); } #[allow(unused)] // not used if not SIMD implementation is available @@ -177,21 +155,19 @@ mod public_imp { chunk_size: usize, ok: bool, ) { - unsafe { - let mut validator = T::new(); - let mut chunks = input.chunks_exact(chunk_size); - for chunk in &mut chunks { - validator.update_from_chunks(chunk); - } - assert_eq!(validator.finalize(Some(chunks.remainder())).is_ok(), ok); + let mut validator = T::new(); + let mut chunks = input.chunks_exact(chunk_size); + for chunk in &mut chunks { + validator.update_from_chunks(chunk); } + assert_eq!(validator.finalize(Some(chunks.remainder())).is_ok(), ok); } #[test] #[should_panic] fn test_neon_chunked_panic() { test_chunked_streaming_with_chunk_size::< - simdutf8_portable::basic::imp::portable::simd128::ChunkedUtf8ValidatorImp, + simdutf8_portable::basic::imp::v128::ChunkedUtf8ValidatorImp, >(b"abcd", 1, true); } } From dc02eac60c749c0a03982b5381f4a11cdad3dcd1 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 07:29:45 +0200 Subject: [PATCH 43/83] cleanup --- portable/src/implementation/helpers.rs | 42 -------------------------- portable/src/implementation/mod.rs | 4 +-- portable/src/implementation/simd.rs | 11 ++----- 3 files changed, 5 insertions(+), 52 deletions(-) diff --git a/portable/src/implementation/helpers.rs b/portable/src/implementation/helpers.rs index 42b132d3..78d633f1 100644 --- a/portable/src/implementation/helpers.rs +++ b/portable/src/implementation/helpers.rs @@ -36,45 +36,3 @@ pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8Er // UNWRAP: safe because the SIMD UTF-8 validation found an error validate_utf8_at_offset(input, offset).unwrap_err() } - -pub(crate) const 
SIMD_CHUNK_SIZE: usize = 64; - -#[repr(C, align(32))] -#[allow(dead_code)] // only used if there is a SIMD implementation -pub(crate) struct Utf8CheckAlgorithm { - pub(crate) prev: T, - pub(crate) incomplete: T, - pub(crate) error: T, -} - -#[repr(C, align(16))] -#[allow(dead_code)] // only used if a 128-bit SIMD implementation is used -pub(crate) struct TempSimdChunkA16(pub(crate) [u8; SIMD_CHUNK_SIZE]); - -#[allow(dead_code)] // only used if there is a SIMD implementation -impl TempSimdChunkA16 { - #[expect(clippy::inline_always)] - #[inline(always)] // needs to be forced because otherwise it is not inlined on armv7 neo - pub(crate) const fn new() -> Self { - Self([0; SIMD_CHUNK_SIZE]) - } -} - -#[repr(C, align(32))] -#[allow(dead_code)] // only used if a 256-bit SIMD implementation is used -pub(crate) struct TempSimdChunkA32(pub(crate) [u8; SIMD_CHUNK_SIZE]); - -#[allow(dead_code)] // only used if there is a SIMD implementation -impl TempSimdChunkA32 { - #[expect(clippy::inline_always)] - #[inline(always)] // needs to be forced because otherwise it is not inlined on armv7 neo - pub(crate) const fn new() -> Self { - Self([0; SIMD_CHUNK_SIZE]) - } -} - -#[derive(Clone, Copy)] -#[allow(dead_code)] // only used if there is a SIMD implementation -pub(crate) struct SimdU8Value(pub(crate) T) -where - T: Copy; diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 38fe56eb..00f3d553 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -7,7 +7,7 @@ pub(crate) mod simd; #[inline] pub(crate) fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { - if input.len() < helpers::SIMD_CHUNK_SIZE { + if input.len() < simd::SIMD_CHUNK_SIZE { return validate_utf8_basic_fallback(input); } @@ -24,7 +24,7 @@ fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> #[inline] pub(crate) fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { - 
if input.len() < helpers::SIMD_CHUNK_SIZE { + if input.len() < simd::SIMD_CHUNK_SIZE { return validate_utf8_compat_fallback(input); } diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index 4fc1bf81..776a3beb 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -4,7 +4,9 @@ use std::simd::{ simd_swizzle, u8x16, LaneCount, Simd, SupportedLaneCount, }; -use crate::{basic, compat, implementation::helpers::SIMD_CHUNK_SIZE}; +use crate::{basic, compat}; + +pub(crate) const SIMD_CHUNK_SIZE: usize = 64; #[cfg(all( any(target_arch = "aarch64", target_arch = "arm"), @@ -562,7 +564,6 @@ where /// #[inline] pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { - use crate::implementation::helpers::SIMD_CHUNK_SIZE; let mut algorithm = Self::new(); let mut chunks = input.chunks_exact(SIMD_CHUNK_SIZE); for chunk in chunks.by_ref() { @@ -593,7 +594,6 @@ where #[inline] #[expect(clippy::redundant_else)] // more readable fn validate_utf8_compat_simd0(input: &[u8]) -> core::result::Result<(), usize> { - use crate::implementation::helpers::SIMD_CHUNK_SIZE; let mut algorithm = Self::new(); let mut idx = 0; let mut chunks = input.chunks_exact(SIMD_CHUNK_SIZE); @@ -709,7 +709,6 @@ impl basic::imp::Utf8Validator for Utf8ValidatorImp { #[inline] fn update(&mut self, mut input: &[u8]) { - use crate::implementation::helpers::SIMD_CHUNK_SIZE; if input.is_empty() { return; } @@ -779,8 +778,6 @@ impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { #[inline] fn update_from_chunks(&mut self, input: &[u8]) { - use crate::implementation::helpers::SIMD_CHUNK_SIZE; - assert!( input.len() % SIMD_CHUNK_SIZE == 0, "Input size must be a multiple of 64." 
@@ -796,8 +793,6 @@ impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { mut self, remaining_input: core::option::Option<&[u8]>, ) -> core::result::Result<(), basic::Utf8Error> { - use crate::implementation::helpers::SIMD_CHUNK_SIZE; - if let Some(mut remaining_input) = remaining_input { if !remaining_input.is_empty() { let len = remaining_input.len(); From 934d666f66e6673484fabfa29285b205eacacbca Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 07:41:18 +0200 Subject: [PATCH 44/83] wip --- portable/Cargo.toml | 3 --- portable/src/basic.rs | 6 +++++- portable/src/implementation/mod.rs | 10 ++-------- portable/src/implementation/simd.rs | 2 ++ 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/portable/Cargo.toml b/portable/Cargo.toml index 1b644084..2eda911f 100644 --- a/portable/Cargo.toml +++ b/portable/Cargo.toml @@ -12,6 +12,3 @@ std = [] # fixme: needed? # expose SIMD implementations in basic::imp::* and compat::imp::* public_imp = [] - -# use 256-bit vectors -simd256 = [] diff --git a/portable/src/basic.rs b/portable/src/basic.rs index ec4d4275..73f89cbb 100644 --- a/portable/src/basic.rs +++ b/portable/src/basic.rs @@ -195,7 +195,11 @@ pub mod imp { } /// Best for current target - pub use v128 as auto; + pub mod auto { + pub use crate::implementation::simd::auto::validate_utf8_basic as validate_utf8; + pub use crate::implementation::simd::auto::ChunkedUtf8ValidatorImp; + pub use crate::implementation::simd::auto::Utf8ValidatorImp; + } /// Includes the validation implementation using 128-bit portable SIMD. 
pub mod v128 { diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 00f3d553..c4cf00b6 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -16,10 +16,7 @@ pub(crate) fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8 #[inline(never)] fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { - #[cfg(not(feature = "simd256"))] - return simd::v128::validate_utf8_basic(input); - #[cfg(feature = "simd256")] - return simd::v256::validate_utf8_basic(input); + simd::auto::validate_utf8_basic(input) } #[inline] @@ -32,10 +29,7 @@ pub(crate) fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Ut } fn validate_utf8_compat_simd(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { - #[cfg(not(feature = "simd256"))] - return simd::v128::validate_utf8_compat(input); - #[cfg(feature = "simd256")] - return simd::v256::validate_utf8_compat(input); + simd::auto::validate_utf8_compat(input) } // fallback method implementations diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index 776a3beb..30114342 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -819,6 +819,8 @@ impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { } } +pub(crate) use v128 as auto; // FIXME: select based on target feature + pub(crate) mod v128 { pub use super::validate_utf8_basic; pub use super::validate_utf8_compat; From 5d4506602846a55aeca41294f6cc4616a6b00548 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 07:45:13 +0200 Subject: [PATCH 45/83] wip --- portable/Cargo.toml | 2 +- portable/src/basic.rs | 3 +-- portable/src/compat.rs | 3 +-- portable/src/implementation/simd.rs | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/portable/Cargo.toml b/portable/Cargo.toml index 2eda911f..9dc007bb 100644 --- a/portable/Cargo.toml +++ b/portable/Cargo.toml 
@@ -8,7 +8,7 @@ edition = "2021" [features] default = ["std"] -std = [] # fixme: needed? +std = [] # expose SIMD implementations in basic::imp::* and compat::imp::* public_imp = [] diff --git a/portable/src/basic.rs b/portable/src/basic.rs index 73f89cbb..100eef80 100644 --- a/portable/src/basic.rs +++ b/portable/src/basic.rs @@ -22,8 +22,7 @@ impl core::fmt::Display for Utf8Error { } } -#[cfg(feature = "std")] -impl std::error::Error for Utf8Error {} +impl core::error::Error for Utf8Error {} /// Analogue to [`std::str::from_utf8()`]. /// diff --git a/portable/src/compat.rs b/portable/src/compat.rs index 4d09998f..edfb6a8d 100644 --- a/portable/src/compat.rs +++ b/portable/src/compat.rs @@ -64,8 +64,7 @@ impl Display for Utf8Error { } } -#[cfg(feature = "std")] -impl std::error::Error for Utf8Error {} +impl core::error::Error for Utf8Error {} /// Analogue to [`std::str::from_utf8()`]. /// diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index 30114342..aa332857 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -1,4 +1,4 @@ -use std::simd::{ +use core::simd::{ cmp::SimdPartialOrd, num::{SimdInt, SimdUint}, simd_swizzle, u8x16, LaneCount, Simd, SupportedLaneCount, From a7d93ca52a6f04f4cb407f1737a59f79fe18d613 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 08:00:28 +0200 Subject: [PATCH 46/83] wip --- portable/Cargo.toml | 2 -- portable/src/implementation/helpers.rs | 38 ----------------------- portable/src/implementation/mod.rs | 42 ++++++++++++++++++++++++-- portable/src/implementation/simd.rs | 2 +- 4 files changed, 41 insertions(+), 43 deletions(-) delete mode 100644 portable/src/implementation/helpers.rs diff --git a/portable/Cargo.toml b/portable/Cargo.toml index 9dc007bb..d4e5175a 100644 --- a/portable/Cargo.toml +++ b/portable/Cargo.toml @@ -1,5 +1,3 @@ -#cargo-features = ["edition2024"] # TODO - [package] name = "simdutf8-portable" version = "0.1.0" diff --git 
a/portable/src/implementation/helpers.rs b/portable/src/implementation/helpers.rs deleted file mode 100644 index 78d633f1..00000000 --- a/portable/src/implementation/helpers.rs +++ /dev/null @@ -1,38 +0,0 @@ -type Utf8ErrorCompat = crate::compat::Utf8Error; - -#[inline] -#[expect(clippy::cast_possible_truncation)] -pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), Utf8ErrorCompat> { - match core::str::from_utf8(&input[offset..]) { - Ok(_) => Ok(()), - Err(err) => Err(Utf8ErrorCompat { - valid_up_to: err.valid_up_to() + offset, - error_len: err.error_len().map(|len| { - // never truncates since std::str::err::Utf8Error::error_len() never returns value larger than 4 - len as u8 - }), - }), - } -} - -#[cold] -#[expect(clippy::unwrap_used)] -#[allow(dead_code)] // only used if there is a SIMD implementation -pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8ErrorCompat { - let offset = if failing_block_pos == 0 { - // Error must be in this block since it is the first. - 0 - } else { - // The previous block is OK except for a possible continuation over the block boundary. - // We go backwards over the last three bytes of the previous block and find the - // last non-continuation byte as a starting point for an std validation. If the last - // three bytes are all continuation bytes then the previous block ends with a four byte - // UTF-8 codepoint, is thus complete and valid UTF-8. We start the check with the - // current block in that case. 
- (1..=3) - .find(|i| input[failing_block_pos - i] >> 6 != 0b10) - .map_or(failing_block_pos, |i| failing_block_pos - i) - }; - // UNWRAP: safe because the SIMD UTF-8 validation found an error - validate_utf8_at_offset(input, offset).unwrap_err() -} diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index c4cf00b6..b2e689d9 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -2,7 +2,6 @@ #![forbid(unsafe_code)] -pub(crate) mod helpers; pub(crate) mod simd; #[inline] @@ -43,5 +42,44 @@ pub(crate) fn validate_utf8_basic_fallback(input: &[u8]) -> Result<(), crate::ba #[inline] pub(crate) fn validate_utf8_compat_fallback(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { - helpers::validate_utf8_at_offset(input, 0) + validate_utf8_at_offset(input, 0) +} + +type Utf8ErrorCompat = crate::compat::Utf8Error; + +#[inline] +#[expect(clippy::cast_possible_truncation)] +pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), Utf8ErrorCompat> { + match core::str::from_utf8(&input[offset..]) { + Ok(_) => Ok(()), + Err(err) => Err(Utf8ErrorCompat { + valid_up_to: err.valid_up_to() + offset, + error_len: err.error_len().map(|len| { + // never truncates since std::str::err::Utf8Error::error_len() never returns value larger than 4 + len as u8 + }), + }), + } +} + +#[cold] +#[expect(clippy::unwrap_used)] +#[allow(dead_code)] // only used if there is a SIMD implementation +pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8ErrorCompat { + let offset = if failing_block_pos == 0 { + // Error must be in this block since it is the first. + 0 + } else { + // The previous block is OK except for a possible continuation over the block boundary. + // We go backwards over the last three bytes of the previous block and find the + // last non-continuation byte as a starting point for an std validation. 
If the last + // three bytes are all continuation bytes then the previous block ends with a four byte + // UTF-8 codepoint, is thus complete and valid UTF-8. We start the check with the + // current block in that case. + (1..=3) + .find(|i| input[failing_block_pos - i] >> 6 != 0b10) + .map_or(failing_block_pos, |i| failing_block_pos - i) + }; + // UNWRAP: safe because the SIMD UTF-8 validation found an error + validate_utf8_at_offset(input, offset).unwrap_err() } diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index aa332857..7a47301f 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -670,7 +670,7 @@ pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8 #[inline] pub fn validate_utf8_compat(input: &[u8]) -> core::result::Result<(), compat::Utf8Error> { Utf8CheckAlgorithm::<16, 4>::validate_utf8_compat_simd0(input) - .map_err(|idx| crate::implementation::helpers::get_compat_error(input, idx)) + .map_err(|idx| super::get_compat_error(input, idx)) } /// Low-level implementation of the [`basic::imp::Utf8Validator`] trait. From ef1c44b120603a7ae7e634fa48fdf148686eabe6 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 09:12:30 +0200 Subject: [PATCH 47/83] simd 256 fixes --- portable/src/implementation/simd.rs | 286 ++++++++++++++++++++++------ 1 file changed, 233 insertions(+), 53 deletions(-) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index 7a47301f..440a240d 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -3,6 +3,7 @@ use core::simd::{ num::{SimdInt, SimdUint}, simd_swizzle, u8x16, LaneCount, Simd, SupportedLaneCount, }; +use std::simd::u8x32; use crate::{basic, compat}; @@ -20,7 +21,7 @@ const HAS_FAST_REDUCE_MAX: bool = true; )))] const HAS_FAST_REDUCE_MAX: bool = false; -const HAS_FAST_MASKED_LOAD: bool = false; // FIXME avx512, avx2 (?) 
+const HAS_FAST_MASKED_LOAD: bool = false; // FIXME avx512, avx2 (32-bit chunks only?) #[repr(C, align(32))] #[allow(dead_code)] // only used if a 128-bit SIMD implementation is used @@ -116,12 +117,50 @@ impl SimdInputTrait for SimdInput<16, 4> { } } +impl SimdInputTrait for SimdInput<32, 2> { + #[inline] + fn new(s: &[u8]) -> Self { + assert!(s.len() == 64); + Self { + vals: [u8x32::from_slice(&s[..32]), u8x32::from_slice(&s[32..64])], + } + } + + #[inline] + fn new_partial_masked_load(mut slice: &[u8]) -> Self { + let val0 = load_masked_opt(slice); + slice = &slice[slice.len().min(32)..]; + if slice.is_empty() { + return Self { + vals: [val0, u8x32::default()], + }; + } + let val1 = load_masked_opt(slice); + Self { vals: [val0, val1] } + } + + #[inline] + fn new_partial_copy(slice: &[u8]) -> Self { + let mut buf = [0; 64]; + buf[..slice.len()].copy_from_slice(slice); + Self::new(&buf) + } + + #[inline] + fn is_ascii(&self) -> bool { + (self.vals[0] | self.vals[1]).is_ascii() + } +} + #[inline] -fn load_masked_opt(slice: &[u8]) -> Simd { - if slice.len() > 15 { - u8x16::from_slice(&slice[..16]) +fn load_masked_opt(slice: &[u8]) -> Simd +where + LaneCount: SupportedLaneCount, +{ + if slice.len() > N - 1 { + Simd::::from_slice(&slice[..N]) } else { - u8x16::load_or_default(slice) + Simd::::load_or_default(slice) } } @@ -134,10 +173,34 @@ where pub(crate) error: Simd, // FIXME: should be a mask? 
} +trait Lookup16 { + #[expect(clippy::too_many_arguments)] + fn lookup_16( + self, + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self; +} + trait SimdU8Value where LaneCount: SupportedLaneCount, Self: Copy, + Self: Lookup16, { #[expect(clippy::too_many_arguments)] fn from_32_cut_off_leading( @@ -195,27 +258,6 @@ where v15: u8, ) -> Self; - #[expect(clippy::too_many_arguments)] - fn lookup_16( - self, - v0: u8, - v1: u8, - v2: u8, - v3: u8, - v4: u8, - v5: u8, - v6: u8, - v7: u8, - v8: u8, - v9: u8, - v10: u8, - v11: u8, - v12: u8, - v13: u8, - v14: u8, - v15: u8, - ) -> Self; - // const generics would be more awkward and verbose with the current // portable SIMD swizzle implementation and compiler limitations. fn prev1(self, prev: Self) -> Self; @@ -290,6 +332,48 @@ impl SimdU8Value<16> for u8x16 { ]) } + #[inline] + fn prev1(self, prev: Self) -> Self { + simd_swizzle!( + self, + prev, + [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,] + ) + } + + #[inline] + fn prev2(self, prev: Self) -> Self { + simd_swizzle!( + self, + prev, + [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,] + ) + } + + #[inline] + fn prev3(self, prev: Self) -> Self { + simd_swizzle!( + self, + prev, + [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,] + ) + } + + #[inline] + fn is_ascii(self) -> bool { + if HAS_FAST_REDUCE_MAX { + self.reduce_max() < 0b1000_0000 + } else { + (self & Self::splat(0b1000_0000)) == Self::splat(0) + } + } +} + +impl Lookup16 for Simd +where + Self: SimdU8Value, + LaneCount: SupportedLaneCount, +{ #[inline] fn lookup_16( self, @@ -317,13 +401,84 @@ impl SimdU8Value<16> for u8x16 { ); src.swizzle_dyn(self) } +} + +impl SimdU8Value<32> for u8x32 { + #[inline] + fn from_32_cut_off_leading( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + 
v13: u8, + v14: u8, + v15: u8, + v16: u8, + v17: u8, + v18: u8, + v19: u8, + v20: u8, + v21: u8, + v22: u8, + v23: u8, + v24: u8, + v25: u8, + v26: u8, + v27: u8, + v28: u8, + v29: u8, + v30: u8, + v31: u8, + ) -> Self { + Self::from_array([ + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, + v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, + ]) + } + + #[inline] + fn repeat_16( + v0: u8, + v1: u8, + v2: u8, + v3: u8, + v4: u8, + v5: u8, + v6: u8, + v7: u8, + v8: u8, + v9: u8, + v10: u8, + v11: u8, + v12: u8, + v13: u8, + v14: u8, + v15: u8, + ) -> Self { + Self::from_array([ + v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v0, v1, v2, v3, + v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, + ]) + } #[inline] fn prev1(self, prev: Self) -> Self { simd_swizzle!( self, prev, - [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,] + [ + 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 23, 24, 25, 26, 27, 28, 29, 30 + ] ) } @@ -332,7 +487,10 @@ impl SimdU8Value<16> for u8x16 { simd_swizzle!( self, prev, - [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,] + [ + 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29 + ] ) } @@ -341,7 +499,10 @@ impl SimdU8Value<16> for u8x16 { simd_swizzle!( self, prev, - [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,] + [ + 61, 62, 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, + ] ) } @@ -654,25 +815,6 @@ where } } -/// Validation implementation for CPUs supporting the SIMD extension (see module). -/// -/// # Errors -/// Returns the zero-sized [`basic::Utf8Error`] on failure. 
-#[inline] -pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), basic::Utf8Error> { - Utf8CheckAlgorithm::<16, 4>::validate_utf8_basic(input) -} - -/// Validation implementation for CPUs supporting the SIMD extension (see module). -/// -/// # Errors -/// Returns [`compat::Utf8Error`] with detailed error information on failure. -#[inline] -pub fn validate_utf8_compat(input: &[u8]) -> core::result::Result<(), compat::Utf8Error> { - Utf8CheckAlgorithm::<16, 4>::validate_utf8_compat_simd0(input) - .map_err(|idx| super::get_compat_error(input, idx)) -} - /// Low-level implementation of the [`basic::imp::Utf8Validator`] trait. /// /// This is implementation requires CPU SIMD features specified by the module it resides in. @@ -819,11 +961,30 @@ impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { } } -pub(crate) use v128 as auto; // FIXME: select based on target feature +pub(crate) use v256 as auto; // FIXME: select based on target feature pub(crate) mod v128 { - pub use super::validate_utf8_basic; - pub use super::validate_utf8_compat; + /// Validation implementation for CPUs supporting the SIMD extension (see module). + /// + /// # Errors + /// Returns the zero-sized [`basic::Utf8Error`] on failure. + #[inline] + pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), crate::basic::Utf8Error> { + super::Utf8CheckAlgorithm::<16, 4>::validate_utf8_basic(input) + } + + /// Validation implementation for CPUs supporting the SIMD extension (see module). + /// + /// # Errors + /// Returns [`compat::Utf8Error`] with detailed error information on failure. 
+ #[inline] + pub fn validate_utf8_compat( + input: &[u8], + ) -> core::result::Result<(), crate::compat::Utf8Error> { + super::Utf8CheckAlgorithm::<16, 4>::validate_utf8_compat_simd0(input) + .map_err(|idx| crate::implementation::get_compat_error(input, idx)) + } + #[cfg(feature = "public_imp")] pub use super::ChunkedUtf8ValidatorImp; #[cfg(feature = "public_imp")] @@ -831,8 +992,27 @@ pub(crate) mod v128 { } pub(crate) mod v256 { - pub use super::validate_utf8_basic; - pub use super::validate_utf8_compat; + /// Validation implementation for CPUs supporting the SIMD extension (see module). + /// + /// # Errors + /// Returns the zero-sized [`basic::Utf8Error`] on failure. + #[inline] + pub fn validate_utf8_basic(input: &[u8]) -> core::result::Result<(), crate::basic::Utf8Error> { + super::Utf8CheckAlgorithm::<32, 2>::validate_utf8_basic(input) + } + + /// Validation implementation for CPUs supporting the SIMD extension (see module). + /// + /// # Errors + /// Returns [`compat::Utf8Error`] with detailed error information on failure. 
+ #[inline] + pub fn validate_utf8_compat( + input: &[u8], + ) -> core::result::Result<(), crate::compat::Utf8Error> { + super::Utf8CheckAlgorithm::<32, 2>::validate_utf8_compat_simd0(input) + .map_err(|idx| crate::implementation::get_compat_error(input, idx)) + } + #[cfg(feature = "public_imp")] pub use super::ChunkedUtf8ValidatorImp; #[cfg(feature = "public_imp")] From 473361a072e9757fdd9d39e80dfe4d8e12a69d9a Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 09:16:56 +0200 Subject: [PATCH 48/83] simplify, use v128 for now --- portable/src/implementation/simd.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index 440a240d..e49bbca5 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -199,8 +199,6 @@ trait Lookup16 { trait SimdU8Value where LaneCount: SupportedLaneCount, - Self: Copy, - Self: Lookup16, { #[expect(clippy::too_many_arguments)] fn from_32_cut_off_leading( @@ -961,7 +959,7 @@ impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { } } -pub(crate) use v256 as auto; // FIXME: select based on target feature +pub(crate) use v128 as auto; // FIXME: select based on target feature pub(crate) mod v128 { /// Validation implementation for CPUs supporting the SIMD extension (see module). 
From bdf642b3e1c467069d854d5571b483a27ce5918c Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 09:24:53 +0200 Subject: [PATCH 49/83] cleanup --- portable/src/implementation/simd.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index e49bbca5..fe0e3c56 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -5,7 +5,7 @@ use core::simd::{ }; use std::simd::u8x32; -use crate::{basic, compat}; +use crate::basic; pub(crate) const SIMD_CHUNK_SIZE: usize = 64; @@ -199,6 +199,7 @@ trait Lookup16 { trait SimdU8Value where LaneCount: SupportedLaneCount, + Self: Copy, { #[expect(clippy::too_many_arguments)] fn from_32_cut_off_leading( From 12d14207678f4f7b99a68cb9fb9e1552455097fd Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 12:33:58 +0200 Subject: [PATCH 50/83] make some fns const --- portable/src/compat.rs | 2 +- portable/src/implementation/mod.rs | 4 +++- portable/src/implementation/simd.rs | 1 + portable/src/lib.rs | 1 - 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/portable/src/compat.rs b/portable/src/compat.rs index edfb6a8d..5ecee0dd 100644 --- a/portable/src/compat.rs +++ b/portable/src/compat.rs @@ -32,7 +32,7 @@ impl Utf8Error { /// ... 
#[inline] #[must_use] - pub fn valid_up_to(&self) -> usize { + pub const fn valid_up_to(&self) -> usize { self.valid_up_to } diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index b2e689d9..760eeedf 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -33,7 +33,9 @@ fn validate_utf8_compat_simd(input: &[u8]) -> Result<(), crate::compat::Utf8Erro // fallback method implementations #[inline] -pub(crate) fn validate_utf8_basic_fallback(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { +pub(crate) const fn validate_utf8_basic_fallback( + input: &[u8], +) -> Result<(), crate::basic::Utf8Error> { match core::str::from_utf8(input) { Ok(_) => Ok(()), Err(_) => Err(crate::basic::Utf8Error {}), diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index fe0e3c56..afadf422 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -471,6 +471,7 @@ impl SimdU8Value<32> for u8x32 { #[inline] fn prev1(self, prev: Self) -> Self { + // FIXME? this is more than we actually need. Not sure if AVX2 support this simd_swizzle!( self, prev, diff --git a/portable/src/lib.rs b/portable/src/lib.rs index dc585a20..e96aa2ca 100644 --- a/portable/src/lib.rs +++ b/portable/src/lib.rs @@ -7,7 +7,6 @@ clippy::nursery )] #![expect(clippy::redundant_pub_crate)] // check is broken (see e.g. 
https://github.com/rust-lang/rust-clippy/issues/5369) -#![expect(clippy::missing_const_for_fn)] // not necessary most of the times #![deny(missing_docs)] #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] From 73983507a65eac118c8e1bb28ae5e3e1a15b532b Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Fri, 25 Oct 2024 14:18:01 +0200 Subject: [PATCH 51/83] nostd --- portable/src/implementation/simd.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index afadf422..0783232f 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -1,9 +1,9 @@ +use core::simd::u8x32; use core::simd::{ cmp::SimdPartialOrd, num::{SimdInt, SimdUint}, simd_swizzle, u8x16, LaneCount, Simd, SupportedLaneCount, }; -use std::simd::u8x32; use crate::basic; From 6fec796ab738b0dce32598f69e4aeeb3a49299c7 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 27 Oct 2024 06:10:24 +0100 Subject: [PATCH 52/83] fix benchmark --- bench/benches/throughput_basic_portable.rs | 2 +- bench/benches/throughput_compat_portable.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bench/benches/throughput_basic_portable.rs b/bench/benches/throughput_basic_portable.rs index e21f0965..4da0ed75 100644 --- a/bench/benches/throughput_basic_portable.rs +++ b/bench/benches/throughput_basic_portable.rs @@ -1,3 +1,3 @@ use simdutf8_bench::define_throughput_benchmark; -define_throughput_benchmark!(BenchFn::Basic); +define_throughput_benchmark!(BenchFn::BasicPortable); diff --git a/bench/benches/throughput_compat_portable.rs b/bench/benches/throughput_compat_portable.rs index 1a7cf383..bd51376f 100644 --- a/bench/benches/throughput_compat_portable.rs +++ b/bench/benches/throughput_compat_portable.rs @@ -1,3 +1,3 @@ use simdutf8_bench::define_throughput_benchmark; -define_throughput_benchmark!(BenchFn::Compat); 
+define_throughput_benchmark!(BenchFn::CompatPortable); From ea024f0ae831e8a1ffbe239947a68a801bb46ebc Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 27 Oct 2024 06:52:31 +0000 Subject: [PATCH 53/83] use 256-bit impl on avx2 --- portable/src/implementation/simd.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index 0783232f..31030e87 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -961,8 +961,18 @@ impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { } } +#[cfg(not(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "avx2" +)))] pub(crate) use v128 as auto; // FIXME: select based on target feature +#[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "avx2" +))] +pub(crate) use v256 as auto; // FIXME: select based on target feature + pub(crate) mod v128 { /// Validation implementation for CPUs supporting the SIMD extension (see module). 
/// From b6ad12bb47acc234e3994f5fdf752ba7cb21e19f Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Mon, 28 Oct 2024 06:43:17 +0000 Subject: [PATCH 54/83] missing inline --- portable/src/implementation/simd.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index 31030e87..fb15db16 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -48,6 +48,7 @@ trait SimdInputTrait { fn new(ptr: &[u8]) -> Self; fn new_partial_masked_load(slice: &[u8]) -> Self; fn new_partial_copy(slice: &[u8]) -> Self; + #[inline] fn new_partial(slice: &[u8]) -> Self where Self: Sized, From 0602f21c53a2012299441663f6159a1f37decfea Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Mon, 28 Oct 2024 06:44:51 +0000 Subject: [PATCH 55/83] don't always check the remainder --- portable/src/implementation/simd.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index fb15db16..ba9b8a35 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -740,8 +740,7 @@ where algorithm.check_utf8(&simd_input); } let rem = chunks.remainder(); - if !rem.is_ascii() { - // FIXME: simd??? 
+ if !rem.is_empty() { let simd_input = SimdInput::::new_partial(rem); algorithm.check_utf8(&simd_input); } From 01373c5f5ef4bb2311df95c0b52314ad18f87756 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Mon, 28 Oct 2024 06:50:14 +0000 Subject: [PATCH 56/83] fix check remainder only if present --- portable/src/implementation/simd.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index ba9b8a35..2eac70ad 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -801,7 +801,7 @@ where break; } let rem = chunks.remainder(); - if !rem.is_ascii() { + if !rem.is_empty() { // FIXME: simd??? let simd_input = SimdInput::::new_partial(rem); algorithm.check_utf8(&simd_input); From 8a22e8aa14b74e0251850e0aed4a2608639419d6 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Mon, 28 Oct 2024 06:50:51 +0000 Subject: [PATCH 57/83] cleanup --- portable/src/implementation/simd.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index 2eac70ad..a6dca3a5 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -802,7 +802,6 @@ where } let rem = chunks.remainder(); if !rem.is_empty() { - // FIXME: simd??? 
let simd_input = SimdInput::::new_partial(rem); algorithm.check_utf8(&simd_input); } From 5344bafdda59fc75f09de229ef57a99c476bc42e Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 31 Oct 2024 06:33:29 +0100 Subject: [PATCH 58/83] cleanup: don't use aligned buffers --- portable/src/implementation/simd.rs | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index a6dca3a5..d10a2957 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -23,19 +23,6 @@ const HAS_FAST_REDUCE_MAX: bool = false; const HAS_FAST_MASKED_LOAD: bool = false; // FIXME avx512, avx2 (32-bit chunks only?) -#[repr(C, align(32))] -#[allow(dead_code)] // only used if a 128-bit SIMD implementation is used -pub(crate) struct TempSimdChunk(pub(crate) [u8; SIMD_CHUNK_SIZE]); - -#[allow(dead_code)] // only used if there is a SIMD implementation -impl TempSimdChunk { - #[expect(clippy::inline_always)] - #[inline(always)] // FIXME needs to be forced because otherwise it is not inlined on armv7 neo - pub(crate) const fn new() -> Self { - Self([0; SIMD_CHUNK_SIZE]) - } -} - #[repr(C)] struct SimdInput where @@ -944,9 +931,7 @@ impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { let rem = len - chunks_lim; if rem > 0 { remaining_input = &remaining_input[chunks_lim..]; - let mut tmpbuf = TempSimdChunk::new(); - tmpbuf.0[..remaining_input.len()].copy_from_slice(remaining_input); - let simd_input = SimdInput::new(&tmpbuf.0); + let simd_input = SimdInput::new_partial(remaining_input); self.algorithm.check_utf8(&simd_input); } } From cbed92221a23a1050a7a6f0554578574e03d94be Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 31 Oct 2024 06:39:31 +0100 Subject: [PATCH 59/83] cmt --- portable/src/implementation/simd.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portable/src/implementation/simd.rs 
b/portable/src/implementation/simd.rs index d10a2957..6590dbbb 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -21,7 +21,7 @@ const HAS_FAST_REDUCE_MAX: bool = true; )))] const HAS_FAST_REDUCE_MAX: bool = false; -const HAS_FAST_MASKED_LOAD: bool = false; // FIXME avx512, avx2 (32-bit chunks only?) +const HAS_FAST_MASKED_LOAD: bool = false; // FIXME avx512 #[repr(C)] struct SimdInput From ce7439419a90a2dbd3622e6d7ab49378900bf2ff Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 31 Oct 2024 06:45:50 +0100 Subject: [PATCH 60/83] simplify --- portable/src/implementation/simd.rs | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index 6590dbbb..029838eb 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -921,17 +921,15 @@ impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { mut self, remaining_input: core::option::Option<&[u8]>, ) -> core::result::Result<(), basic::Utf8Error> { - if let Some(mut remaining_input) = remaining_input { + if let Some(remaining_input) = remaining_input { if !remaining_input.is_empty() { - let len = remaining_input.len(); - let chunks_lim = len - (len % SIMD_CHUNK_SIZE); - if chunks_lim > 0 { - self.update_from_chunks(&remaining_input[..chunks_lim]); + let mut chunks = remaining_input.chunks_exact(SIMD_CHUNK_SIZE); + for chunk in &mut chunks { + let input = SimdInput::new(chunk); + self.algorithm.check_utf8(&input); } - let rem = len - chunks_lim; - if rem > 0 { - remaining_input = &remaining_input[chunks_lim..]; - let simd_input = SimdInput::new_partial(remaining_input); + if !chunks.remainder().is_empty() { + let simd_input = SimdInput::new_partial(chunks.remainder()); self.algorithm.check_utf8(&simd_input); } } From 5a4259881dbcedd4856a83efeea1c00115bf1ba2 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 31 Oct 2024 06:51:56 
+0100 Subject: [PATCH 61/83] simplify --- portable/src/implementation/simd.rs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index 029838eb..e5530b99 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -852,27 +852,22 @@ impl basic::imp::Utf8Validator for Utf8ValidatorImp { return; } } - let len = input.len(); - let mut idx: usize = 0; - let iter_lim = len - (len % SIMD_CHUNK_SIZE); - while idx < iter_lim { - let input = SimdInput::new(&input[idx..idx + SIMD_CHUNK_SIZE]); + // no incomplete data, check chunks + let mut chunks = input.chunks_exact(SIMD_CHUNK_SIZE); + for chunk in &mut chunks { + let input = SimdInput::new(chunk); self.algorithm.check_utf8(&input); - idx += SIMD_CHUNK_SIZE; } - if idx < len { - let to_copy = len - idx; - self.incomplete_data[..to_copy].copy_from_slice(&input[idx..idx + to_copy]); - self.incomplete_len = to_copy; + if !chunks.remainder().is_empty() { + self.incomplete_data[..chunks.remainder().len()].copy_from_slice(chunks.remainder()); + self.incomplete_len = chunks.remainder().len(); } } #[inline] fn finalize(mut self) -> core::result::Result<(), basic::Utf8Error> { if self.incomplete_len != 0 { - for i in &mut self.incomplete_data[self.incomplete_len..] { - *i = 0; - } + self.incomplete_data[self.incomplete_len..].fill(0); self.update_from_incomplete_data(); } self.algorithm.check_incomplete_pending(); From c26a7186d0dca3edca07fad6a47b0758d14bb6df Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 31 Oct 2024 10:02:11 +0100 Subject: [PATCH 62/83] fallback, etc. 
--- portable/Cargo.toml | 6 + portable/src/basic.rs | 22 +++- portable/src/compat.rs | 18 ++- portable/src/implementation/fallback.rs | 161 ++++++++++++++++++++++++ portable/src/implementation/mod.rs | 70 ++++------- portable/src/implementation/simd.rs | 44 ++++--- portable/tests/tests.rs | 25 ++++ 7 files changed, 279 insertions(+), 67 deletions(-) create mode 100644 portable/src/implementation/fallback.rs diff --git a/portable/Cargo.toml b/portable/Cargo.toml index d4e5175a..219312bd 100644 --- a/portable/Cargo.toml +++ b/portable/Cargo.toml @@ -10,3 +10,9 @@ std = [] # expose SIMD implementations in basic::imp::* and compat::imp::* public_imp = [] +force_fallback = [] +force_simd128 = [] +force_simd256 = [] + +[dependencies] +cfg-if = "1.0.0" diff --git a/portable/src/basic.rs b/portable/src/basic.rs index 100eef80..b99107bd 100644 --- a/portable/src/basic.rs +++ b/portable/src/basic.rs @@ -193,11 +193,25 @@ pub mod imp { ) -> core::result::Result<(), basic::Utf8Error>; } - /// Best for current target + /// Best for current target as defined by compile-time arch and target features. If no fast + /// SIMD implementation is available, the scalar implementation from the standard library is + /// used as a fallback. + /// + /// However, the crate feature `force_nonsimd` forces the fallback implementation, `force_simd128` + /// forces the 128-bit SIMD implementation and `force_simd256` forces the 256-bit SIMD implementation, + /// in order of precedence. + /// pub mod auto { - pub use crate::implementation::simd::auto::validate_utf8_basic as validate_utf8; - pub use crate::implementation::simd::auto::ChunkedUtf8ValidatorImp; - pub use crate::implementation::simd::auto::Utf8ValidatorImp; + pub use crate::implementation::auto::validate_utf8_basic as validate_utf8; + pub use crate::implementation::auto::ChunkedUtf8ValidatorImp; + pub use crate::implementation::auto::Utf8ValidatorImp; + } + + /// Includes the scalar fallback implementation using 128-bit portable SIMD. 
+ pub mod fallback { + pub use crate::implementation::fallback::validate_utf8_basic as validate_utf8; + pub use crate::implementation::fallback::ChunkedUtf8ValidatorImp; + pub use crate::implementation::fallback::Utf8ValidatorImp; } /// Includes the validation implementation using 128-bit portable SIMD. diff --git a/portable/src/compat.rs b/portable/src/compat.rs index 5ecee0dd..ce3e62d9 100644 --- a/portable/src/compat.rs +++ b/portable/src/compat.rs @@ -101,8 +101,22 @@ pub fn from_utf8_mut(input: &mut [u8]) -> Result<&mut str, Utf8Error> { /// Allows direct access to the platform-specific unsafe validation implementations. #[cfg(feature = "public_imp")] pub mod imp { - /// Best for current target FIXME: 256-bit support - pub use v128 as auto; + /// Best for current target as defined by compile-time arch and target features. If no fast + /// SIMD implementation is available, the scalar implementation from the standard library is + /// used as a fallback. + /// + /// However, the crate feature `force_fallback` forces the fallback implementation, `force_simd128` + /// forces the 128-bit SIMD implementation and `force_simd256` forces the 256-bit SIMD implementation, + /// in order of precedence. + /// + pub mod auto { + pub use crate::implementation::auto::validate_utf8_compat as validate_utf8; + } + + /// Includes the scalar fallback implementation from the standard library. + pub mod fallback { + pub use crate::implementation::fallback::validate_utf8_compat as validate_utf8; + } /// Includes the validation implementation for 128-bit portable SIMD. pub mod v128 { diff --git a/portable/src/implementation/fallback.rs b/portable/src/implementation/fallback.rs new file mode 100644 index 00000000..f366f5cf --- /dev/null +++ b/portable/src/implementation/fallback.rs @@ -0,0 +1,161 @@ +/// Fallback implementation using the standard library. +/// +/// # Errors +/// Returns the zero-sized [`basic::Utf8Error`] on failure.
+#[inline] +pub const fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { + match core::str::from_utf8(input) { + Ok(_) => Ok(()), + Err(_) => Err(crate::basic::Utf8Error {}), + } +} + +/// Fallback implementation using the standard library. +/// +/// # Errors +/// Returns [`compat::Utf8Error`] with detailed error information on failure. +#[inline] +pub fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { + super::validate_utf8_at_offset(input, 0) +} + +/// Low-level implementation of the [`basic::imp::Utf8Validator`] trait. +/// +/// This implementation uses the scalar validation from the standard library. It does not +/// require any special CPU features and can be used wherever the `core` library is +/// available. +#[cfg(feature = "public_imp")] +pub struct Utf8ValidatorImp { + incomplete_data: [u8; 4], + incomplete_len: u8, + err: bool, +} + +use core::panic; + +#[cfg(feature = "public_imp")] +pub use Utf8ValidatorImp as ChunkedUtf8ValidatorImp; + +#[cfg(feature = "public_imp")] +impl Utf8ValidatorImp { + #[inline] + #[expect(clippy::cast_possible_truncation)] + fn update(&mut self, mut input: &[u8]) { + if self.err { + return; + } + if self.incomplete_len > 0 { + let total_bytes_needed: usize = match self.incomplete_data[0] { + 0..0b1000_0000 => { + panic!("ASCII data should never be incomplete"); + } + 0b1000_0000..0b1100_0000 => { + // first byte cannot be a continuation byte + self.err = true; + return; + } + 0b1100_0000..0b1110_0000 => 2, + 0b1110_0000..0b1111_0000 => 3, + 0b1111_0000..0b1111_1000 => 4, + _ => { + // invalid byte for starting sequence + self.err = true; + return; + } + }; + if self.incomplete_len as usize >= total_bytes_needed { + // actually errored on previous update + self.err = true; + return; + } + let bytes_needed = total_bytes_needed - self.incomplete_len as usize; + let to_copy = core::cmp::min(bytes_needed, input.len()); + self.incomplete_data + [self.incomplete_len as
usize..self.incomplete_len as usize + to_copy] + .copy_from_slice(&input[..to_copy]); + if to_copy < bytes_needed { + self.incomplete_len += to_copy as u8; + return; + } + if core::str::from_utf8(&self.incomplete_data[..total_bytes_needed]).is_err() { + self.err = true; + return; + } + self.incomplete_len = 0; + input = &input[to_copy..]; + } + if let Err(e) = core::str::from_utf8(input) { + if input.len() - e.valid_up_to() > 3 { + self.err = true; + return; + } + self.incomplete_len = (input.len() - e.valid_up_to()) as u8; + self.incomplete_data[..self.incomplete_len as usize] + .copy_from_slice(&input[e.valid_up_to()..]); + } + } + + #[inline] + const fn finalize(self) -> core::result::Result<(), crate::basic::Utf8Error> { + if self.err || self.incomplete_len > 0 { + Err(crate::basic::Utf8Error {}) + } else { + Ok(()) + } + } +} + +#[cfg(feature = "public_imp")] +impl crate::basic::imp::Utf8Validator for Utf8ValidatorImp { + #[inline] + #[must_use] + fn new() -> Self { + Self { + incomplete_data: [0; 4], + incomplete_len: 0, + err: false, + } + } + + #[inline] + fn update(&mut self, input: &[u8]) { + if input.is_empty() { + return; + } + self.update(input); + } + + #[inline] + fn finalize(self) -> core::result::Result<(), crate::basic::Utf8Error> { + self.finalize() + } +} + +#[cfg(feature = "public_imp")] +impl crate::basic::imp::ChunkedUtf8Validator for Utf8ValidatorImp { + #[inline] + #[must_use] + fn new() -> Self { + Self { + incomplete_data: [0; 4], + incomplete_len: 0, + err: false, + } + } + + #[inline] + fn update_from_chunks(&mut self, input: &[u8]) { + self.update(input); + } + + #[inline] + fn finalize( + mut self, + remaining_input: core::option::Option<&[u8]>, + ) -> core::result::Result<(), crate::basic::Utf8Error> { + if let Some(remaining_input) = remaining_input { + self.update(remaining_input); + } + self.finalize() + } +} diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 760eeedf..15f815a7 100644 --- 
a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -2,49 +2,53 @@ #![forbid(unsafe_code)] +pub(crate) mod fallback; pub(crate) mod simd; +cfg_if::cfg_if! { + if #[cfg(feature = "force_fallback")] { + pub(crate) use fallback as auto; + } else if #[cfg(feature = "force_simd128")] { + pub(crate) use simd::v128 as auto; + } else if #[cfg(feature = "force_simd256")] { + pub(crate) use simd::v256 as auto; + + // known good configurations + } else if #[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "avx2" + ))] { + pub(crate) use simd::v256 as auto; + } else { + pub(crate) use fallback as auto; + } +} + #[inline] -pub(crate) fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { +pub(crate) const fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { if input.len() < simd::SIMD_CHUNK_SIZE { - return validate_utf8_basic_fallback(input); + return fallback::validate_utf8_basic(input); } validate_utf8_basic_simd(input) } #[inline(never)] -fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { - simd::auto::validate_utf8_basic(input) +const fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { + auto::validate_utf8_basic(input) } #[inline] pub(crate) fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { if input.len() < simd::SIMD_CHUNK_SIZE { - return validate_utf8_compat_fallback(input); + return fallback::validate_utf8_compat(input); } validate_utf8_compat_simd(input) } fn validate_utf8_compat_simd(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { - simd::auto::validate_utf8_compat(input) -} - -// fallback method implementations -#[inline] -pub(crate) const fn validate_utf8_basic_fallback( - input: &[u8], -) -> Result<(), crate::basic::Utf8Error> { - match core::str::from_utf8(input) { - Ok(_) => Ok(()), - Err(_) => Err(crate::basic::Utf8Error {}), - } -} - -#[inline] -pub(crate) 
fn validate_utf8_compat_fallback(input: &[u8]) -> Result<(), crate::compat::Utf8Error> { - validate_utf8_at_offset(input, 0) + auto::validate_utf8_compat(input) } type Utf8ErrorCompat = crate::compat::Utf8Error; @@ -63,25 +67,3 @@ pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), }), } } - -#[cold] -#[expect(clippy::unwrap_used)] -#[allow(dead_code)] // only used if there is a SIMD implementation -pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> Utf8ErrorCompat { - let offset = if failing_block_pos == 0 { - // Error must be in this block since it is the first. - 0 - } else { - // The previous block is OK except for a possible continuation over the block boundary. - // We go backwards over the last three bytes of the previous block and find the - // last non-continuation byte as a starting point for an std validation. If the last - // three bytes are all continuation bytes then the previous block ends with a four byte - // UTF-8 codepoint, is thus complete and valid UTF-8. We start the check with the - // current block in that case. 
- (1..=3) - .find(|i| input[failing_block_pos - i] >> 6 != 0b10) - .map_or(failing_block_pos, |i| failing_block_pos - i) - }; - // UNWRAP: safe because the SIMD UTF-8 validation found an error - validate_utf8_at_offset(input, offset).unwrap_err() -} diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index e5530b99..6806bc2d 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -938,20 +938,30 @@ impl basic::imp::ChunkedUtf8Validator for ChunkedUtf8ValidatorImp { } } -#[cfg(not(all( - any(target_arch = "x86_64", target_arch = "x86"), - target_feature = "avx2" -)))] -pub(crate) use v128 as auto; // FIXME: select based on target feature - -#[cfg(all( - any(target_arch = "x86_64", target_arch = "x86"), - target_feature = "avx2" -))] -pub(crate) use v256 as auto; // FIXME: select based on target feature +#[cold] +#[expect(clippy::unwrap_used)] +#[allow(dead_code)] // only used if there is a SIMD implementation +pub(crate) fn get_compat_error(input: &[u8], failing_block_pos: usize) -> crate::compat::Utf8Error { + let offset = if failing_block_pos == 0 { + // Error must be in this block since it is the first. + 0 + } else { + // The previous block is OK except for a possible continuation over the block boundary. + // We go backwards over the last three bytes of the previous block and find the + // last non-continuation byte as a starting point for an std validation. If the last + // three bytes are all continuation bytes then the previous block ends with a four byte + // UTF-8 codepoint, is thus complete and valid UTF-8. We start the check with the + // current block in that case. 
+ (1..=3) + .find(|i| input[failing_block_pos - i] >> 6 != 0b10) + .map_or(failing_block_pos, |i| failing_block_pos - i) + }; + // UNWRAP: safe because the SIMD UTF-8 validation found an error + super::validate_utf8_at_offset(input, offset).unwrap_err() +} pub(crate) mod v128 { - /// Validation implementation for CPUs supporting the SIMD extension (see module). + /// Validation implementation using 128-bit SIMD. /// /// # Errors /// Returns the zero-sized [`basic::Utf8Error`] on failure. @@ -960,7 +970,7 @@ pub(crate) mod v128 { super::Utf8CheckAlgorithm::<16, 4>::validate_utf8_basic(input) } - /// Validation implementation for CPUs supporting the SIMD extension (see module). + /// Validation implementation using 128-bit SIMD. /// /// # Errors /// Returns [`compat::Utf8Error`] with detailed error information on failure. @@ -969,7 +979,7 @@ pub(crate) mod v128 { input: &[u8], ) -> core::result::Result<(), crate::compat::Utf8Error> { super::Utf8CheckAlgorithm::<16, 4>::validate_utf8_compat_simd0(input) - .map_err(|idx| crate::implementation::get_compat_error(input, idx)) + .map_err(|idx| super::get_compat_error(input, idx)) } #[cfg(feature = "public_imp")] @@ -979,7 +989,7 @@ pub(crate) mod v128 { } pub(crate) mod v256 { - /// Validation implementation for CPUs supporting the SIMD extension (see module). + /// Validation implementation using 256-bit SIMD. /// /// # Errors /// Returns the zero-sized [`basic::Utf8Error`] on failure. @@ -988,7 +998,7 @@ pub(crate) mod v256 { super::Utf8CheckAlgorithm::<32, 2>::validate_utf8_basic(input) } - /// Validation implementation for CPUs supporting the SIMD extension (see module). + /// Validation implementation using 256-bit SIMD. /// /// # Errors /// Returns [`compat::Utf8Error`] with detailed error information on failure. 
@@ -997,7 +1007,7 @@ pub(crate) mod v256 { input: &[u8], ) -> core::result::Result<(), crate::compat::Utf8Error> { super::Utf8CheckAlgorithm::<32, 2>::validate_utf8_compat_simd0(input) - .map_err(|idx| crate::implementation::get_compat_error(input, idx)) + .map_err(|idx| super::get_compat_error(input, idx)) } #[cfg(feature = "public_imp")] diff --git a/portable/tests/tests.rs b/portable/tests/tests.rs index 96cfa9ee..55b3e399 100644 --- a/portable/tests/tests.rs +++ b/portable/tests/tests.rs @@ -70,6 +70,16 @@ mod public_imp { pub(super) fn test_valid(input: &[u8]) { #[cfg(feature = "public_imp")] { + assert!(simdutf8_portable::basic::imp::fallback::validate_utf8(input).is_ok()); + assert!(simdutf8_portable::compat::imp::fallback::validate_utf8(input).is_ok()); + + test_streaming::( + input, true, + ); + test_chunked_streaming::< + simdutf8_portable::basic::imp::fallback::ChunkedUtf8ValidatorImp, + >(input, true); + assert!(simdutf8_portable::basic::imp::v128::validate_utf8(input).is_ok()); assert!(simdutf8_portable::compat::imp::v128::validate_utf8(input).is_ok()); @@ -92,6 +102,18 @@ mod public_imp { pub(super) fn test_invalid(input: &[u8], valid_up_to: usize, error_len: Option) { #[cfg(feature = "public_imp")] { + assert!(simdutf8_portable::basic::imp::fallback::validate_utf8(input).is_err()); + let err = simdutf8_portable::compat::imp::fallback::validate_utf8(input).unwrap_err(); + assert_eq!(err.valid_up_to(), valid_up_to); + assert_eq!(err.error_len(), error_len); + + test_streaming::( + input, false, + ); + test_chunked_streaming::< + simdutf8_portable::basic::imp::fallback::ChunkedUtf8ValidatorImp, + >(input, false); + assert!(simdutf8_portable::basic::imp::v128::validate_utf8(input).is_err()); let err = simdutf8_portable::compat::imp::v128::validate_utf8(input).unwrap_err(); assert_eq!(err.valid_up_to(), valid_up_to); @@ -169,6 +191,9 @@ mod public_imp { test_chunked_streaming_with_chunk_size::< simdutf8_portable::basic::imp::v128::ChunkedUtf8ValidatorImp, 
>(b"abcd", 1, true); + test_chunked_streaming_with_chunk_size::< + simdutf8_portable::basic::imp::v256::ChunkedUtf8ValidatorImp, + >(b"abcd", 1, true); } } From 0e16dd26c7f6673f63776081330632c4b7c77ca6 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 31 Oct 2024 14:37:33 +0100 Subject: [PATCH 63/83] simplify/optimize --- portable/src/implementation/fallback.rs | 77 +++++++++++-------------- 1 file changed, 34 insertions(+), 43 deletions(-) diff --git a/portable/src/implementation/fallback.rs b/portable/src/implementation/fallback.rs index f366f5cf..fe405f67 100644 --- a/portable/src/implementation/fallback.rs +++ b/portable/src/implementation/fallback.rs @@ -26,8 +26,7 @@ pub fn validate_utf8_compat(input: &[u8]) -> Result<(), crate::compat::Utf8Error /// available. #[cfg(feature = "public_imp")] pub struct Utf8ValidatorImp { - incomplete_data: [u8; 4], - incomplete_len: u8, + expected_cont_bytes: u8, err: bool, } @@ -39,65 +38,59 @@ pub use Utf8ValidatorImp as ChunkedUtf8ValidatorImp; #[cfg(feature = "public_imp")] impl Utf8ValidatorImp { #[inline] - #[expect(clippy::cast_possible_truncation)] fn update(&mut self, mut input: &[u8]) { if self.err { return; } - if self.incomplete_len > 0 { - let total_bytes_needed: usize = match self.incomplete_data[0] { - 0..0b1000_0000 => { - panic!("ASCII data should never be incomplete"); - } - 0b1000_0000..0b1100_0000 => { - // first byte cannot be a continuation byte + if self.expected_cont_bytes > 0 { + let to_check = (self.expected_cont_bytes as usize).min(input.len()); + for b in &input[..to_check] { + if b & 0b1100_0000 != 0b1000_0000 { + // not a continuation byte self.err = true; return; } - 0b1100_0000..0b1110_0000 => 2, - 0b1110_0000..0b1111_0000 => 3, - 0b1111_0000..0b1111_1000 => 4, + self.expected_cont_bytes -= 1; + } + if self.expected_cont_bytes > 0 { + // not enough continuation bytes + return; + } + input = &input[to_check..]; + } + if let Err(e) = core::str::from_utf8(input) { + // cannot wrap, since there 
is at least one byte left which is not valid UTF-8 + // by itself + self.expected_cont_bytes = match input[e.valid_up_to()] { + 0b1100_0000..0b1110_0000 => 1, + 0b1110_0000..0b1111_0000 => 2, + 0b1111_0000..0b1111_1000 => 3, _ => { // invalid byte for starting sequence self.err = true; return; } }; - if self.incomplete_len as usize >= total_bytes_needed { - // actually errored on previous update - self.err = true; - return; - } - let bytes_needed = total_bytes_needed - self.incomplete_len as usize; - let to_copy = core::cmp::min(bytes_needed, input.len()); - self.incomplete_data - [self.incomplete_len as usize..self.incomplete_len as usize + to_copy] - .copy_from_slice(&input[..to_copy]); - if to_copy < bytes_needed { - self.incomplete_len += to_copy as u8; - return; - } - if core::str::from_utf8(&self.incomplete_data[..total_bytes_needed]).is_err() { + let rem_input = input.len() - e.valid_up_to() - 1; + if rem_input >= self.expected_cont_bytes as usize { + // too many continuation bytes so they are not valid self.err = true; return; } - self.incomplete_len = 0; - input = &input[to_copy..]; - } - if let Err(e) = core::str::from_utf8(input) { - if input.len() - e.valid_up_to() > 3 { - self.err = true; - return; + for i in 0..rem_input { + if input[e.valid_up_to() + i + 1] & 0b1100_0000 != 0b1000_0000 { + // not a continuation byte + self.err = true; + return; + } + self.expected_cont_bytes -= 1; } - self.incomplete_len = (input.len() - e.valid_up_to()) as u8; - self.incomplete_data[..self.incomplete_len as usize] - .copy_from_slice(&input[e.valid_up_to()..]); } } #[inline] const fn finalize(self) -> core::result::Result<(), crate::basic::Utf8Error> { - if self.err || self.incomplete_len > 0 { + if self.err || self.expected_cont_bytes > 0 { Err(crate::basic::Utf8Error {}) } else { Ok(()) @@ -111,8 +104,7 @@ impl crate::basic::imp::Utf8Validator for Utf8ValidatorImp { #[must_use] fn new() -> Self { Self { - incomplete_data: [0; 4], - incomplete_len: 0, + 
expected_cont_bytes: 0, err: false, } } @@ -137,8 +129,7 @@ impl crate::basic::imp::ChunkedUtf8Validator for Utf8ValidatorImp { #[must_use] fn new() -> Self { Self { - incomplete_data: [0; 4], - incomplete_len: 0, + expected_cont_bytes: 0, err: false, } } From b65f1ec65c0b3890c089121b341af0f9283248d6 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 31 Oct 2024 14:37:59 +0100 Subject: [PATCH 64/83] cleanup --- portable/src/implementation/fallback.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/portable/src/implementation/fallback.rs b/portable/src/implementation/fallback.rs index fe405f67..485e1c31 100644 --- a/portable/src/implementation/fallback.rs +++ b/portable/src/implementation/fallback.rs @@ -30,8 +30,6 @@ pub struct Utf8ValidatorImp { err: bool, } -use core::panic; - #[cfg(feature = "public_imp")] pub use Utf8ValidatorImp as ChunkedUtf8ValidatorImp; From 48168d041325724a01d3d7f01d55403fd5a83472 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 31 Oct 2024 14:43:56 +0100 Subject: [PATCH 65/83] simplify --- portable/src/implementation/fallback.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/portable/src/implementation/fallback.rs b/portable/src/implementation/fallback.rs index 485e1c31..c25211c8 100644 --- a/portable/src/implementation/fallback.rs +++ b/portable/src/implementation/fallback.rs @@ -70,12 +70,7 @@ impl Utf8ValidatorImp { } }; let rem_input = input.len() - e.valid_up_to() - 1; - if rem_input >= self.expected_cont_bytes as usize { - // too many continuation bytes so they are not valid - self.err = true; - return; - } - for i in 0..rem_input { + for i in 0..rem_input.min(self.expected_cont_bytes as usize) { if input[e.valid_up_to() + i + 1] & 0b1100_0000 != 0b1000_0000 { // not a continuation byte self.err = true; @@ -83,6 +78,7 @@ impl Utf8ValidatorImp { } self.expected_cont_bytes -= 1; } + debug_assert!(self.expected_cont_bytes != 0); // otherwise from_utf8 would not have errored } } From 
a168c221030a10b17838dcaa4ff43b943557d9cd Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Thu, 31 Oct 2024 14:52:54 +0100 Subject: [PATCH 66/83] simplify --- portable/src/implementation/fallback.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/portable/src/implementation/fallback.rs b/portable/src/implementation/fallback.rs index c25211c8..95f03755 100644 --- a/portable/src/implementation/fallback.rs +++ b/portable/src/implementation/fallback.rs @@ -69,16 +69,17 @@ impl Utf8ValidatorImp { return; } }; - let rem_input = input.len() - e.valid_up_to() - 1; - for i in 0..rem_input.min(self.expected_cont_bytes as usize) { - if input[e.valid_up_to() + i + 1] & 0b1100_0000 != 0b1000_0000 { + let rem_input = &input[e.valid_up_to() + 1..]; + let rem_input = &rem_input[0..rem_input.len().min(self.expected_cont_bytes as usize)]; + for b in rem_input { + if b & 0b1100_0000 != 0b1000_0000 { // not a continuation byte self.err = true; return; } self.expected_cont_bytes -= 1; } - debug_assert!(self.expected_cont_bytes != 0); // otherwise from_utf8 would not have errored + debug_assert!(self.expected_cont_bytes > 0); // otherwise from_utf8 would not have errored } } From add05555c4e1c33999e9653ade13c3c640138f5b Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 3 Nov 2024 07:56:54 +0100 Subject: [PATCH 67/83] nit --- portable/src/implementation/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 15f815a7..9863b944 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -12,7 +12,6 @@ cfg_if::cfg_if! 
{ pub(crate) use simd::v128 as auto; } else if #[cfg(feature = "force_simd256")] { pub(crate) use simd::v256 as auto; - // known good configurations } else if #[cfg(all( any(target_arch = "x86_64", target_arch = "x86"), From e8b3a7166e3e3ec425cce32436e38c2357376593 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 3 Nov 2024 08:30:47 +0100 Subject: [PATCH 68/83] nit --- portable/src/implementation/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 9863b944..66e3b52b 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -54,7 +54,7 @@ type Utf8ErrorCompat = crate::compat::Utf8Error; #[inline] #[expect(clippy::cast_possible_truncation)] -pub(crate) fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), Utf8ErrorCompat> { +fn validate_utf8_at_offset(input: &[u8], offset: usize) -> Result<(), Utf8ErrorCompat> { match core::str::from_utf8(&input[offset..]) { Ok(_) => Ok(()), Err(err) => Err(Utf8ErrorCompat { From d586ea18714f3973ebdd09f75787d3cdf0784280 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 3 Nov 2024 08:40:07 +0100 Subject: [PATCH 69/83] more supported archs --- portable/src/implementation/mod.rs | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 66e3b52b..38c44c01 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -18,13 +18,29 @@ cfg_if::cfg_if! 
{ target_feature = "avx2" ))] { pub(crate) use simd::v256 as auto; + } else if #[cfg(all( + any(target_arch = "x86_64", target_arch = "x86"), + target_feature = "sse4.2" + ))] { + pub(crate) use simd::v128 as auto; + } else if #[cfg(all( + target_arch = "aarch64", + target_feature = "neon" + ))] { + pub(crate) use simd::v128 as auto; + } else if #[cfg(all( + target_arch = "arm", + target_feature = "v7", + target_endian = "little" + ))] { + pub(crate) use simd::v128 as auto; } else { pub(crate) use fallback as auto; } } #[inline] -pub(crate) const fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { +pub(crate) fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { if input.len() < simd::SIMD_CHUNK_SIZE { return fallback::validate_utf8_basic(input); } @@ -33,7 +49,8 @@ pub(crate) const fn validate_utf8_basic(input: &[u8]) -> Result<(), crate::basic } #[inline(never)] -const fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { +#[allow(clippy::missing_const_for_fn)] +fn validate_utf8_basic_simd(input: &[u8]) -> Result<(), crate::basic::Utf8Error> { auto::validate_utf8_basic(input) } From 6b274794e12fe661dd18b79e3cbb5a49539e94f1 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Mon, 4 Nov 2024 07:48:16 +0100 Subject: [PATCH 70/83] simplify, nits --- portable/src/implementation/simd.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/portable/src/implementation/simd.rs b/portable/src/implementation/simd.rs index 6806bc2d..0fa878f5 100644 --- a/portable/src/implementation/simd.rs +++ b/portable/src/implementation/simd.rs @@ -157,8 +157,8 @@ where LaneCount: SupportedLaneCount, { pub(crate) prev: Simd, - pub(crate) incomplete: Simd, // FIXME: should be a mask? - pub(crate) error: Simd, // FIXME: should be a mask? + pub(crate) incomplete: Simd, // FIXME: could be a mask? + pub(crate) error: Simd, // FIXME: could be a mask? 
} trait Lookup16 { @@ -723,8 +723,7 @@ where } } for chunk in chunks.by_ref() { - let simd_input = SimdInput::::new(chunk); - algorithm.check_utf8(&simd_input); + algorithm.check_utf8(&SimdInput::::new(chunk)); } let rem = chunks.remainder(); if !rem.is_empty() { From bd421506d5539e72c19c23a78fad82cd506feec0 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Mon, 4 Nov 2024 07:48:46 +0100 Subject: [PATCH 71/83] bench fix --- bench/Cargo.lock | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bench/Cargo.lock b/bench/Cargo.lock index c3d5eee0..765bdc4f 100644 --- a/bench/Cargo.lock +++ b/bench/Cargo.lock @@ -1881,6 +1881,9 @@ dependencies = [ [[package]] name = "simdutf8-portable" version = "0.1.0" +dependencies = [ + "cfg-if 1.0.0", +] [[package]] name = "smallvec" From 64a9c877de94e99c874edaffaca174dd70b8056a Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sat, 9 Nov 2024 10:27:00 +0100 Subject: [PATCH 72/83] fixes --- portable/src/implementation/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 38c44c01..44228834 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -3,6 +3,8 @@ #![forbid(unsafe_code)] pub(crate) mod fallback; + +#[allow(unused)] pub(crate) mod simd; cfg_if::cfg_if! { @@ -30,8 +32,9 @@ cfg_if::cfg_if! 
{ pub(crate) use simd::v128 as auto; } else if #[cfg(all( target_arch = "arm", + target_endian = "little", target_feature = "v7", - target_endian = "little" + target_feature = "neon" ))] { pub(crate) use simd::v128 as auto; } else { From b7301abab08e8a0d5469f44aeaf6fa09056d13db Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sat, 9 Nov 2024 10:31:07 +0100 Subject: [PATCH 73/83] bench partial --- bench/src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bench/src/lib.rs b/bench/src/lib.rs index 3ee33aee..0d3b57ff 100644 --- a/bench/src/lib.rs +++ b/bench/src/lib.rs @@ -138,11 +138,12 @@ fn get_valid_slice_of_len_or_more_aligned( fn bench(c: &mut Criterion, name: &str, bytes: &[u8], bench_fn: BenchFn) { let mut group = c.benchmark_group(name); for i in [1, 8, 64, 512, 4096, 65536, 131072].iter() { + let i = i + 33; let alignment = Alignment { boundary: 64, offset: 8, // 8 is the default alignment on 64-bit, so this is what can be expected worst-case }; - let (vec, offset) = get_valid_slice_of_len_or_more_aligned(bytes, *i, alignment); + let (vec, offset) = get_valid_slice_of_len_or_more_aligned(bytes, i, alignment); let slice = &vec[offset..]; assert_eq!( (slice.as_ptr() as usize) % alignment.boundary, From d23f56941f0d81c696cf85c13a189ae61fd69192 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 10 Nov 2024 06:10:07 +0100 Subject: [PATCH 74/83] workspace (nightly only) --- nightly_workspace/.gitignore | 2 ++ nightly_workspace/Cargo.toml | 6 ++++++ nightly_workspace/simdutf8 | 1 + 3 files changed, 9 insertions(+) create mode 100644 nightly_workspace/.gitignore create mode 100644 nightly_workspace/Cargo.toml create mode 120000 nightly_workspace/simdutf8 diff --git a/nightly_workspace/.gitignore b/nightly_workspace/.gitignore new file mode 100644 index 00000000..1b72444a --- /dev/null +++ b/nightly_workspace/.gitignore @@ -0,0 +1,2 @@ +/Cargo.lock +/target diff --git a/nightly_workspace/Cargo.toml b/nightly_workspace/Cargo.toml new file 
mode 100644 index 00000000..0beb84a2 --- /dev/null +++ b/nightly_workspace/Cargo.toml @@ -0,0 +1,6 @@ +[workspace] +members = [ + "simdutf8", + "simdutf8/portable", + "simdutf8/bench" +] diff --git a/nightly_workspace/simdutf8 b/nightly_workspace/simdutf8 new file mode 120000 index 00000000..a96aa0ea --- /dev/null +++ b/nightly_workspace/simdutf8 @@ -0,0 +1 @@ +.. \ No newline at end of file From 3cecf1f89aad5ad3579e69a2c945dc1822d02a2e Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 10 Nov 2024 06:28:13 +0100 Subject: [PATCH 75/83] update --- portable/Cargo.toml | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/portable/Cargo.toml b/portable/Cargo.toml index 219312bd..67920dac 100644 --- a/portable/Cargo.toml +++ b/portable/Cargo.toml @@ -1,7 +1,16 @@ [package] name = "simdutf8-portable" -version = "0.1.0" +version = "0.0.1" +authors = ["Hans Kratz "] edition = "2021" +description = "SIMD-accelerated UTF-8 validation using core::simd (experimental)" +documentation = "https://docs.rs/simdutf8-portable/" +homepage = "https://github.com/rusticstuff/simdutf8/tree/main/portable" +repository = "https://github.com/rusticstuff/simdutf8" +readme = "README.md" +keywords = ["utf-8", "unicode", "string", "validation", "simd"] +categories = ["encoding", "algorithms", "no-std"] +license = "MIT OR Apache-2.0" [features] default = ["std"] @@ -10,9 +19,22 @@ std = [] # expose SIMD implementations in basic::imp::* and compat::imp::* public_imp = [] + +# features to force a certain implementation. Features earlier in the list take +# precedence. + +# force non-SIMD fallback implementation (for testing) force_fallback = [] +# force 128-bit/256-bit SIMD implementation. +# CAVE: slower than even the fallback implementation if not all SIMD functions +# have a fast implementation, in particular `swizzle_dyn` needs to be fast. 
force_simd128 = [] force_simd256 = [] +[package.metadata.docs.rs] +features = ["public_imp"] +rustdoc-args = ["--cfg", "docsrs"] +default-target = "x86_64-unknown-linux-gnu" + [dependencies] cfg-if = "1.0.0" From 9dd881b770f934f10fe7c7800dd6a395ce9641b5 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 10 Nov 2024 06:34:51 +0100 Subject: [PATCH 76/83] doc --- portable/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portable/src/lib.rs b/portable/src/lib.rs index e96aa2ca..7cf85aba 100644 --- a/portable/src/lib.rs +++ b/portable/src/lib.rs @@ -9,7 +9,7 @@ #![expect(clippy::redundant_pub_crate)] // check is broken (see e.g. https://github.com/rust-lang/rust-clippy/issues/5369) #![deny(missing_docs)] #![cfg_attr(not(feature = "std"), no_std)] -#![cfg_attr(docsrs, feature(doc_cfg))] +#![feature(doc_auto_cfg)] #![feature(portable_simd)] //! Blazingly fast API-compatible UTF-8 validation for Rust using SIMD extensions, based on the implementation from From c935c4b3287457a0793aa73180567d8c4663f222 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 10 Nov 2024 07:39:46 +0100 Subject: [PATCH 77/83] .prettier cfg --- portable/.prettierrc.toml | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 portable/.prettierrc.toml diff --git a/portable/.prettierrc.toml b/portable/.prettierrc.toml new file mode 100644 index 00000000..3a7aca30 --- /dev/null +++ b/portable/.prettierrc.toml @@ -0,0 +1,2 @@ +proseWrap = "always" +printWidth = 100 From 563fb03d2addf10d29c9bf070280be3246913191 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 10 Nov 2024 07:39:59 +0100 Subject: [PATCH 78/83] initial README --- portable/README.md | 98 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 portable/README.md diff --git a/portable/README.md b/portable/README.md new file mode 100644 index 00000000..4c8b63b5 --- /dev/null +++ b/portable/README.md @@ -0,0 +1,98 @@ 
+[![CI](https://github.com/rusticstuff/simdutf8/actions/workflows/portable.yml/badge.svg)](https://github.com/rusticstuff/simdutf8/actions/workflows/portable.yml) +[![crates.io](https://img.shields.io/crates/v/simdutf8-portable.svg)](https://crates.io/crates/simdutf8-portable) +[![docs.rs](https://docs.rs/simdutf8-portable/badge.svg)](https://docs.rs/simdutf8-portable) + +# simdutf8-portable – Fast UTF-8 validation using `core::simd` (portable SIMD) + +Fast API-compatible UTF-8 validation for Rust using the experimental architecture-independan +[`core::simd`](https://doc.rust-lang.org/core/simd/index.html) (portable SIMD) module from the +standard library. An up-to-date nightly Rust compiler is required. The API and the algorithm used +are the same as in the [simdutf8](https://crates.io/crates/simdutf8) crate. + +## Features + +- `#[forbid(unsafe_code)]` implementation +- `auto` module which selects the best implementation for known-good targets at compile-time + including falling back to a scalar implementation if a fast SIMD implementation is not possible. +- Future-proof: The implementation is designed to be future-proof and will be updated as the + `core::simd` module evolves +- `no_std` support +- no unnecessary bounds checks in the compiled code (as of nightly-xx) +- fast out of the box for `aarch64` and `wasm32` targets +- Features to force a specific implementation at compile-time +- Support 128-bit and 256-bit SIMD + +## Limitations + +- target-feature +- no runtime implementation selection +- slower + - memcpy calls +- swizzle_dyn + - slow on uncommon targets + - requires -Zbuild-std for sse4.2, avx2 support if not part of the target architecture + +## Quick start + +Add the dependency to your Cargo.toml file: + +```toml +[dependencies] +simdutf8-portable = "0.01" +``` + +Use `simdutf8-portable::basic::from_utf8()` as a drop-in replacement for `std::str::from_utf8()`. 
+ +```rust +use simdutf8_portable::basic::from_utf8; + +println!("{}", from_utf8(b"I \xE2\x9D\xA4\xEF\xB8\x8F UTF-8!").unwrap()); +``` + +If you need detailed information on validation failures, use +`simdutf8_portable::compat::from_utf8()` instead. + +```rust +use simdutf8_portable::compat::from_utf8; + +let err = from_utf8(b"I \xE2\x9D\xA4\xEF\xB8 UTF-8!").unwrap_err(); +assert_eq!(err.valid_up_to(), 5); +assert_eq!(err.error_len(), Some(2)); +``` + +## APIs + +See docs or [simdutf8](https://crates.io/crates/simdutf8). + +## Minimum Supported Rust Version (MSRV) + +Rust nightly as of xx xx + +## Architecture notes + +## Benchmarks + +## Thanks + +- to [Heinz N. Gies](https://github.com/licenser) for the initial portable SIMD implementation. +- to the authors of simdjson for coming up with the high-performance SIMD implementation and in + particular to Daniel Lemire for his feedback. It was very helpful. +- to the authors of the simdjson Rust port who did most of the heavy lifting of porting the C++ code + to Rust. +## License + +This code is dual-licensed under the +[Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0.html) and the +[MIT License](https://opensource.org/licenses/MIT). + +It is based on code distributed with simd-json.rs, the Rust port of simdjson, which is dual-licensed +under the MIT license and Apache 2.0 license as well. + +simdjson itself is distributed under the Apache License 2.0. 
+ +## References + +John Keiser, Daniel Lemire, +[Validating UTF-8 In Less Than One Instruction Per Byte](https://arxiv.org/abs/2010.03090), +Software: Practice and Experience 51 (5), 2021 From 9766fb6fd3aff1b4779adfded8a7f97353606479 Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 10 Nov 2024 08:47:50 +0100 Subject: [PATCH 79/83] README wip --- portable/README.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/portable/README.md b/portable/README.md index 4c8b63b5..9cd9136f 100644 --- a/portable/README.md +++ b/portable/README.md @@ -4,26 +4,28 @@ # simdutf8-portable – Fast UTF-8 validation using `core::simd` (portable SIMD) -Fast API-compatible UTF-8 validation for Rust using the experimental architecture-independan +Fast API-compatible UTF-8 validation for Rust using the experimental architecture-independant [`core::simd`](https://doc.rust-lang.org/core/simd/index.html) (portable SIMD) module from the standard library. An up-to-date nightly Rust compiler is required. The API and the algorithm used are the same as in the [simdutf8](https://crates.io/crates/simdutf8) crate. ## Features -- `#[forbid(unsafe_code)]` implementation +- no unsafe code (`#[forbid(unsafe_code)]`) in the implementation - `auto` module which selects the best implementation for known-good targets at compile-time including falling back to a scalar implementation if a fast SIMD implementation is not possible. -- Future-proof: The implementation is designed to be future-proof and will be updated as the - `core::simd` module evolves +- new platforms need no new code as long as they are supported by `core::simd`. 
- `no_std` support -- no unnecessary bounds checks in the compiled code (as of nightly-xx) - fast out of the box for `aarch64` and `wasm32` targets -- Features to force a specific implementation at compile-time -- Support 128-bit and 256-bit SIMD +- `force_simd256`, `force_simd128` and `force_fallback` crate features to force a specific + implementation at compile-time +- supports 128-bit and 256-bit SIMD +- There are no unnecessary bounds checks in the compiled code (as of nightly-xx) ## Limitations +- uses memcpy because of forbid(unsafe), see https://github.com/llvm/llvm-project/issues/87440 +- Zero-overhead abstractions are not so zero-overhead - target-feature - no runtime implementation selection - slower From 803b4bd9444686898b7cbf59c5c9d7afdf6474cb Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 10 Nov 2024 09:45:31 +0100 Subject: [PATCH 80/83] wasm32 simd128 support --- portable/src/implementation/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/portable/src/implementation/mod.rs b/portable/src/implementation/mod.rs index 44228834..cb26a1a0 100644 --- a/portable/src/implementation/mod.rs +++ b/portable/src/implementation/mod.rs @@ -37,6 +37,11 @@ cfg_if::cfg_if! { target_feature = "neon" ))] { pub(crate) use simd::v128 as auto; + } else if #[cfg(all( + target_arch = "wasm32", + target_feature = "simd128" + ))] { + pub(crate) use simd::v128 as auto; } else { pub(crate) use fallback as auto; } From 7a587469a400e4d1774dbc4e50ddf38d2ea56bee Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Sun, 10 Nov 2024 09:45:48 +0100 Subject: [PATCH 81/83] README WIP --- portable/README.md | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/portable/README.md b/portable/README.md index 9cd9136f..1ababe3d 100644 --- a/portable/README.md +++ b/portable/README.md @@ -12,28 +12,39 @@ are the same as in the [simdutf8](https://crates.io/crates/simdutf8) crate. 
## Features - no unsafe code (`#[forbid(unsafe_code)]`) in the implementation -- `auto` module which selects the best implementation for known-good targets at compile-time +- `auto` module which selects the best implemetation for known-good targets at compile-time including falling back to a scalar implementation if a fast SIMD implementation is not possible. - new platforms need no new code as long as they are supported by `core::simd`. - `no_std` support -- fast out of the box for `aarch64` and `wasm32` targets +- fast out of the box for targets which have SIMD features enabled by default such as `aarch64` - `force_simd256`, `force_simd128` and `force_fallback` crate features to force a specific implementation at compile-time - supports 128-bit and 256-bit SIMD -- There are no unnecessary bounds checks in the compiled code (as of nightly-xx) +- There are no unnecessary bounds checks in the compiled code, functions are properly inlined and + loops properly unrolled (as of nightly-xx) ## Limitations - uses memcpy because of forbid(unsafe), see https://github.com/llvm/llvm-project/issues/87440 -- Zero-overhead abstractions are not so zero-overhead -- target-feature +- target-feature required - no runtime implementation selection - slower - memcpy calls - swizzle_dyn - - slow on uncommon targets + - slow on non-special-cased targets - requires -Zbuild-std for sse4.2, avx2 support if not part of the target architecture +## Architecture notes + +| Architecture | [Platforms](https://doc.rust-lang.org/nightly/rustc/platform-support.html) | Performance | Notes | +| ------------ | -------------------------------------------------------------------------- | --------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| aarch64 | all | xx% of simdutf8 | works out of the box | +| 
x86_64 | all | xx% of simdutf8 | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+avx2"` or `RUSTFLAGS="-C target-feature=+sse4.2"` | +| wasm32 | all | (not tested) | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+simd128"` | +| armv7 | thumbv7neon‑\* | (not tested) | works out of the box | +| armv7 | others | (not tested) | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+neon"` | +| other | ... | bad | falls back to `core::str::from_utf8` unless `forcesimd128` or `forcesimd256` are used. Check [`swizzle_dyn` support](https://github.com/rust-lang/rust/blob/master/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs) before forcing. | + ## Quick start Add the dependency to your Cargo.toml file: From 79e883fca10652902a88378a6b31c486169a0b8f Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 27 Nov 2024 09:56:11 +0100 Subject: [PATCH 82/83] doc wip --- portable/README.md | 62 ++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/portable/README.md b/portable/README.md index 1ababe3d..6b65cb12 100644 --- a/portable/README.md +++ b/portable/README.md @@ -4,28 +4,24 @@ # simdutf8-portable – Fast UTF-8 validation using `core::simd` (portable SIMD) -Fast API-compatible UTF-8 validation for Rust using the experimental architecture-independant +Fast API-compatible UTF-8 validation for Rust using the experimental [`core::simd`](https://doc.rust-lang.org/core/simd/index.html) (portable SIMD) module from the -standard library. An up-to-date nightly Rust compiler is required. The API and the algorithm used -are the same as in the [simdutf8](https://crates.io/crates/simdutf8) crate. +standard library. An up-to-date nightly Rust compiler is required. The API and the algorithm are the +same as in the [simdutf8](https://crates.io/crates/simdutf8) crate. 
## Features - no unsafe code (`#[forbid(unsafe_code)]`) in the implementation -- `auto` module which selects the best implemetation for known-good targets at compile-time - including falling back to a scalar implementation if a fast SIMD implementation is not possible. -- new platforms need no new code as long as they are supported by `core::simd`. +- good performance (with **caveats**, see [architecture notes](#architecture-notes)) +- `auto` module which selects the best implementation for known-good targets at compile-time + including falling back to `core::str::from_utf8`. +- new platforms need no new code as long as they are supported by `core::simd` - `no_std` support -- fast out of the box for targets which have SIMD features enabled by default such as `aarch64` -- `force_simd256`, `force_simd128` and `force_fallback` crate features to force a specific - implementation at compile-time - supports 128-bit and 256-bit SIMD -- There are no unnecessary bounds checks in the compiled code, functions are properly inlined and - loops properly unrolled (as of nightly-xx) ## Limitations -- uses memcpy because of forbid(unsafe), see https://github.com/llvm/llvm-project/issues/87440 +The functions in `core::simd` are marked as `#[inline]`. This way they are compiled to MIR, and code generation is deferred until they are actually used in a crate. - target-feature required - no runtime implementation selection - slower @@ -34,16 +30,26 @@ are the same as in the [simdutf8](https://crates.io/crates/simdutf8) crate. - slow on non-special-cased targets - requires -Zbuild-std for sse4.2, avx2 support if not part of the target architecture +## Performance + +- There are no unnecessary bounds checks in the compiled code, functions are properly inlined and + loops properly unrolled (as of nightly-xx). 
+- uses memcpy because of forbid(unsafe), see https://github.com/llvm/llvm-project/issues/87440 +- swizzle_dyn slower because of abstraction + ## Architecture notes -| Architecture | [Platforms](https://doc.rust-lang.org/nightly/rustc/platform-support.html) | Performance | Notes | -| ------------ | -------------------------------------------------------------------------- | --------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| aarch64 | all | xx% of simdutf8 | works out of the box | -| x86_64 | all | xx% of simdutf8 | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+avx2"` or `RUSTFLAGS="-C target-feature=+sse4.2"` | -| wasm32 | all | (not tested) | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+simd128"` | -| armv7 | thumbv7neon‑\* | (not tested) | works out of the box | -| armv7 | others | (not tested) | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+neon"` | -| other | ... | bad | falls back to `core::str::from_utf8` unless `forcesimd128` or `forcesimd256` are used. Check [`swizzle_dyn` support](https://github.com/rust-lang/rust/blob/master/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs) before forcing. 
| +| Architecture | [Targets](https://doc.rust-lang.org/nightly/rustc/platform-support.html) | Performance\* | Notes | +| --------------- | ------------------------------------------------------------------------ | --------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| aarch64 | all | xx% of simdutf8 | works out of the box | +| x86_64 (avx2) | all | xx% of simdutf8 | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+avx2"` | +| x86_64 (sse4.2) | all | xx% of simdutf8 | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+sse4.2"` | +| wasm32 | all | (not tested) | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+simd128"` | +| armv7 | thumbv7neon‑\* | (not tested) | works out of the box | +| armv7 | others | (not tested) | requires `-Zbuild-std` and `RUSTFLAGS="-C target-feature=+neon"` | +| other | ... | bad | falls back to `core::str::from_utf8` unless `force_simd128` or `force_simd256` are used. Check [`swizzle_dyn` support](https://github.com/rust-lang/rust/blob/master/library/portable-simd/crates/core_simd/src/swizzle_dyn.rs) before forcing. | + +\*) using `basic::from_utf8` ## Quick start @@ -51,7 +57,7 @@ Add the dependency to your Cargo.toml file: ```toml [dependencies] -simdutf8-portable = "0.01" +simdutf8-portable = "0.0.1" ``` Use `simdutf8-portable::basic::from_utf8()` as a drop-in replacement for `std::str::from_utf8()`. @@ -81,17 +87,13 @@ See docs or [simdutf8](https://crates.io/crates/simdutf8). Rust nightly as of xx xx -## Architecture notes - -## Benchmarks - ## Thanks -- to [Heinz N. Gies](https://github.com/licenser) for the initial portable SIMD implementation. -- to the authors of simdjson for coming up with the high-performance SIMD implementation and in - particular to Daniel Lemire for his feedback. 
It was very helpful. -- to the authors of the simdjson Rust port who did most of the heavy lifting of porting the C++ code - to Rust. +- to [Heinz N. Gies](https://github.com/licenser) for the initial portable SIMD implementation, +- to the authors of the `core::simd` module, +- to the authors of simdjson for coming up with the high-performance SIMD implementation, +- to the authors of the simdjson Rust port who did most of the initial heavy lifting of porting the + C++ code to Rust. ## License From 565492be58b2aeff0b240d5cb572b7a393599aec Mon Sep 17 00:00:00 2001 From: Hans Kratz Date: Wed, 27 Nov 2024 10:30:16 +0100 Subject: [PATCH 83/83] edition 2024 --- portable/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/portable/Cargo.toml b/portable/Cargo.toml index 67920dac..601f03aa 100644 --- a/portable/Cargo.toml +++ b/portable/Cargo.toml @@ -2,7 +2,7 @@ name = "simdutf8-portable" version = "0.0.1" authors = ["Hans Kratz "] -edition = "2021" +edition = "2024" description = "SIMD-accelerated UTF-8 validation using core::simd (experimental)" documentation = "https://docs.rs/simdutf8-portable/" homepage = "https://github.com/rusticstuff/simdutf8/tree/main/portable"