From 34877866fa9502881d33e4333daede44f3cb476c Mon Sep 17 00:00:00 2001 From: Alex Jago <4475543+alexjago@users.noreply.github.com> Date: Wed, 20 Jul 2022 09:50:56 +1000 Subject: [PATCH 1/8] Add trait `FromBStrRadix` as draft for #64 --- src/from_bstr_radix.rs | 370 +++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 3 + 2 files changed, 373 insertions(+) create mode 100644 src/from_bstr_radix.rs diff --git a/src/from_bstr_radix.rs b/src/from_bstr_radix.rs new file mode 100644 index 0000000..e5a81b8 --- /dev/null +++ b/src/from_bstr_radix.rs @@ -0,0 +1,370 @@ +/** A trait which provides `from_bstr_radix()` for integer types. + +This acts like `from_str_radix`, including panicking if `radix` is not in [2, 32]. +However, there are a few minor differences to `from_str_radix`: +`src` is a `&BStr` and `radix` is the output type rather than always `u32`. +The result type is slightly different too. + ``` + use bstr::BStr; + use bstr::FromBStrRadix; + use core::num::IntErrorKind; + +for radix in 2..=36 { + let e = BStr::new(b""); + let empty = u8::from_bstr_radix(e, 10); + assert_eq!(empty, Err(IntErrorKind::Empty)); + + let a = BStr::new(b"11"); + let eleven = u8::from_bstr_radix(a, radix); + assert_eq!(eleven, Ok(radix + 1)); + + let bbb = BStr::new("111111111"); + let pos_overflow = u8::from_bstr_radix(bbb, radix); + assert_eq!(pos_overflow, Err(IntErrorKind::PosOverflow)); + + let ccc = BStr::new("-111"); + let neg_overflow = u8::from_bstr_radix(ccc, radix); + assert_eq!(neg_overflow, Err(IntErrorKind::InvalidDigit)); + + let radix = radix as i32; + let totally_fine = i32::from_bstr_radix(ccc, radix); + assert_eq!(totally_fine, Ok(-(radix*radix + radix + 1))); +} +``` +*/ + +pub trait FromBStrRadix +where + T: PartialOrd + + Copy + + core::ops::Add + + core::ops::Sub + + core::ops::Mul, +{ + type Output; + + fn from_bstr_radix( + src: &crate::BStr, + radix: Self::Output, + ) -> Result; +} + +macro_rules! 
make_from_bstr_radix { + ($t:ident) => { + impl FromBStrRadix<$t> for $t { + type Output = $t; + + fn from_bstr_radix( + src: &crate::BStr, + radix: $t, + ) -> Result<$t, core::num::IntErrorKind> { + // This more-or-less follows the stdlib implementation, but it's... simpler. + + assert!((2..=36).contains(&radix), "from_str_radix_int: must lie in the range `[2, 36]` - found {}", radix); + + if src.is_empty() { + return Err(core::num::IntErrorKind::Empty); + } + + // The stdlib implementation is that runs of `+` or `-` are invalid. + // So we need only consider the leading character. + let (start, is_neg) = if src[0] == b'-' {(1, true) } else if src[0] == b'+' {(1, false)} else {(0, false)}; + + // Leading negative on an unsigned type + if is_neg && $t::MIN == 0 { + return Err(core::num::IntErrorKind::InvalidDigit); + } + + // Input string was a single plus or minus + if start == 1 && src.len() == 1 { + return Err(core::num::IntErrorKind::InvalidDigit); + } + + // Items for manual determination of digit + let r = radix as u8; + let num_max = if r < 10 { r } else { 10 }; + let abc_max = if r < 10 { 0 } else { r - 10 }; + + // The accumulator + let mut acc: $t = 0; + + for i in start..src.len() { + let k = src[i]; + + let mul = acc.checked_mul(radix); + + let s : u8 = if k >= 48 && k < (48 + num_max) { + // 48: `0` in ASCII + (k - 48) + } else if k >= 65 && k < 65 + abc_max { + // 65: `A` in ASCII + (k - 65) + } else if k > 97 && k < 97 + abc_max { + // 97: `a` in ASCII + (k - 97) + } else { + return Err(core::num::IntErrorKind::InvalidDigit); + }; + + if is_neg { + if let Some(x) = mul.and_then(|m| m.checked_sub(s as $t)){ + acc = x; + } else { + return Err(core::num::IntErrorKind::NegOverflow); + } + } else { + if let Some(x) = mul.and_then(|m| m.checked_add(s as $t)) { + acc = x; + } else { + return Err(core::num::IntErrorKind::PosOverflow); + } + } + } + Ok(acc) + } + } + } +} + +make_from_bstr_radix!(u8); +make_from_bstr_radix!(u16); +make_from_bstr_radix!(u32); 
+make_from_bstr_radix!(u64); +make_from_bstr_radix!(u128); +make_from_bstr_radix!(usize); +make_from_bstr_radix!(i8); +make_from_bstr_radix!(i16); +make_from_bstr_radix!(i32); +make_from_bstr_radix!(i64); +make_from_bstr_radix!(i128); +make_from_bstr_radix!(isize); + +#[cfg(test)] +mod tests { + /*! + * Things tested: + * + * Zero + * Normal case `b"11"` (should always parse as `Ok(radix + 1)`) + * Too-long string ({number of bytes + 1} ones) (should be `PosOverflow`) + * Leading negative (should be OK for signed types, `InvalidDigit` for unsigned) + * Empty string (should be `Empty`) + * Leading double negative (should always be `InvalidDigit`) + * Leading positive (should always parse OK) + * Leading double positive (should always be `InvalidDigit`) + * Signed empty string (should be `InvalidDigit`) + * + * MIN and MAX round-trip (done in base 10 only, because `to_string` assumes that) + * + */ + + use super::*; + use crate::BStr; + use crate::BString; + use core::num::IntErrorKind; + + #[test] + fn all_radices_u8() { + for radix in 2..=36 { + let z = BStr::new(b"0"); + assert_eq!(u8::from_bstr_radix(z, radix), Ok(0)); + + let a = BStr::new(b"11"); + let eleven = u8::from_bstr_radix(a, radix); + assert_eq!(eleven, Ok(radix + 1)); + + let b = BStr::new(b"111111111"); + let pos_overflow = u8::from_bstr_radix(b, radix); + assert_eq!(pos_overflow, Err(IntErrorKind::PosOverflow)); + + let c = BStr::new(b"-11"); + let negatory = u8::from_bstr_radix(c, radix); + assert_eq!(negatory, Err(IntErrorKind::InvalidDigit)); + + let d = BStr::new(b"--11"); + let two_wrongs = u8::from_bstr_radix(d, radix); + assert_eq!(two_wrongs, Err(IntErrorKind::InvalidDigit)); + + let e = BStr::new(b""); + let empty = u8::from_bstr_radix(e, radix); + assert_eq!(empty, Err(IntErrorKind::Empty)); + + let f = BStr::new(b"+11"); + assert_eq!(u8::from_bstr_radix(f, radix), Ok(radix + 1)); + + let g = BStr::new(b"++11"); + assert_eq!( + u8::from_bstr_radix(g, radix), + 
Err(IntErrorKind::InvalidDigit) + ); + + let i = BStr::new("+"); + assert_eq!( + u8::from_bstr_radix(i, radix), + Err(IntErrorKind::InvalidDigit) + ); + } + // this only stringifies in base 10 + let min = BString::from(u8::MIN.to_string()); + assert_eq!(u8::from_bstr_radix(min.as_bstr(), 10), Ok(u8::MIN)); + let max = BString::from(u8::MAX.to_string()); + assert_eq!(u8::from_bstr_radix(max.as_bstr(), 10), Ok(u8::MAX)); + } + + #[test] + fn all_radices_u64() { + for radix in 2..=36 { + let z = BStr::new(b"0"); + assert_eq!(u64::from_bstr_radix(z, radix), Ok(0)); + + let a = BStr::new(b"11"); + let eleven = u64::from_bstr_radix(a, radix); + assert_eq!(eleven, Ok(radix + 1)); + + let b = BStr::new(b"11111111111111111111111111111111111111111111111111111111111111111"); + let pos_overflow = u64::from_bstr_radix(b, radix); + assert_eq!(pos_overflow, Err(IntErrorKind::PosOverflow)); + + let c = BStr::new(b"-11"); + let negatory = u64::from_bstr_radix(c, radix); + assert_eq!(negatory, Err(IntErrorKind::InvalidDigit)); + + let d = BStr::new(b"--11"); + let two_wrongs = u64::from_bstr_radix(d, radix); + assert_eq!(two_wrongs, Err(IntErrorKind::InvalidDigit)); + + let e = BStr::new(b""); + let empty = u64::from_bstr_radix(e, radix); + assert_eq!(empty, Err(IntErrorKind::Empty)); + + let f = BStr::new(b"+11"); + assert_eq!(u64::from_bstr_radix(f, radix), Ok(radix + 1)); + + let g = BStr::new(b"++11"); + assert_eq!( + u64::from_bstr_radix(g, radix), + Err(IntErrorKind::InvalidDigit) + ); + + let i = BStr::new("+"); + assert_eq!( + u64::from_bstr_radix(i, radix), + Err(IntErrorKind::InvalidDigit) + ); + } + // this only stringifies in base 10 + let min = BString::from(u64::MIN.to_string()); + assert_eq!(u64::from_bstr_radix(min.as_bstr(), 10).unwrap(), u64::MIN); + let max = BString::from(u64::MAX.to_string()); + assert_eq!(u64::from_bstr_radix(max.as_bstr(), 10).unwrap(), u64::MAX); + } + + #[test] + fn all_radices_i8() { + for radix in 2..=36 { + let z = BStr::new(b"0"); + 
assert_eq!(i8::from_bstr_radix(z, radix), Ok(0)); + + let a = BStr::new(b"11"); + let eleven = i8::from_bstr_radix(a, radix); + assert_eq!(eleven, Ok(radix + 1)); + + let b = BStr::new(b"111111111"); + let pos_overflow = i8::from_bstr_radix(b, radix); + assert_eq!(pos_overflow, Err(IntErrorKind::PosOverflow)); + + let c = BStr::new(b"-11"); + let totally_fine = i8::from_bstr_radix(c, radix); + assert_eq!(totally_fine, Ok(-(radix + 1))); + + let d = BStr::new(b"--11"); + let two_wrongs = i8::from_bstr_radix(d, radix); + assert_eq!(two_wrongs, Err(IntErrorKind::InvalidDigit)); + + let e = BStr::new(b""); + let empty = i8::from_bstr_radix(e, radix); + assert_eq!(empty, Err(IntErrorKind::Empty)); + + let f = BStr::new(b"+11"); + assert_eq!(i8::from_bstr_radix(f, radix), Ok(radix + 1)); + + let g = BStr::new(b"++11"); + assert_eq!( + i8::from_bstr_radix(g, radix), + Err(IntErrorKind::InvalidDigit) + ); + + let h = BStr::new(b"-111111111"); + assert_eq!( + i8::from_bstr_radix(h, radix).unwrap_err(), + IntErrorKind::NegOverflow + ); + } + // this only stringifies in base 10 + let min = BString::from(i8::MIN.to_string()); + assert_eq!(i8::from_bstr_radix(min.as_bstr(), 10).unwrap(), i8::MIN); + let max = BString::from(i8::MAX.to_string()); + assert_eq!(i8::from_bstr_radix(max.as_bstr(), 10).unwrap(), i8::MAX); + } + + #[test] + fn all_radices_i64() { + for radix in 2..=36 { + let z = BStr::new(b"0"); + assert_eq!(i64::from_bstr_radix(z, radix), Ok(0)); + + let a = BStr::new(b"11"); + let eleven = i64::from_bstr_radix(a, radix); + assert_eq!(eleven, Ok(radix + 1)); + + let b = BStr::new(b"11111111111111111111111111111111111111111111111111111111111111111"); + let pos_overflow = i64::from_bstr_radix(b, radix); + assert_eq!(pos_overflow, Err(IntErrorKind::PosOverflow)); + + let c = BStr::new(b"-11"); + let totally_fine = i64::from_bstr_radix(c, radix); + assert_eq!(totally_fine, Ok(-(radix + 1))); + + let d = BStr::new(b"--11"); + let two_wrongs = i64::from_bstr_radix(d, 
radix); + assert_eq!(two_wrongs, Err(IntErrorKind::InvalidDigit)); + + let e = BStr::new(b""); + let empty = i64::from_bstr_radix(e, radix); + assert_eq!(empty, Err(IntErrorKind::Empty)); + + let f = BStr::new(b"+11"); + assert_eq!(i64::from_bstr_radix(f, radix), Ok(radix + 1)); + + let g = BStr::new(b"++11"); + assert_eq!( + i64::from_bstr_radix(g, radix), + Err(IntErrorKind::InvalidDigit) + ); + + let h = BStr::new(b"-11111111111111111111111111111111111111111111111111111111111111111"); + assert_eq!( + i64::from_bstr_radix(h, radix).unwrap_err(), + IntErrorKind::NegOverflow + ); + + let i = BStr::new("+"); + assert_eq!( + i64::from_bstr_radix(i, radix), + Err(IntErrorKind::InvalidDigit) + ); + + let j = BStr::new("-"); + assert_eq!( + i64::from_bstr_radix(j, radix), + Err(IntErrorKind::InvalidDigit) + ); + } + + // this only stringifies in base 10 + let min = BString::from(i64::MIN.to_string()); + assert_eq!(i64::from_bstr_radix(min.as_bstr(), 10).unwrap(), i64::MIN); + let max = BString::from(i64::MAX.to_string()); + assert_eq!(i64::from_bstr_radix(max.as_bstr(), 10).unwrap(), i64::MAX); + } +} diff --git a/src/lib.rs b/src/lib.rs index 5adc903..e8ecbcb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -398,6 +398,8 @@ pub use crate::utf8::{ Chars, Utf8Chunk, Utf8Chunks, Utf8Error, }; +pub use crate::from_bstr_radix::FromBStrRadix; + mod ascii; mod bstr; #[cfg(feature = "alloc")] @@ -406,6 +408,7 @@ mod byteset; mod ext_slice; #[cfg(feature = "alloc")] mod ext_vec; +mod from_bstr_radix; mod impls; #[cfg(feature = "std")] pub mod io; From a4771879cb8130485493d637d2ef5814ac757d44 Mon Sep 17 00:00:00 2001 From: Alex Jago <4475543+alexjago@users.noreply.github.com> Date: Wed, 20 Jul 2022 12:08:05 +1000 Subject: [PATCH 2/8] Swap error handling over to ParseIntError approach --- src/from_bstr_radix.rs | 296 ++++++++++++++++++++--------------------- src/lib.rs | 2 +- 2 files changed, 146 insertions(+), 152 deletions(-) diff --git a/src/from_bstr_radix.rs 
b/src/from_bstr_radix.rs index e5a81b8..e27b36e 100644 --- a/src/from_bstr_radix.rs +++ b/src/from_bstr_radix.rs @@ -4,35 +4,50 @@ This acts like `from_str_radix`, including panicking if `radix` is not in [2, 32 However, there are a few minor differences to `from_str_radix`: `src` is a `&BStr` and `radix` is the output type rather than always `u32`. The result type is slightly different too. - ``` - use bstr::BStr; - use bstr::FromBStrRadix; - use core::num::IntErrorKind; +``` +use bstr::{BStr, FromBStrRadix, IntErrorKind}; for radix in 2..=36 { let e = BStr::new(b""); let empty = u8::from_bstr_radix(e, 10); - assert_eq!(empty, Err(IntErrorKind::Empty)); + assert_eq!(empty.unwrap_err().kind(), &IntErrorKind::Empty); let a = BStr::new(b"11"); let eleven = u8::from_bstr_radix(a, radix); assert_eq!(eleven, Ok(radix + 1)); - let bbb = BStr::new("111111111"); - let pos_overflow = u8::from_bstr_radix(bbb, radix); - assert_eq!(pos_overflow, Err(IntErrorKind::PosOverflow)); + let b = BStr::new("111111111"); + let pos_overflow = u8::from_bstr_radix(b, radix); + assert_eq!(pos_overflow.unwrap_err().kind(), &IntErrorKind::PosOverflow); - let ccc = BStr::new("-111"); - let neg_overflow = u8::from_bstr_radix(ccc, radix); - assert_eq!(neg_overflow, Err(IntErrorKind::InvalidDigit)); + let c = BStr::new("-111"); + let negatory = u8::from_bstr_radix(c, radix); + assert_eq!(negatory.unwrap_err().kind(), &IntErrorKind::InvalidDigit); let radix = radix as i32; - let totally_fine = i32::from_bstr_radix(ccc, radix); + let totally_fine = i32::from_bstr_radix(c, radix); assert_eq!(totally_fine, Ok(-(radix*radix + radix + 1))); } ``` -*/ +The `NonZero` versions of integers are not currently supported for parsing. 
+Instead, please parse the equivalent possibly-zero integer, then convert: +``` +use core::num::NonZeroU8; +use bstr::{BStr, FromBStrRadix}; + +let a = BStr::new(b"11"); +let eleven = u8::from_bstr_radix(a, 10).ok().and_then(NonZeroU8::new); +assert_eq!(eleven, NonZeroU8::new(11)); + +let zero = BStr::new(b"0"); +let nada = u8::from_bstr_radix(zero, 10).ok().and_then(NonZeroU8::new); +assert_eq!(nada, None); +``` + + + +*/ pub trait FromBStrRadix where T: PartialOrd @@ -41,29 +56,98 @@ where + core::ops::Sub + core::ops::Mul, { - type Output; + /// Whatever integer type is being parsed. + type Integer; fn from_bstr_radix( src: &crate::BStr, - radix: Self::Output, - ) -> Result; + radix: Self::Integer, + ) -> Result; +} + +// ParseIntError and impl is almost entirely copy-pasted from the standard library + +/// Represents an error in parsing. +/// +/// See [`IntErrorKind`] for a list of possible causes. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ParseIntError { + pub(super) kind: IntErrorKind, } +impl ParseIntError { + pub fn kind(&self) -> &IntErrorKind { + &self.kind + } + #[doc(hidden)] + pub fn __description(&self) -> &str { + match self.kind { + IntErrorKind::Empty => "cannot parse integer from empty string", + IntErrorKind::InvalidDigit => "invalid digit found in string", + IntErrorKind::PosOverflow => { + "number too large to fit in target type" + } + IntErrorKind::NegOverflow => { + "number too small to fit in target type" + } + IntErrorKind::Zero => "number would be zero for non-zero type", + } + } +} + +impl std::fmt::Display for ParseIntError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.__description().fmt(f) + } +} + +/// Enum to store the various types of errors that can cause parsing an integer to fail. +/// +/// Polyfill for post-1.55 [`core::num::IntErrorKind`]; that can just be re-used post-MSRV bump. 
+#[derive(Debug, Clone, PartialEq, Eq)] +#[non_exhaustive] +pub enum IntErrorKind { + /// Value being parsed is empty. + /// + /// This variant will be constructed when parsing an empty string. + Empty, + /// Contains an invalid digit in its context. + /// + /// Among other causes, this variant will be constructed when parsing a string that + /// contains a non-ASCII char. + /// + /// This variant is also constructed when a `+` or `-` is misplaced within a string + /// either on its own or in the middle of a number. + InvalidDigit, + /// Integer is too large to store in target integer type. + PosOverflow, + /// Integer is too small to store in target integer type. + NegOverflow, + /// Value was Zero + /// + /// This variant will be emitted when the parsing string has a value of zero, which + /// would be illegal for non-zero types. + Zero, +} + +// TODO: once crate MSRV >= 1.55, just re-export this. +// pub use core::num::IntErrorKind; + macro_rules! make_from_bstr_radix { ($t:ident) => { impl FromBStrRadix<$t> for $t { - type Output = $t; + type Integer = $t; fn from_bstr_radix( src: &crate::BStr, radix: $t, - ) -> Result<$t, core::num::IntErrorKind> { + ) -> Result<$t, crate::ParseIntError> { // This more-or-less follows the stdlib implementation, but it's... simpler. assert!((2..=36).contains(&radix), "from_str_radix_int: must lie in the range `[2, 36]` - found {}", radix); if src.is_empty() { - return Err(core::num::IntErrorKind::Empty); + return Err(crate::ParseIntError{kind: crate::IntErrorKind::Empty}); } // The stdlib implementation is that runs of `+` or `-` are invalid. @@ -72,12 +156,12 @@ macro_rules! 
make_from_bstr_radix { // Leading negative on an unsigned type if is_neg && $t::MIN == 0 { - return Err(core::num::IntErrorKind::InvalidDigit); + return Err(crate::ParseIntError{kind: crate::IntErrorKind::InvalidDigit}); } // Input string was a single plus or minus if start == 1 && src.len() == 1 { - return Err(core::num::IntErrorKind::InvalidDigit); + return Err(crate::ParseIntError{kind: crate::IntErrorKind::InvalidDigit}); } // Items for manual determination of digit @@ -103,23 +187,24 @@ macro_rules! make_from_bstr_radix { // 97: `a` in ASCII (k - 97) } else { - return Err(core::num::IntErrorKind::InvalidDigit); + return Err(crate::ParseIntError{kind: crate::IntErrorKind::InvalidDigit}); }; if is_neg { if let Some(x) = mul.and_then(|m| m.checked_sub(s as $t)){ acc = x; } else { - return Err(core::num::IntErrorKind::NegOverflow); + return Err(crate::ParseIntError{kind: crate::IntErrorKind::NegOverflow}); } } else { if let Some(x) = mul.and_then(|m| m.checked_add(s as $t)) { acc = x; } else { - return Err(core::num::IntErrorKind::PosOverflow); + return Err(crate::ParseIntError{kind: crate::IntErrorKind::PosOverflow}); } } } + Ok(acc) } } @@ -132,6 +217,7 @@ make_from_bstr_radix!(u32); make_from_bstr_radix!(u64); make_from_bstr_radix!(u128); make_from_bstr_radix!(usize); + make_from_bstr_radix!(i8); make_from_bstr_radix!(i16); make_from_bstr_radix!(i32); @@ -139,6 +225,8 @@ make_from_bstr_radix!(i64); make_from_bstr_radix!(i128); make_from_bstr_radix!(isize); +// NOTE: once MSRV exceeds 1.64 it should be possible to implement everything for the NonZero types too. + #[cfg(test)] mod tests { /*! 
@@ -161,10 +249,10 @@ mod tests { use super::*; use crate::BStr; use crate::BString; - use core::num::IntErrorKind; + use crate::IntErrorKind; #[test] - fn all_radices_u8() { + fn test_parse_u8() { for radix in 2..=36 { let z = BStr::new(b"0"); assert_eq!(u8::from_bstr_radix(z, radix), Ok(0)); @@ -175,33 +263,42 @@ mod tests { let b = BStr::new(b"111111111"); let pos_overflow = u8::from_bstr_radix(b, radix); - assert_eq!(pos_overflow, Err(IntErrorKind::PosOverflow)); + assert_eq!( + pos_overflow.unwrap_err().kind(), + &IntErrorKind::PosOverflow + ); let c = BStr::new(b"-11"); let negatory = u8::from_bstr_radix(c, radix); - assert_eq!(negatory, Err(IntErrorKind::InvalidDigit)); + assert_eq!( + negatory.unwrap_err().kind(), + &IntErrorKind::InvalidDigit + ); let d = BStr::new(b"--11"); let two_wrongs = u8::from_bstr_radix(d, radix); - assert_eq!(two_wrongs, Err(IntErrorKind::InvalidDigit)); + assert_eq!( + two_wrongs.unwrap_err().kind(), + &IntErrorKind::InvalidDigit + ); let e = BStr::new(b""); let empty = u8::from_bstr_radix(e, radix); - assert_eq!(empty, Err(IntErrorKind::Empty)); + assert_eq!(empty.unwrap_err().kind(), &IntErrorKind::Empty); let f = BStr::new(b"+11"); assert_eq!(u8::from_bstr_radix(f, radix), Ok(radix + 1)); let g = BStr::new(b"++11"); assert_eq!( - u8::from_bstr_radix(g, radix), - Err(IntErrorKind::InvalidDigit) + u8::from_bstr_radix(g, radix).unwrap_err().kind(), + &IntErrorKind::InvalidDigit ); let i = BStr::new("+"); assert_eq!( - u8::from_bstr_radix(i, radix), - Err(IntErrorKind::InvalidDigit) + u8::from_bstr_radix(i, radix).unwrap_err().kind(), + &IntErrorKind::InvalidDigit ); } // this only stringifies in base 10 @@ -212,55 +309,7 @@ mod tests { } #[test] - fn all_radices_u64() { - for radix in 2..=36 { - let z = BStr::new(b"0"); - assert_eq!(u64::from_bstr_radix(z, radix), Ok(0)); - - let a = BStr::new(b"11"); - let eleven = u64::from_bstr_radix(a, radix); - assert_eq!(eleven, Ok(radix + 1)); - - let b = 
BStr::new(b"11111111111111111111111111111111111111111111111111111111111111111"); - let pos_overflow = u64::from_bstr_radix(b, radix); - assert_eq!(pos_overflow, Err(IntErrorKind::PosOverflow)); - - let c = BStr::new(b"-11"); - let negatory = u64::from_bstr_radix(c, radix); - assert_eq!(negatory, Err(IntErrorKind::InvalidDigit)); - - let d = BStr::new(b"--11"); - let two_wrongs = u64::from_bstr_radix(d, radix); - assert_eq!(two_wrongs, Err(IntErrorKind::InvalidDigit)); - - let e = BStr::new(b""); - let empty = u64::from_bstr_radix(e, radix); - assert_eq!(empty, Err(IntErrorKind::Empty)); - - let f = BStr::new(b"+11"); - assert_eq!(u64::from_bstr_radix(f, radix), Ok(radix + 1)); - - let g = BStr::new(b"++11"); - assert_eq!( - u64::from_bstr_radix(g, radix), - Err(IntErrorKind::InvalidDigit) - ); - - let i = BStr::new("+"); - assert_eq!( - u64::from_bstr_radix(i, radix), - Err(IntErrorKind::InvalidDigit) - ); - } - // this only stringifies in base 10 - let min = BString::from(u64::MIN.to_string()); - assert_eq!(u64::from_bstr_radix(min.as_bstr(), 10).unwrap(), u64::MIN); - let max = BString::from(u64::MAX.to_string()); - assert_eq!(u64::from_bstr_radix(max.as_bstr(), 10).unwrap(), u64::MAX); - } - - #[test] - fn all_radices_i8() { + fn test_parse_i8() { for radix in 2..=36 { let z = BStr::new(b"0"); assert_eq!(i8::from_bstr_radix(z, radix), Ok(0)); @@ -271,7 +320,10 @@ mod tests { let b = BStr::new(b"111111111"); let pos_overflow = i8::from_bstr_radix(b, radix); - assert_eq!(pos_overflow, Err(IntErrorKind::PosOverflow)); + assert_eq!( + pos_overflow.unwrap_err().kind(), + &IntErrorKind::PosOverflow + ); let c = BStr::new(b"-11"); let totally_fine = i8::from_bstr_radix(c, radix); @@ -279,25 +331,28 @@ mod tests { let d = BStr::new(b"--11"); let two_wrongs = i8::from_bstr_radix(d, radix); - assert_eq!(two_wrongs, Err(IntErrorKind::InvalidDigit)); + assert_eq!( + two_wrongs.unwrap_err().kind(), + &IntErrorKind::InvalidDigit + ); let e = BStr::new(b""); let empty = 
i8::from_bstr_radix(e, radix); - assert_eq!(empty, Err(IntErrorKind::Empty)); + assert_eq!(empty.unwrap_err().kind(), &IntErrorKind::Empty); let f = BStr::new(b"+11"); assert_eq!(i8::from_bstr_radix(f, radix), Ok(radix + 1)); let g = BStr::new(b"++11"); assert_eq!( - i8::from_bstr_radix(g, radix), - Err(IntErrorKind::InvalidDigit) + i8::from_bstr_radix(g, radix).unwrap_err().kind(), + &IntErrorKind::InvalidDigit ); let h = BStr::new(b"-111111111"); assert_eq!( - i8::from_bstr_radix(h, radix).unwrap_err(), - IntErrorKind::NegOverflow + i8::from_bstr_radix(h, radix).unwrap_err().kind(), + &IntErrorKind::NegOverflow ); } // this only stringifies in base 10 @@ -306,65 +361,4 @@ mod tests { let max = BString::from(i8::MAX.to_string()); assert_eq!(i8::from_bstr_radix(max.as_bstr(), 10).unwrap(), i8::MAX); } - - #[test] - fn all_radices_i64() { - for radix in 2..=36 { - let z = BStr::new(b"0"); - assert_eq!(i64::from_bstr_radix(z, radix), Ok(0)); - - let a = BStr::new(b"11"); - let eleven = i64::from_bstr_radix(a, radix); - assert_eq!(eleven, Ok(radix + 1)); - - let b = BStr::new(b"11111111111111111111111111111111111111111111111111111111111111111"); - let pos_overflow = i64::from_bstr_radix(b, radix); - assert_eq!(pos_overflow, Err(IntErrorKind::PosOverflow)); - - let c = BStr::new(b"-11"); - let totally_fine = i64::from_bstr_radix(c, radix); - assert_eq!(totally_fine, Ok(-(radix + 1))); - - let d = BStr::new(b"--11"); - let two_wrongs = i64::from_bstr_radix(d, radix); - assert_eq!(two_wrongs, Err(IntErrorKind::InvalidDigit)); - - let e = BStr::new(b""); - let empty = i64::from_bstr_radix(e, radix); - assert_eq!(empty, Err(IntErrorKind::Empty)); - - let f = BStr::new(b"+11"); - assert_eq!(i64::from_bstr_radix(f, radix), Ok(radix + 1)); - - let g = BStr::new(b"++11"); - assert_eq!( - i64::from_bstr_radix(g, radix), - Err(IntErrorKind::InvalidDigit) - ); - - let h = BStr::new(b"-11111111111111111111111111111111111111111111111111111111111111111"); - assert_eq!( - 
i64::from_bstr_radix(h, radix).unwrap_err(), - IntErrorKind::NegOverflow - ); - - let i = BStr::new("+"); - assert_eq!( - i64::from_bstr_radix(i, radix), - Err(IntErrorKind::InvalidDigit) - ); - - let j = BStr::new("-"); - assert_eq!( - i64::from_bstr_radix(j, radix), - Err(IntErrorKind::InvalidDigit) - ); - } - - // this only stringifies in base 10 - let min = BString::from(i64::MIN.to_string()); - assert_eq!(i64::from_bstr_radix(min.as_bstr(), 10).unwrap(), i64::MIN); - let max = BString::from(i64::MAX.to_string()); - assert_eq!(i64::from_bstr_radix(max.as_bstr(), 10).unwrap(), i64::MAX); - } } diff --git a/src/lib.rs b/src/lib.rs index e8ecbcb..0f2b251 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -398,7 +398,7 @@ pub use crate::utf8::{ Chars, Utf8Chunk, Utf8Chunks, Utf8Error, }; -pub use crate::from_bstr_radix::FromBStrRadix; +pub use crate::from_bstr_radix::{FromBStrRadix, IntErrorKind, ParseIntError}; mod ascii; mod bstr; From fafe48894b4f15409d8b84cb1529cfb16ecde7d5 Mon Sep 17 00:00:00 2001 From: Alex Jago <4475543+alexjago@users.noreply.github.com> Date: Wed, 20 Jul 2022 13:02:10 +1000 Subject: [PATCH 3/8] Rename to FromBytesRadix and take an AsRef<[u8]> --- ...from_bstr_radix.rs => from_bytes_radix.rs} | 210 +++++++++--------- src/lib.rs | 6 +- 2 files changed, 111 insertions(+), 105 deletions(-) rename src/{from_bstr_radix.rs => from_bytes_radix.rs} (60%) diff --git a/src/from_bstr_radix.rs b/src/from_bytes_radix.rs similarity index 60% rename from src/from_bstr_radix.rs rename to src/from_bytes_radix.rs index e27b36e..8281153 100644 --- a/src/from_bstr_radix.rs +++ b/src/from_bytes_radix.rs @@ -1,31 +1,31 @@ -/** A trait which provides `from_bstr_radix()` for integer types. +/** A trait which provides `from_bytes_radix()` for integer types. This acts like `from_str_radix`, including panicking if `radix` is not in [2, 32]. 
However, there are a few minor differences to `from_str_radix`: `src` is a `&BStr` and `radix` is the output type rather than always `u32`. The result type is slightly different too. ``` -use bstr::{BStr, FromBStrRadix, IntErrorKind}; +use bstr::{BStr, FromBytesRadix, IntErrorKind}; for radix in 2..=36 { let e = BStr::new(b""); - let empty = u8::from_bstr_radix(e, 10); + let empty = u8::from_bytes_radix(&e, 10); assert_eq!(empty.unwrap_err().kind(), &IntErrorKind::Empty); let a = BStr::new(b"11"); - let eleven = u8::from_bstr_radix(a, radix); + let eleven = u8::from_bytes_radix(&a, radix); assert_eq!(eleven, Ok(radix + 1)); let b = BStr::new("111111111"); - let pos_overflow = u8::from_bstr_radix(b, radix); + let pos_overflow = u8::from_bytes_radix(&b, radix); assert_eq!(pos_overflow.unwrap_err().kind(), &IntErrorKind::PosOverflow); let c = BStr::new("-111"); - let negatory = u8::from_bstr_radix(c, radix); + let negatory = u8::from_bytes_radix(&c, radix); assert_eq!(negatory.unwrap_err().kind(), &IntErrorKind::InvalidDigit); let radix = radix as i32; - let totally_fine = i32::from_bstr_radix(c, radix); + let totally_fine = i32::from_bytes_radix(&c, radix); assert_eq!(totally_fine, Ok(-(radix*radix + radix + 1))); } ``` @@ -34,21 +34,21 @@ The `NonZero` versions of integers are not currently supported for parsing. 
Instead, please parse the equivalent possibly-zero integer, then convert: ``` use core::num::NonZeroU8; -use bstr::{BStr, FromBStrRadix}; +use bstr::{BStr, FromBytesRadix}; let a = BStr::new(b"11"); -let eleven = u8::from_bstr_radix(a, 10).ok().and_then(NonZeroU8::new); +let eleven = u8::from_bytes_radix(&a, 10).ok().and_then(NonZeroU8::new); assert_eq!(eleven, NonZeroU8::new(11)); let zero = BStr::new(b"0"); -let nada = u8::from_bstr_radix(zero, 10).ok().and_then(NonZeroU8::new); +let nada = u8::from_bytes_radix(&zero, 10).ok().and_then(NonZeroU8::new); assert_eq!(nada, None); ``` */ -pub trait FromBStrRadix +pub trait FromBytesRadix where T: PartialOrd + Copy @@ -59,8 +59,8 @@ where /// Whatever integer type is being parsed. type Integer; - fn from_bstr_radix( - src: &crate::BStr, + fn from_bytes_radix( + src: &dyn AsRef<[u8]>, radix: Self::Integer, ) -> Result; } @@ -133,97 +133,101 @@ pub enum IntErrorKind { // TODO: once crate MSRV >= 1.55, just re-export this. // pub use core::num::IntErrorKind; -macro_rules! make_from_bstr_radix { +macro_rules! make_from_bytes_radix { ($t:ident) => { - impl FromBStrRadix<$t> for $t { + impl FromBytesRadix<$t> for $t { type Integer = $t; - fn from_bstr_radix( - src: &crate::BStr, + fn from_bytes_radix( + src: &dyn AsRef<[u8]>, radix: $t, - ) -> Result<$t, crate::ParseIntError> { - // This more-or-less follows the stdlib implementation, but it's... simpler. + ) -> Result<$t, crate::ParseIntError> + { + // This more-or-less follows the stdlib implementation. assert!((2..=36).contains(&radix), "from_str_radix_int: must lie in the range `[2, 36]` - found {}", radix); - if src.is_empty() { - return Err(crate::ParseIntError{kind: crate::IntErrorKind::Empty}); - } + let src = src.as_ref(); - // The stdlib implementation is that runs of `+` or `-` are invalid. - // So we need only consider the leading character. 
- let (start, is_neg) = if src[0] == b'-' {(1, true) } else if src[0] == b'+' {(1, false)} else {(0, false)}; + if let Some(s0) = src.get(0) { + // The stdlib implementation is that runs of `+` or `-` are invalid. + // So we need only consider the leading character. + let (start, is_neg) = if *s0 == b'-' {(1, true) } else if *s0 == b'+' {(1, false)} else {(0, false)}; - // Leading negative on an unsigned type - if is_neg && $t::MIN == 0 { - return Err(crate::ParseIntError{kind: crate::IntErrorKind::InvalidDigit}); - } - - // Input string was a single plus or minus - if start == 1 && src.len() == 1 { - return Err(crate::ParseIntError{kind: crate::IntErrorKind::InvalidDigit}); - } + // Leading negative on an unsigned type + if is_neg && $t::MIN == 0 { + return Err(crate::ParseIntError{kind: crate::IntErrorKind::InvalidDigit}); + } - // Items for manual determination of digit - let r = radix as u8; - let num_max = if r < 10 { r } else { 10 }; - let abc_max = if r < 10 { 0 } else { r - 10 }; - - // The accumulator - let mut acc: $t = 0; - - for i in start..src.len() { - let k = src[i]; - - let mul = acc.checked_mul(radix); - - let s : u8 = if k >= 48 && k < (48 + num_max) { - // 48: `0` in ASCII - (k - 48) - } else if k >= 65 && k < 65 + abc_max { - // 65: `A` in ASCII - (k - 65) - } else if k > 97 && k < 97 + abc_max { - // 97: `a` in ASCII - (k - 97) - } else { + // Input string was a single plus or minus + if start == 1 && src.len() == 1 { return Err(crate::ParseIntError{kind: crate::IntErrorKind::InvalidDigit}); - }; + } + + // Items for manual determination of digit + let r = radix as u8; + let num_max = if r < 10 { r } else { 10 }; + let abc_max = if r < 10 { 0 } else { r - 10 }; - if is_neg { - if let Some(x) = mul.and_then(|m| m.checked_sub(s as $t)){ - acc = x; + // The accumulator + let mut acc: $t = 0; + + for i in start..src.len() { + let k = src[i]; + + let mul = acc.checked_mul(radix); + + let s : u8 = if k >= 48 && k < (48 + num_max) { + // 48: `0` in 
ASCII + (k - 48) + } else if k >= 65 && k < 65 + abc_max { + // 65: `A` in ASCII + (k - 65) + } else if k > 97 && k < 97 + abc_max { + // 97: `a` in ASCII + (k - 97) } else { - return Err(crate::ParseIntError{kind: crate::IntErrorKind::NegOverflow}); - } - } else { - if let Some(x) = mul.and_then(|m| m.checked_add(s as $t)) { - acc = x; + return Err(crate::ParseIntError{kind: crate::IntErrorKind::InvalidDigit}); + }; + + if is_neg { + if let Some(x) = mul.and_then(|m| m.checked_sub(s as $t)){ + acc = x; + } else { + return Err(crate::ParseIntError{kind: crate::IntErrorKind::NegOverflow}); + } } else { - return Err(crate::ParseIntError{kind: crate::IntErrorKind::PosOverflow}); + if let Some(x) = mul.and_then(|m| m.checked_add(s as $t)) { + acc = x; + } else { + return Err(crate::ParseIntError{kind: crate::IntErrorKind::PosOverflow}); + } } } - } - Ok(acc) + Ok(acc) + + } else { + return Err(crate::ParseIntError{kind: crate::IntErrorKind::Empty}); + } } } } } -make_from_bstr_radix!(u8); -make_from_bstr_radix!(u16); -make_from_bstr_radix!(u32); -make_from_bstr_radix!(u64); -make_from_bstr_radix!(u128); -make_from_bstr_radix!(usize); +make_from_bytes_radix!(u8); +make_from_bytes_radix!(u16); +make_from_bytes_radix!(u32); +make_from_bytes_radix!(u64); +make_from_bytes_radix!(u128); +make_from_bytes_radix!(usize); -make_from_bstr_radix!(i8); -make_from_bstr_radix!(i16); -make_from_bstr_radix!(i32); -make_from_bstr_radix!(i64); -make_from_bstr_radix!(i128); -make_from_bstr_radix!(isize); +make_from_bytes_radix!(i8); +make_from_bytes_radix!(i16); +make_from_bytes_radix!(i32); +make_from_bytes_radix!(i64); +make_from_bytes_radix!(i128); +make_from_bytes_radix!(isize); // NOTE: once MSRV exceeds 1.64 it should be possible to implement everything for the NonZero types too. 
@@ -255,110 +259,110 @@ mod tests { fn test_parse_u8() { for radix in 2..=36 { let z = BStr::new(b"0"); - assert_eq!(u8::from_bstr_radix(z, radix), Ok(0)); + assert_eq!(u8::from_bytes_radix(&z, radix), Ok(0)); let a = BStr::new(b"11"); - let eleven = u8::from_bstr_radix(a, radix); + let eleven = u8::from_bytes_radix(&a, radix); assert_eq!(eleven, Ok(radix + 1)); let b = BStr::new(b"111111111"); - let pos_overflow = u8::from_bstr_radix(b, radix); + let pos_overflow = u8::from_bytes_radix(&b, radix); assert_eq!( pos_overflow.unwrap_err().kind(), &IntErrorKind::PosOverflow ); let c = BStr::new(b"-11"); - let negatory = u8::from_bstr_radix(c, radix); + let negatory = u8::from_bytes_radix(&c, radix); assert_eq!( negatory.unwrap_err().kind(), &IntErrorKind::InvalidDigit ); let d = BStr::new(b"--11"); - let two_wrongs = u8::from_bstr_radix(d, radix); + let two_wrongs = u8::from_bytes_radix(&d, radix); assert_eq!( two_wrongs.unwrap_err().kind(), &IntErrorKind::InvalidDigit ); let e = BStr::new(b""); - let empty = u8::from_bstr_radix(e, radix); + let empty = u8::from_bytes_radix(&e, radix); assert_eq!(empty.unwrap_err().kind(), &IntErrorKind::Empty); let f = BStr::new(b"+11"); - assert_eq!(u8::from_bstr_radix(f, radix), Ok(radix + 1)); + assert_eq!(u8::from_bytes_radix(&f, radix), Ok(radix + 1)); let g = BStr::new(b"++11"); assert_eq!( - u8::from_bstr_radix(g, radix).unwrap_err().kind(), + u8::from_bytes_radix(&g, radix).unwrap_err().kind(), &IntErrorKind::InvalidDigit ); let i = BStr::new("+"); assert_eq!( - u8::from_bstr_radix(i, radix).unwrap_err().kind(), + u8::from_bytes_radix(&i, radix).unwrap_err().kind(), &IntErrorKind::InvalidDigit ); } // this only stringifies in base 10 let min = BString::from(u8::MIN.to_string()); - assert_eq!(u8::from_bstr_radix(min.as_bstr(), 10), Ok(u8::MIN)); + assert_eq!(u8::from_bytes_radix(&min.as_bstr(), 10), Ok(u8::MIN)); let max = BString::from(u8::MAX.to_string()); - assert_eq!(u8::from_bstr_radix(max.as_bstr(), 10), Ok(u8::MAX)); + 
assert_eq!(u8::from_bytes_radix(&max.as_bstr(), 10), Ok(u8::MAX)); } #[test] fn test_parse_i8() { for radix in 2..=36 { let z = BStr::new(b"0"); - assert_eq!(i8::from_bstr_radix(z, radix), Ok(0)); + assert_eq!(i8::from_bytes_radix(&z, radix), Ok(0)); let a = BStr::new(b"11"); - let eleven = i8::from_bstr_radix(a, radix); + let eleven = i8::from_bytes_radix(&a, radix); assert_eq!(eleven, Ok(radix + 1)); let b = BStr::new(b"111111111"); - let pos_overflow = i8::from_bstr_radix(b, radix); + let pos_overflow = i8::from_bytes_radix(&b, radix); assert_eq!( pos_overflow.unwrap_err().kind(), &IntErrorKind::PosOverflow ); let c = BStr::new(b"-11"); - let totally_fine = i8::from_bstr_radix(c, radix); + let totally_fine = i8::from_bytes_radix(&c, radix); assert_eq!(totally_fine, Ok(-(radix + 1))); let d = BStr::new(b"--11"); - let two_wrongs = i8::from_bstr_radix(d, radix); + let two_wrongs = i8::from_bytes_radix(&d, radix); assert_eq!( two_wrongs.unwrap_err().kind(), &IntErrorKind::InvalidDigit ); let e = BStr::new(b""); - let empty = i8::from_bstr_radix(e, radix); + let empty = i8::from_bytes_radix(&e, radix); assert_eq!(empty.unwrap_err().kind(), &IntErrorKind::Empty); let f = BStr::new(b"+11"); - assert_eq!(i8::from_bstr_radix(f, radix), Ok(radix + 1)); + assert_eq!(i8::from_bytes_radix(&f, radix), Ok(radix + 1)); let g = BStr::new(b"++11"); assert_eq!( - i8::from_bstr_radix(g, radix).unwrap_err().kind(), + i8::from_bytes_radix(&g, radix).unwrap_err().kind(), &IntErrorKind::InvalidDigit ); let h = BStr::new(b"-111111111"); assert_eq!( - i8::from_bstr_radix(h, radix).unwrap_err().kind(), + i8::from_bytes_radix(&h, radix).unwrap_err().kind(), &IntErrorKind::NegOverflow ); } // this only stringifies in base 10 let min = BString::from(i8::MIN.to_string()); - assert_eq!(i8::from_bstr_radix(min.as_bstr(), 10).unwrap(), i8::MIN); + assert_eq!(i8::from_bytes_radix(&min.as_bstr(), 10).unwrap(), i8::MIN); let max = BString::from(i8::MAX.to_string()); - 
assert_eq!(i8::from_bstr_radix(max.as_bstr(), 10).unwrap(), i8::MAX); + assert_eq!(i8::from_bytes_radix(&max.as_bstr(), 10).unwrap(), i8::MAX); } } diff --git a/src/lib.rs b/src/lib.rs index 0f2b251..dec0204 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -398,7 +398,9 @@ pub use crate::utf8::{ Chars, Utf8Chunk, Utf8Chunks, Utf8Error, }; -pub use crate::from_bstr_radix::{FromBStrRadix, IntErrorKind, ParseIntError}; +pub use crate::from_bytes_radix::{ + FromBytesRadix, IntErrorKind, ParseIntError, +}; mod ascii; mod bstr; @@ -408,7 +410,7 @@ mod byteset; mod ext_slice; #[cfg(feature = "alloc")] mod ext_vec; -mod from_bstr_radix; +mod from_bytes_radix; mod impls; #[cfg(feature = "std")] pub mod io; From 8457446be0cfd686511bd4fa0a8dcb7a51e31529 Mon Sep 17 00:00:00 2001 From: Alex Jago <4475543+alexjago@users.noreply.github.com> Date: Wed, 20 Jul 2022 13:07:27 +1000 Subject: [PATCH 4/8] Make `radix` be a `u32`, fix docs. --- src/from_bytes_radix.rs | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/from_bytes_radix.rs b/src/from_bytes_radix.rs index 8281153..1f827f7 100644 --- a/src/from_bytes_radix.rs +++ b/src/from_bytes_radix.rs @@ -1,20 +1,21 @@ /** A trait which provides `from_bytes_radix()` for integer types. This acts like `from_str_radix`, including panicking if `radix` is not in [2, 32]. -However, there are a few minor differences to `from_str_radix`: -`src` is a `&BStr` and `radix` is the output type rather than always `u32`. -The result type is slightly different too. +However, there are a few minor differences to `from_str_radix` in the input and result types. 
``` use bstr::{BStr, FromBytesRadix, IntErrorKind}; for radix in 2..=36 { + + let r = radix as u8; + let e = BStr::new(b""); let empty = u8::from_bytes_radix(&e, 10); assert_eq!(empty.unwrap_err().kind(), &IntErrorKind::Empty); let a = BStr::new(b"11"); let eleven = u8::from_bytes_radix(&a, radix); - assert_eq!(eleven, Ok(radix + 1)); + assert_eq!(eleven, Ok(r + 1)); let b = BStr::new("111111111"); let pos_overflow = u8::from_bytes_radix(&b, radix); @@ -24,9 +25,11 @@ for radix in 2..=36 { let negatory = u8::from_bytes_radix(&c, radix); assert_eq!(negatory.unwrap_err().kind(), &IntErrorKind::InvalidDigit); - let radix = radix as i32; + + let r = radix as i32; + let totally_fine = i32::from_bytes_radix(&c, radix); - assert_eq!(totally_fine, Ok(-(radix*radix + radix + 1))); + assert_eq!(totally_fine, Ok(-(r*r + r + 1))); } ``` @@ -61,7 +64,7 @@ where fn from_bytes_radix( src: &dyn AsRef<[u8]>, - radix: Self::Integer, + radix: u32, ) -> Result; } @@ -140,7 +143,7 @@ macro_rules! make_from_bytes_radix { fn from_bytes_radix( src: &dyn AsRef<[u8]>, - radix: $t, + radix: u32, ) -> Result<$t, crate::ParseIntError> { // This more-or-less follows the stdlib implementation. @@ -175,7 +178,7 @@ macro_rules! 
make_from_bytes_radix { for i in start..src.len() { let k = src[i]; - let mul = acc.checked_mul(radix); + let mul = acc.checked_mul(radix as $t); let s : u8 = if k >= 48 && k < (48 + num_max) { // 48: `0` in ASCII @@ -258,12 +261,14 @@ mod tests { #[test] fn test_parse_u8() { for radix in 2..=36 { + let r = radix as u8; + let z = BStr::new(b"0"); assert_eq!(u8::from_bytes_radix(&z, radix), Ok(0)); let a = BStr::new(b"11"); let eleven = u8::from_bytes_radix(&a, radix); - assert_eq!(eleven, Ok(radix + 1)); + assert_eq!(eleven, Ok(r + 1)); let b = BStr::new(b"111111111"); let pos_overflow = u8::from_bytes_radix(&b, radix); @@ -291,7 +296,7 @@ mod tests { assert_eq!(empty.unwrap_err().kind(), &IntErrorKind::Empty); let f = BStr::new(b"+11"); - assert_eq!(u8::from_bytes_radix(&f, radix), Ok(radix + 1)); + assert_eq!(u8::from_bytes_radix(&f, radix), Ok(r + 1)); let g = BStr::new(b"++11"); assert_eq!( @@ -315,12 +320,14 @@ mod tests { #[test] fn test_parse_i8() { for radix in 2..=36 { + let r = radix as i8; + let z = BStr::new(b"0"); assert_eq!(i8::from_bytes_radix(&z, radix), Ok(0)); let a = BStr::new(b"11"); let eleven = i8::from_bytes_radix(&a, radix); - assert_eq!(eleven, Ok(radix + 1)); + assert_eq!(eleven, Ok(r + 1)); let b = BStr::new(b"111111111"); let pos_overflow = i8::from_bytes_radix(&b, radix); @@ -331,7 +338,7 @@ mod tests { let c = BStr::new(b"-11"); let totally_fine = i8::from_bytes_radix(&c, radix); - assert_eq!(totally_fine, Ok(-(radix + 1))); + assert_eq!(totally_fine, Ok(-(r + 1))); let d = BStr::new(b"--11"); let two_wrongs = i8::from_bytes_radix(&d, radix); @@ -345,7 +352,7 @@ mod tests { assert_eq!(empty.unwrap_err().kind(), &IntErrorKind::Empty); let f = BStr::new(b"+11"); - assert_eq!(i8::from_bytes_radix(&f, radix), Ok(radix + 1)); + assert_eq!(i8::from_bytes_radix(&f, radix), Ok(r + 1)); let g = BStr::new(b"++11"); assert_eq!( From e3348ed783070fd37f5d99cc013c02bf42820272 Mon Sep 17 00:00:00 2001 From: Alex Jago 
<4475543+alexjago@users.noreply.github.com> Date: Thu, 21 Jul 2022 11:39:14 +1000 Subject: [PATCH 5/8] Fix the assert message, mention at top level --- src/from_bytes_radix.rs | 3 ++- src/lib.rs | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/from_bytes_radix.rs b/src/from_bytes_radix.rs index 1f827f7..43d0568 100644 --- a/src/from_bytes_radix.rs +++ b/src/from_bytes_radix.rs @@ -1,6 +1,7 @@ /** A trait which provides `from_bytes_radix()` for integer types. This acts like `from_str_radix`, including panicking if `radix` is not in [2, 32]. +`0-9`, `A-Z` and `a-z` are supported as possible digits (case-insensitive.) However, there are a few minor differences to `from_str_radix` in the input and result types. ``` use bstr::{BStr, FromBytesRadix, IntErrorKind}; @@ -148,7 +149,7 @@ macro_rules! make_from_bytes_radix { { // This more-or-less follows the stdlib implementation. - assert!((2..=36).contains(&radix), "from_str_radix_int: must lie in the range `[2, 36]` - found {}", radix); + assert!((2..=36).contains(&radix), "from_bytes_radix_int: must lie in the range `[2, 36]` - found {}", radix); let src = src.as_ref(); diff --git a/src/lib.rs b/src/lib.rs index dec0204..7d764d3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -26,6 +26,8 @@ with convenient `std::fmt::Debug` implementations: Additionally, the free function [`B`](fn.B.html) serves as a convenient short hand for writing byte string literals. +And the trait [`FromBytesRadix`](trait.FromBytesRadix.html) provides integer +parsing with `from_bytes_radix`, like the standard library's `from_str_radix`. # Quick examples From e4e2b4c45ad3666599aad59b07c5231ffe3fc990 Mon Sep 17 00:00:00 2001 From: Alex Jago <4475543+alexjago@users.noreply.github.com> Date: Thu, 21 Jul 2022 15:34:20 +1000 Subject: [PATCH 6/8] Remove extraneous ; improve docs and tests. 
--- src/from_bytes_radix.rs | 195 +++++++++++++++++++++++++--------------- 1 file changed, 122 insertions(+), 73 deletions(-) diff --git a/src/from_bytes_radix.rs b/src/from_bytes_radix.rs index 43d0568..d76a7ce 100644 --- a/src/from_bytes_radix.rs +++ b/src/from_bytes_radix.rs @@ -1,28 +1,28 @@ /** A trait which provides `from_bytes_radix()` for integer types. -This acts like `from_str_radix`, including panicking if `radix` is not in [2, 32]. -`0-9`, `A-Z` and `a-z` are supported as possible digits (case-insensitive.) -However, there are a few minor differences to `from_str_radix` in the input and result types. +This acts like `from_str_radix`, including panicking if `radix` is not in +[2, 36] and supporting `[0-9A-Za-z]` as possible digits, depending on the +value of `radix`. ``` -use bstr::{BStr, FromBytesRadix, IntErrorKind}; +use bstr::{FromBytesRadix, IntErrorKind, B}; for radix in 2..=36 { let r = radix as u8; - let e = BStr::new(b""); + let e = B(""); let empty = u8::from_bytes_radix(&e, 10); assert_eq!(empty.unwrap_err().kind(), &IntErrorKind::Empty); - let a = BStr::new(b"11"); + let a = B("11"); let eleven = u8::from_bytes_radix(&a, radix); assert_eq!(eleven, Ok(r + 1)); - let b = BStr::new("111111111"); + let b = B("111111111"); let pos_overflow = u8::from_bytes_radix(&b, radix); assert_eq!(pos_overflow.unwrap_err().kind(), &IntErrorKind::PosOverflow); - let c = BStr::new("-111"); + let c = B("-111"); let negatory = u8::from_bytes_radix(&c, radix); assert_eq!(negatory.unwrap_err().kind(), &IntErrorKind::InvalidDigit); @@ -38,35 +38,26 @@ The `NonZero` versions of integers are not currently supported for parsing.
Instead, please parse the equivalent possibly-zero integer, then convert: ``` use core::num::NonZeroU8; -use bstr::{BStr, FromBytesRadix}; +use bstr::{FromBytesRadix, B}; -let a = BStr::new(b"11"); +let a = B("11"); let eleven = u8::from_bytes_radix(&a, 10).ok().and_then(NonZeroU8::new); assert_eq!(eleven, NonZeroU8::new(11)); -let zero = BStr::new(b"0"); +let zero = B("0"); let nada = u8::from_bytes_radix(&zero, 10).ok().and_then(NonZeroU8::new); assert_eq!(nada, None); ``` - - */ -pub trait FromBytesRadix -where - T: PartialOrd - + Copy - + core::ops::Add - + core::ops::Sub - + core::ops::Mul, -{ - /// Whatever integer type is being parsed. - type Integer; +pub trait FromBytesRadix { fn from_bytes_radix( src: &dyn AsRef<[u8]>, radix: u32, - ) -> Result; + ) -> Result + where + Self: Sized; } // ParseIntError and impl is almost entirely copy-pasted from the standard library @@ -139,18 +130,31 @@ pub enum IntErrorKind { macro_rules! make_from_bytes_radix { ($t:ident) => { - impl FromBytesRadix<$t> for $t { - type Integer = $t; - + impl FromBytesRadix for $t { fn from_bytes_radix( src: &dyn AsRef<[u8]>, radix: u32, ) -> Result<$t, crate::ParseIntError> { - // This more-or-less follows the stdlib implementation. - + //! Convert a `u8` slice in a given base (`radix`) + //! to an integer. + //! + //! This acts like [`from_str_radix`](https://doc.rust-lang.org/std/primitive.i8.html#method.from_str_radix): + //! + //! * Digits are a subset of `[0-9A-Za-z]`, depending on `radix` + //! * A `radix` outside [2, 36] will cause a __panic__ + //! * A single `+` may optionally precede the digits + //! * For signed types a single `-` may optionally precede the digits + //! * Any other characters (including whitespace and `_`) are invalid + //! + + // Provide a nice formatted error message if we can.
+ #[cfg(feature = "alloc")] assert!((2..=36).contains(&radix), "from_bytes_radix_int: must lie in the range `[2, 36]` - found {}", radix); + #[cfg(not(feature = "alloc"))] + assert!((2..=36).contains(&radix)); + let src = src.as_ref(); if let Some(s0) = src.get(0) { @@ -246,68 +250,71 @@ mod tests { * Leading negative (should be OK for signed types, `InvalidDigit` for unsigned) * Empty string (should be `Empty`) * Leading double negative (should always be `InvalidDigit`) - * Leading positive (should always parse OK) + * Leading positive (should always parse as `Ok(radix + 1)`) * Leading double positive (should always be `InvalidDigit`) - * Signed empty string (should be `InvalidDigit`) + * Standalone `+` or `-` (should be `InvalidDigit`) * * MIN and MAX round-trip (done in base 10 only, because `to_string` assumes that) + * MIN-1 (if signed) and MAX+1 for negative and positive overflows (ditto base 10) + * + * Over-large radix (matching stdlib panic) + * + * Error on whitespace or underscore (matching stdlib) * */ use super::*; - use crate::BStr; - use crate::BString; - use crate::IntErrorKind; + use crate::{BString, IntErrorKind, B}; #[test] - fn test_parse_u8() { + fn parse_u8() { for radix in 2..=36 { let r = radix as u8; - let z = BStr::new(b"0"); - assert_eq!(u8::from_bytes_radix(&z, radix), Ok(0)); + assert_eq!(u8::from_bytes_radix(&B("0"), radix), Ok(0)); - let a = BStr::new(b"11"); - let eleven = u8::from_bytes_radix(&a, radix); + let eleven = u8::from_bytes_radix(&B("11"), radix); assert_eq!(eleven, Ok(r + 1)); - let b = BStr::new(b"111111111"); - let pos_overflow = u8::from_bytes_radix(&b, radix); + // Nine 1s in a row is larger than a u8 even in base 2 + let pos_overflow = u8::from_bytes_radix(&B("111111111"), radix); assert_eq!( pos_overflow.unwrap_err().kind(), &IntErrorKind::PosOverflow ); - let c = BStr::new(b"-11"); - let negatory = u8::from_bytes_radix(&c, radix); + let negatory = u8::from_bytes_radix(&B("-11"), radix); assert_eq!( 
negatory.unwrap_err().kind(), &IntErrorKind::InvalidDigit ); - let d = BStr::new(b"--11"); - let two_wrongs = u8::from_bytes_radix(&d, radix); + let two_wrongs = u8::from_bytes_radix(&B("--11"), radix); assert_eq!( two_wrongs.unwrap_err().kind(), &IntErrorKind::InvalidDigit ); - let e = BStr::new(b""); - let empty = u8::from_bytes_radix(&e, radix); + let empty = u8::from_bytes_radix(&B(""), radix); assert_eq!(empty.unwrap_err().kind(), &IntErrorKind::Empty); - let f = BStr::new(b"+11"); - assert_eq!(u8::from_bytes_radix(&f, radix), Ok(r + 1)); + let leading_plus = u8::from_bytes_radix(&B("+11"), radix); + assert_eq!(leading_plus, Ok(r + 1)); - let g = BStr::new(b"++11"); + let ungood = u8::from_bytes_radix(&B("++11"), radix); assert_eq!( - u8::from_bytes_radix(&g, radix).unwrap_err().kind(), + ungood.unwrap_err().kind(), &IntErrorKind::InvalidDigit ); - let i = BStr::new("+"); + let plus_only = u8::from_bytes_radix(&B("+"), radix); assert_eq!( - u8::from_bytes_radix(&i, radix).unwrap_err().kind(), + plus_only.unwrap_err().kind(), + &IntErrorKind::InvalidDigit + ); + + assert_eq!( + u8::from_bytes_radix(&B("-"), radix).unwrap_err().kind(), &IntErrorKind::InvalidDigit ); } @@ -316,54 +323,66 @@ mod tests { assert_eq!(u8::from_bytes_radix(&min.as_bstr(), 10), Ok(u8::MIN)); let max = BString::from(u8::MAX.to_string()); assert_eq!(u8::from_bytes_radix(&max.as_bstr(), 10), Ok(u8::MAX)); + + let maxmax = BString::from((u8::MAX as i16 + 1_i16).to_string()); + assert_eq!( + u8::from_bytes_radix(&maxmax.as_bstr(), 10).unwrap_err().kind(), + &IntErrorKind::PosOverflow + ); } #[test] - fn test_parse_i8() { + fn parse_i8() { for radix in 2..=36 { let r = radix as i8; - let z = BStr::new(b"0"); - assert_eq!(i8::from_bytes_radix(&z, radix), Ok(0)); + assert_eq!(i8::from_bytes_radix(&B("0"), radix), Ok(0)); - let a = BStr::new(b"11"); - let eleven = i8::from_bytes_radix(&a, radix); + let eleven = i8::from_bytes_radix(&B("11"), radix); assert_eq!(eleven, Ok(r + 1)); - let b = 
BStr::new(b"111111111"); - let pos_overflow = i8::from_bytes_radix(&b, radix); + // Eight ones in a row is sufficient to overflow an i8 + let pos_overflow = i8::from_bytes_radix(&B("11111111"), radix); assert_eq!( pos_overflow.unwrap_err().kind(), &IntErrorKind::PosOverflow ); - let c = BStr::new(b"-11"); - let totally_fine = i8::from_bytes_radix(&c, radix); - assert_eq!(totally_fine, Ok(-(r + 1))); + let leading_minus = i8::from_bytes_radix(&B("-11"), radix); + assert_eq!(leading_minus, Ok(-(r + 1))); - let d = BStr::new(b"--11"); - let two_wrongs = i8::from_bytes_radix(&d, radix); + let two_wrongs = i8::from_bytes_radix(&B("--11"), radix); assert_eq!( two_wrongs.unwrap_err().kind(), &IntErrorKind::InvalidDigit ); - let e = BStr::new(b""); - let empty = i8::from_bytes_radix(&e, radix); + let empty = i8::from_bytes_radix(&B(""), radix); assert_eq!(empty.unwrap_err().kind(), &IntErrorKind::Empty); - let f = BStr::new(b"+11"); - assert_eq!(i8::from_bytes_radix(&f, radix), Ok(r + 1)); + let leading_plus = i8::from_bytes_radix(&B("+11"), radix); + assert_eq!(leading_plus, Ok(r + 1)); - let g = BStr::new(b"++11"); + let ungood = i8::from_bytes_radix(&B("++11"), radix); assert_eq!( - i8::from_bytes_radix(&g, radix).unwrap_err().kind(), + ungood.unwrap_err().kind(), &IntErrorKind::InvalidDigit ); - let h = BStr::new(b"-111111111"); + let plus_only = i8::from_bytes_radix(&B("+"), radix); assert_eq!( - i8::from_bytes_radix(&h, radix).unwrap_err().kind(), + plus_only.unwrap_err().kind(), + &IntErrorKind::InvalidDigit + ); + let minus_only = i8::from_bytes_radix(&B("-"), radix); + assert_eq!( + minus_only.unwrap_err().kind(), + &IntErrorKind::InvalidDigit + ); + + let neg_overflow = i8::from_bytes_radix(&B("-11111111"), radix); + assert_eq!( + neg_overflow.unwrap_err().kind(), &IntErrorKind::NegOverflow ); } @@ -372,5 +391,35 @@ mod tests { assert_eq!(i8::from_bytes_radix(&min.as_bstr(), 10).unwrap(), i8::MIN); let max = BString::from(i8::MAX.to_string()); 
assert_eq!(i8::from_bytes_radix(&max.as_bstr(), 10).unwrap(), i8::MAX); + + let minmin = BString::from((i8::MIN as i16 - 1_i16).to_string()); + assert_eq!( + i8::from_bytes_radix(&minmin.as_bstr(), 10).unwrap_err().kind(), + &IntErrorKind::NegOverflow + ); + + let maxmax = BString::from((i8::MAX as i16 + 1_i16).to_string()); + assert_eq!( + i8::from_bytes_radix(&maxmax.as_bstr(), 10).unwrap_err().kind(), + &IntErrorKind::PosOverflow + ); + } + + /// Test a radix that's greater than allowed + #[test] + #[should_panic] + fn radix_too_large() { + let b = B("11"); + let _ = u8::from_bytes_radix(&b, 1000); + } + + /// Ensure we behave like the stdlib here + #[test] + fn underscore_whitespace() { + let _ = i32::from_str_radix("1_000_000", 10).unwrap_err(); + let _ = i32::from_bytes_radix(&B("1_000_000"), 10).unwrap_err(); + + let _ = i32::from_str_radix("1 000 000", 10).unwrap_err(); + let _ = i32::from_bytes_radix(&B("1 000 000"), 10).unwrap_err(); } } From c887025584dcd0b6e6df39bc853d6ca0d99606b1 Mon Sep 17 00:00:00 2001 From: Alex Jago <4475543+alexjago@users.noreply.github.com> Date: Fri, 22 Jul 2022 18:42:31 +1000 Subject: [PATCH 7/8] Update src/from_bytes_radix.rs Co-authored-by: Ryan Lopopolo --- src/from_bytes_radix.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/from_bytes_radix.rs b/src/from_bytes_radix.rs index d76a7ce..93fdc19 100644 --- a/src/from_bytes_radix.rs +++ b/src/from_bytes_radix.rs @@ -51,7 +51,11 @@ assert_eq!(nada, None); */ -pub trait FromBytesRadix { +mod private { + pub trait Sealed {} +} + +pub trait FromBytesRadix: Sized + private::Sealed { fn from_bytes_radix( src: &dyn AsRef<[u8]>, radix: u32, From e239bc5a397ba64e929558c4d11d6b24a779d561 Mon Sep 17 00:00:00 2001 From: Alex Jago <4475543+alexjago@users.noreply.github.com> Date: Fri, 22 Jul 2022 18:42:38 +1000 Subject: [PATCH 8/8] Update src/from_bytes_radix.rs Co-authored-by: Ryan Lopopolo --- src/from_bytes_radix.rs | 2 ++ 1 file changed, 2 insertions(+) diff 
--git a/src/from_bytes_radix.rs b/src/from_bytes_radix.rs index 93fdc19..a49da00 100644 --- a/src/from_bytes_radix.rs +++ b/src/from_bytes_radix.rs @@ -134,6 +134,8 @@ pub enum IntErrorKind { macro_rules! make_from_bytes_radix { ($t:ident) => { + impl private::Sealed for $t {} + impl FromBytesRadix for $t { fn from_bytes_radix( src: &dyn AsRef<[u8]>,