From 50b14bcfb6e4a43b60aa0cc5bd19a3004895c63d Mon Sep 17 00:00:00 2001 From: Laura Demkowicz-Duffy Date: Tue, 3 Dec 2024 15:26:12 +0000 Subject: [PATCH] feat: multi::many_till_count combinator --- doc/choosing_a_combinator.md | 1 + src/multi/mod.rs | 97 ++++++++++++++++++++++++++++++++++++ src/multi/tests.rs | 27 +++++++++- tests/overflow.rs | 16 ++++++ 4 files changed, 140 insertions(+), 1 deletion(-) diff --git a/doc/choosing_a_combinator.md b/doc/choosing_a_combinator.md index 07744efaf..aa0277fa3 100644 --- a/doc/choosing_a_combinator.md +++ b/doc/choosing_a_combinator.md @@ -49,6 +49,7 @@ Those are used to recognize the lowest level elements of your grammar, like, "he | [many0_count](https://docs.rs/nom/latest/nom/multi/fn.many0_count.html) | `many0_count(tag("ab"))` | `"abababc"` | `Ok(("c", 3))` |Applies the parser 0 or more times and returns how often it was applicable. `many1_count` does the same operation but the parser must apply at least once| | [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| | [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| +| [many_till_count](https://docs.rs/nom/latest/nom/multi/fn.many_till_count.html) | `many_till_count(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (2, "ef")))` |Applies the first parser until the second applies. 
Returns a tuple containing the number of times the first succeeded and the result of the second| | [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated_list1` works like `separated_list0` but must returns at least one element| | [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. The `fold_many1` version must apply the child parser at least one time| | [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value| diff --git a/src/multi/mod.rs b/src/multi/mod.rs index a1436d6d9..c66e6fa89 100644 --- a/src/multi/mod.rs +++ b/src/multi/mod.rs @@ -849,6 +849,103 @@ where } } +/// Applies the parser `f` until the parser `g` produces a result. +/// +/// Returns a tuple of the number of times `f` succeeded and the result of `g`. +/// +/// `f` keeps going so long as `g` produces [`Err::Error`]. To instead chain an error up, see [`cut`][crate::combinator::cut]. 
+/// +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult, Parser}; +/// use nom::multi::many_till_count; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, (usize, &str)> { +/// many_till_count(tag("abc"), tag("end")).parse(s) +/// }; +/// +/// assert_eq!(parser("abcabcend"), Ok(("", (2, "end")))); +/// assert_eq!(parser("abc123end"), Err(Err::Error(Error::new("123end", ErrorKind::Tag)))); +/// assert_eq!(parser("123123end"), Err(Err::Error(Error::new("123123end", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("abcendefg"), Ok(("efg", (1, "end")))); +/// ``` +pub fn many_till_count<I, E, F, G>( + f: F, + g: G, +) -> impl Parser<I, Output = (usize, <G as Parser<I>>::Output), Error = E> +where + I: Clone + Input, + F: Parser<I, Error = E>, + G: Parser<I, Error = E>, + E: ParseError<I>, +{ + ManyTillCount { + f, + g, + e: PhantomData, + } +} + +/// Parser implementation for the [many_till_count] combinator +pub struct ManyTillCount<F, G, E> { + f: F, + g: G, + e: PhantomData<E>, +} + +impl<I, F, G, E> Parser<I> for ManyTillCount<F, G, E> +where + I: Clone + Input, + F: Parser<I, Error = E>, + G: Parser<I, Error = E>, + E: ParseError<I>, +{ + type Output = (usize, <G as Parser<I>>::Output); + type Error = E; + + fn process<OM: OutputMode>( + &mut self, + mut i: I, + ) -> crate::PResult<OM, I, Self::Output, Self::Error> { + let mut count = OM::Output::bind(|| 0); + loop { + let len = i.input_len(); + match self + .g + .process::<OutputM<OM::Output, Check, OM::Incomplete>>(i.clone()) + { + Ok((i1, o)) => return Ok((i1, OM::Output::combine(count, o, |res, o| (res, o)))), + Err(Err::Failure(e)) => return Err(Err::Failure(e)), + Err(Err::Incomplete(i)) => return Err(Err::Incomplete(i)), + Err(Err::Error(_)) => { + match self.f.process::<OM>(i.clone()) { + Err(Err::Error(err)) => { + return Err(Err::Error(OM::Error::map(err, |err| { + E::append(i, ErrorKind::ManyTill, err) + }))) + } + Err(Err::Failure(e)) => return Err(Err::Failure(e)), + Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), + Ok((i1, o)) => { + // infinite loop check: the parser must always consume + if i1.input_len() == len 
{ + return Err(Err::Error(OM::Error::bind(|| { + E::from_error_kind(i, ErrorKind::Many0) + }))); + } + + i = i1; + + count = OM::Output::combine(count, o, |acc, _o| acc + 1) + } + } + } + } + } + } +} + /// Runs the embedded parser `count` times, gathering the results in a `Vec` /// /// # Arguments diff --git a/src/multi/tests.rs b/src/multi/tests.rs index 080240cd4..a22de6d85 100644 --- a/src/multi/tests.rs +++ b/src/multi/tests.rs @@ -14,7 +14,7 @@ use crate::{ lib::std::vec::Vec, multi::{ count, fold, fold_many0, fold_many1, fold_many_m_n, length_count, many, many0, many1, many_m_n, - many_till, separated_list0, separated_list1, + many_till, many_till_count, separated_list0, separated_list1, }, }; @@ -185,6 +185,31 @@ fn many_till_test() { ); } +#[test] +fn many_till_count_test() { + #[allow(clippy::type_complexity)] + fn multi(i: &[u8]) -> IResult<&[u8], (usize, &[u8])> { + many_till_count(tag("abcd"), tag("efgh")).parse(i) + } + + let a = b"abcdabcdefghabcd"; + let b = b"efghabcd"; + let c = b"azerty"; + + let res_a = (2, &b"efgh"[..]); + let res_b = (0, &b"efgh"[..]); + assert_eq!(multi(&a[..]), Ok((&b"abcd"[..], res_a))); + assert_eq!(multi(&b[..]), Ok((&b"abcd"[..], res_b))); + assert_eq!( + multi(&c[..]), + Err(Err::Error(error_node_position!( + &c[..], + ErrorKind::ManyTill, + error_position!(&c[..], ErrorKind::Tag) + ))) + ); +} + #[test] #[cfg(feature = "std")] fn infinite_many() { diff --git a/tests/overflow.rs b/tests/overflow.rs index 8016548ba..d06769434 100644 --- a/tests/overflow.rs +++ b/tests/overflow.rs @@ -82,6 +82,22 @@ fn overflow_incomplete_many_till() { ); } +#[test] +fn overflow_incomplete_many_till_count() { + use nom::{bytes::complete::tag, multi::many_till_count}; + + #[allow(clippy::type_complexity)] + fn multi(i: &[u8]) -> IResult<&[u8], (usize, &[u8])> { + many_till_count(length_data(be_u64), tag("abc")).parse(i) + } + + // Trigger an overflow in many_till_count + assert_eq!( + 
multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]), + Err(Err::Incomplete(Needed::new(18446744073709551599))) + ); +} + #[test] #[cfg(feature = "alloc")] fn overflow_incomplete_many_m_n() {