From 50b14bcfb6e4a43b60aa0cc5bd19a3004895c63d Mon Sep 17 00:00:00 2001
From: Laura Demkowicz-Duffy <dev@demkowiczduffy.co.uk>
Date: Tue, 3 Dec 2024 15:26:12 +0000
Subject: [PATCH] feat: multi::many_till_count combinator

---
 doc/choosing_a_combinator.md |  1 +
 src/multi/mod.rs             | 97 ++++++++++++++++++++++++++++++++++++
 src/multi/tests.rs           | 27 +++++++++-
 tests/overflow.rs            | 16 ++++++
 4 files changed, 140 insertions(+), 1 deletion(-)
diff --git a/doc/choosing_a_combinator.md b/doc/choosing_a_combinator.md
index 07744efaf..aa0277fa3 100644
--- a/doc/choosing_a_combinator.md
+++ b/doc/choosing_a_combinator.md
@@ -49,6 +49,7 @@ Those are used to recognize the lowest level elements of your grammar, like, "he
 | [many0_count](https://docs.rs/nom/latest/nom/multi/fn.many0_count.html) | `many0_count(tag("ab"))` | `"abababc"` | `Ok(("c", 3))` |Applies the parser 0 or more times and returns how often it was applicable. `many1_count` does the same operation but the parser must apply at least once|
 | [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec|
 | [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second|
+| [many_till_count](https://docs.rs/nom/latest/nom/multi/fn.many_till_count.html) | `many_till_count(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (2, "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the number of times the first succeeded and the result of the second|
 | [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated_list1` works like `separated_list0` but must returns at least one element|
 | [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. The `fold_many1` version must apply the child parser at least one time|
 | [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value|
diff --git a/src/multi/mod.rs b/src/multi/mod.rs
index a1436d6d9..c66e6fa89 100644
--- a/src/multi/mod.rs
+++ b/src/multi/mod.rs
@@ -849,6 +849,103 @@ where
   }
 }
 
+/// Applies the parser `f` until the parser `g` produces a result.
+///
+/// Returns a tuple of the number of times `f` succeeded and the result of `g`. The count is `0` if `g` succeeds on the first try.
+///
+/// `f` keeps going so long as `g` produces [`Err::Error`]. To instead chain an error up, see [`cut`][crate::combinator::cut]. The whole parser fails if `f` errors or succeeds without consuming input.
+///
+/// ```rust
+/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Parser};
+/// use nom::multi::many_till_count;
+/// use nom::bytes::complete::tag;
+///
+/// fn parser(s: &str) -> IResult<&str, (usize, &str)> {
+///   many_till_count(tag("abc"), tag("end")).parse(s)
+/// }
+///
+/// assert_eq!(parser("abcabcend"), Ok(("", (2, "end"))));
+/// assert_eq!(parser("abc123end"), Err(Err::Error(Error::new("123end", ErrorKind::Tag))));
+/// assert_eq!(parser("123123end"), Err(Err::Error(Error::new("123123end", ErrorKind::Tag))));
+/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
+/// assert_eq!(parser("abcendefg"), Ok(("efg", (1, "end"))));
+/// ```
+pub fn many_till_count<I, E, F, G>(
+  f: F,
+  g: G,
+) -> impl Parser<I, Output = (usize, <G as Parser<I>>::Output), Error = E>
+where
+  I: Clone + Input,
+  F: Parser<I, Error = E>,
+  G: Parser<I, Error = E>,
+  E: ParseError<I>,
+{
+  ManyTillCount {
+    f,
+    g,
+    e: PhantomData,
+  }
+}
+
+/// Parser implementation for the [`many_till_count`] combinator
+pub struct ManyTillCount<F, G, E> {
+  f: F, // repeated parser; only the number of its successes is kept
+  g: G, // terminating parser; its output is returned alongside the count
+  e: PhantomData<E>, // marker for the error type `E`; no value of it is stored
+}
+
+impl<I, F, G, E> Parser<I> for ManyTillCount<F, G, E>
+where
+  I: Clone + Input,
+  F: Parser<I, Error = E>,
+  G: Parser<I, Error = E>,
+  E: ParseError<I>,
+{
+  type Output = (usize, <G as Parser<I>>::Output); // (successful runs of `f`, output of `g`)
+  type Error = E;
+  // Each pass tries `g` first; on its `Err::Error`, `f` is run and a success bumps the count.
+  fn process<OM: OutputMode>(
+    &mut self,
+    mut i: I,
+  ) -> crate::PResult<OM, I, Self::Output, Self::Error> {
+    let mut count = OM::Output::bind(|| 0); // tally of successful `f` runs
+    loop {
+      let len = i.input_len(); // length before this pass, used by the no-progress check
+      match self
+        .g
+        .process::<OutputM<OM::Output, Check, OM::Incomplete>>(i.clone())
+      {
+        Ok((i1, o)) => return Ok((i1, OM::Output::combine(count, o, |res, o| (res, o)))),
+        Err(Err::Failure(e)) => return Err(Err::Failure(e)),
+        Err(Err::Incomplete(i)) => return Err(Err::Incomplete(i)),
+        Err(Err::Error(_)) => {
+          match self.f.process::<OM>(i.clone()) {
+            Err(Err::Error(err)) => {
+              return Err(Err::Error(OM::Error::map(err, |err| {
+                E::append(i, ErrorKind::ManyTill, err) // add `ManyTill` context to `f`'s error
+              })))
+            }
+            Err(Err::Failure(e)) => return Err(Err::Failure(e)),
+            Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)),
+            Ok((i1, o)) => {
+              // infinite loop check: `f` must always consume. NOTE(review): Many0 kind here, not ManyTill - intended?
+              if i1.input_len() == len {
+                return Err(Err::Error(OM::Error::bind(|| {
+                  E::from_error_kind(i, ErrorKind::Many0)
+                })));
+              }
+
+              i = i1; // next pass starts where `f` stopped
+              // the output of `f` is discarded; only the tally is incremented
+              count = OM::Output::combine(count, o, |acc, _o| acc + 1)
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
 /// Runs the embedded parser `count` times, gathering the results in a `Vec`
 ///
 /// # Arguments
diff --git a/src/multi/tests.rs b/src/multi/tests.rs
index 080240cd4..a22de6d85 100644
--- a/src/multi/tests.rs
+++ b/src/multi/tests.rs
@@ -14,7 +14,7 @@ use crate::{
   lib::std::vec::Vec,
   multi::{
     count, fold, fold_many0, fold_many1, fold_many_m_n, length_count, many, many0, many1, many_m_n,
-    many_till, separated_list0, separated_list1,
+    many_till, many_till_count, separated_list0, separated_list1,
   },
 };
 
@@ -185,6 +185,31 @@ fn many_till_test() {
   );
 }
 
+#[test]
+fn many_till_count_test() {
+  // Counts leading "abcd" repetitions and stops once "efgh" has been parsed.
+  #[allow(clippy::type_complexity)]
+  fn count_until_efgh(input: &[u8]) -> IResult<&[u8], (usize, &[u8])> {
+    many_till_count(tag("abcd"), tag("efgh")).parse(input)
+  }
+
+  let two_runs: &[u8] = b"abcdabcdefghabcd";
+  let no_runs: &[u8] = b"efghabcd";
+  let no_match: &[u8] = b"azerty";
+
+  assert_eq!(count_until_efgh(two_runs), Ok((&b"abcd"[..], (2, &b"efgh"[..]))));
+  assert_eq!(count_until_efgh(no_runs), Ok((&b"abcd"[..], (0, &b"efgh"[..]))));
+  // Neither parser matches: the first parser's `Tag` error is nested under `ManyTill`.
+  assert_eq!(
+    count_until_efgh(no_match),
+    Err(Err::Error(error_node_position!(
+      no_match,
+      ErrorKind::ManyTill,
+      error_position!(no_match, ErrorKind::Tag)
+    )))
+  );
+}
+
 #[test]
 #[cfg(feature = "std")]
 fn infinite_many() {
diff --git a/tests/overflow.rs b/tests/overflow.rs
index 8016548ba..d06769434 100644
--- a/tests/overflow.rs
+++ b/tests/overflow.rs
@@ -82,6 +82,22 @@ fn overflow_incomplete_many_till() {
   );
 }
 
+#[test]
+fn overflow_incomplete_many_till_count() {
+  use nom::{bytes::complete::tag, multi::many_till_count};
+
+  #[allow(clippy::type_complexity)]
+  fn count_chunks_until_abc(input: &[u8]) -> IResult<&[u8], (usize, &[u8])> {
+    many_till_count(length_data(be_u64), tag("abc")).parse(input)
+  }
+
+  // Trigger an overflow in many_till_count: after one 1-byte chunk, the next length prefix
+  // (0xffff_ffff_ffff_ffef) is far larger than the input that remains.
+  let oversized = b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef";
+  let expected = Err(Err::Incomplete(Needed::new(0xffff_ffff_ffff_ffef)));
+  assert_eq!(count_chunks_until_abc(&oversized[..]), expected);
+}
+
 #[test]
 #[cfg(feature = "alloc")]
 fn overflow_incomplete_many_m_n() {