Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: multi::many_till_count combinator #1790

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/choosing_a_combinator.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ Those are used to recognize the lowest level elements of your grammar, like, "he
| [many0_count](https://docs.rs/nom/latest/nom/multi/fn.many0_count.html) | `many0_count(tag("ab"))` | `"abababc"` | `Ok(("c", 3))` |Applies the parser 0 or more times and returns how often it was applicable. `many1_count` does the same operation but the parser must apply at least once|
| [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec|
| [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second|
| [many_till_count](https://docs.rs/nom/latest/nom/multi/fn.many_till_count.html) | `many_till_count(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (2, "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the number of times the first succeeded and the result of the second|
| [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated_list1` works like `separated_list0` but must return at least one element|
| [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. The `fold_many1` version must apply the child parser at least one time|
| [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return values|
Expand Down
97 changes: 97 additions & 0 deletions src/multi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,103 @@ where
}
}

/// Repeatedly applies the parser `f` until the parser `g` succeeds, counting the
/// successful applications of `f` instead of collecting their outputs.
///
/// The output is the tuple `(count, g_output)`: how many times `f` succeeded, and
/// the value produced by `g`.
///
/// `g` is tried before each application of `f`, and `f` is applied again for as long
/// as `g` returns [`Err::Error`]. To propagate an error instead, see
/// [`cut`][crate::combinator::cut]. An application of `f` that succeeds without
/// consuming any input is reported as an error rather than looping forever.
///
/// ```rust
/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult, Parser};
/// use nom::multi::many_till_count;
/// use nom::bytes::complete::tag;
///
/// fn parser(s: &str) -> IResult<&str, (usize, &str)> {
///   many_till_count(tag("abc"), tag("end")).parse(s)
/// }
///
/// assert_eq!(parser("abcabcend"), Ok(("", (2, "end"))));
/// assert_eq!(parser("abc123end"), Err(Err::Error(Error::new("123end", ErrorKind::Tag))));
/// assert_eq!(parser("123123end"), Err(Err::Error(Error::new("123123end", ErrorKind::Tag))));
/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag))));
/// assert_eq!(parser("abcendefg"), Ok(("efg", (1, "end"))));
/// ```
pub fn many_till_count<I, E, F, G>(
  f: F,
  g: G,
) -> impl Parser<I, Output = (usize, <G as Parser<I>>::Output), Error = E>
where
  I: Clone + Input,
  E: ParseError<I>,
  F: Parser<I, Error = E>,
  G: Parser<I, Error = E>,
{
  // The marker only pins the error type `E`; it holds no data.
  let e = PhantomData;
  ManyTillCount { f, g, e }
}

/// Parser implementation for the [`many_till_count`] combinator.
///
/// Built by [`many_till_count`]; its `Parser` impl yields how many times `f` succeeded
/// before `g` matched, together with the output of `g`.
pub struct ManyTillCount<F, G, E> {
// parser applied repeatedly; only its successes are counted
f: F,
// terminator parser, tried before each application of `f`
g: G,
// carries the shared error type `E` without storing a value
e: PhantomData<E>,
}

impl<I, F, G, E> Parser<I> for ManyTillCount<F, G, E>
where
I: Clone + Input,
F: Parser<I, Error = E>,
G: Parser<I, Error = E>,
E: ParseError<I>,
{
type Output = (usize, <G as Parser<I>>::Output);
type Error = E;

fn process<OM: OutputMode>(
&mut self,
mut i: I,
) -> crate::PResult<OM, I, Self::Output, Self::Error> {
let mut count = OM::Output::bind(|| 0);
loop {
let len = i.input_len();
match self
.g
.process::<OutputM<OM::Output, Check, OM::Incomplete>>(i.clone())
{
Ok((i1, o)) => return Ok((i1, OM::Output::combine(count, o, |res, o| (res, o)))),
Err(Err::Failure(e)) => return Err(Err::Failure(e)),
Err(Err::Incomplete(i)) => return Err(Err::Incomplete(i)),
Err(Err::Error(_)) => {
match self.f.process::<OM>(i.clone()) {
Err(Err::Error(err)) => {
return Err(Err::Error(OM::Error::map(err, |err| {
E::append(i, ErrorKind::ManyTill, err)
})))
}
Err(Err::Failure(e)) => return Err(Err::Failure(e)),
Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)),
Ok((i1, o)) => {
// infinite loop check: the parser must always consume
if i1.input_len() == len {
return Err(Err::Error(OM::Error::bind(|| {
E::from_error_kind(i, ErrorKind::Many0)
})));
}

i = i1;

count = OM::Output::combine(count, o, |acc, _o| acc + 1)
}
}
}
}
}
}
}

/// Runs the embedded parser `count` times, gathering the results in a `Vec`
///
/// # Arguments
Expand Down
27 changes: 26 additions & 1 deletion src/multi/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::{
lib::std::vec::Vec,
multi::{
count, fold, fold_many0, fold_many1, fold_many_m_n, length_count, many, many0, many1, many_m_n,
many_till, separated_list0, separated_list1,
many_till, many_till_count, separated_list0, separated_list1,
},
};

Expand Down Expand Up @@ -185,6 +185,31 @@ fn many_till_test() {
);
}

/// Checks the count and terminator output of `many_till_count`, and the error
/// produced when neither the item parser nor the terminator matches.
#[test]
fn many_till_count_test() {
  // No `#[allow(clippy::type_complexity)]` needed: unlike `many_till`, the output
  // here is a plain `(usize, &[u8])`.
  fn multi(i: &[u8]) -> IResult<&[u8], (usize, &[u8])> {
    many_till_count(tag("abcd"), tag("efgh")).parse(i)
  }

  // two items, then the terminator; the trailing input is handed back untouched
  assert_eq!(
    multi(&b"abcdabcdefghabcd"[..]),
    Ok((&b"abcd"[..], (2, &b"efgh"[..])))
  );

  // the terminator matches right away: zero items counted
  assert_eq!(
    multi(&b"efghabcd"[..]),
    Ok((&b"abcd"[..], (0, &b"efgh"[..])))
  );

  // neither parser matches: the item parser's `Tag` error is wrapped in `ManyTill`
  let unmatched = &b"azerty"[..];
  assert_eq!(
    multi(unmatched),
    Err(Err::Error(error_node_position!(
      unmatched,
      ErrorKind::ManyTill,
      error_position!(unmatched, ErrorKind::Tag)
    )))
  );
}

#[test]
#[cfg(feature = "std")]
fn infinite_many() {
Expand Down
16 changes: 16 additions & 0 deletions tests/overflow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,22 @@ fn overflow_incomplete_many_till() {
);
}

/// Ensures `many_till_count` reports the huge `Needed` value requested by the
/// second length prefix instead of overflowing.
#[test]
fn overflow_incomplete_many_till_count() {
  use nom::{bytes::complete::tag, multi::many_till_count};

  // No `#[allow(clippy::type_complexity)]` needed: the output is a plain
  // `(usize, &[u8])`.
  fn multi(i: &[u8]) -> IResult<&[u8], (usize, &[u8])> {
    many_till_count(length_data(be_u64), tag("abc")).parse(i)
  }

  // Trigger an overflow in many_till_count: the second 8-byte prefix is
  // 0xffff_ffff_ffff_ffef (= 18446744073709551599), far more than the input holds.
  assert_eq!(
    multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]),
    Err(Err::Incomplete(Needed::new(18446744073709551599)))
  );
}

#[test]
#[cfg(feature = "alloc")]
fn overflow_incomplete_many_m_n() {
Expand Down