Skip to content

Commit

Permalink
Handle keywords more elegantly
Browse files Browse the repository at this point in the history
  • Loading branch information
zesterer committed Jan 1, 2025
1 parent 59d9f68 commit 6e27ebe
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 21 deletions.
20 changes: 16 additions & 4 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,8 @@ pub enum RichPattern<'a, T> {
Token(MaybeRef<'a, T>),
/// A labelled pattern.
Label(Cow<'a, str>),
/// A specific keyword.
Identifier(String),
/// Anything other than the end of input.
Any,
/// Something other than the provided input.
Expand All @@ -256,8 +258,8 @@ impl<'a, T> From<DefaultExpected<'a, T>> for RichPattern<'a, T> {
}
}

impl<'a, T> From<text::TextExpected> for RichPattern<'a, T> {
fn from(expected: text::TextExpected) -> Self {
impl<'a, C: text::Char, T> From<text::TextExpected<C>> for RichPattern<'a, T> {
fn from(expected: text::TextExpected<C>) -> Self {
match expected {
text::TextExpected::Whitespace => Self::Label(Cow::Borrowed("whitespace")),
text::TextExpected::InlineWhitespace => Self::Label(Cow::Borrowed("inline whitespace")),
Expand All @@ -266,7 +268,10 @@ impl<'a, T> From<text::TextExpected> for RichPattern<'a, T> {
Self::Label(Cow::Borrowed("non-zero digit"))
}
text::TextExpected::Digit(_) => Self::Label(Cow::Borrowed("digit")),
text::TextExpected::Identifier => Self::Label(Cow::Borrowed("identifier")),
text::TextExpected::IdentifierPart => Self::Label(Cow::Borrowed("identifier")),
text::TextExpected::Identifier(kw) => {
Self::Identifier(C::str_to_chars(kw.as_ref()).map(|c| c.to_char()).collect())
}
}
}
}
Expand Down Expand Up @@ -307,6 +312,7 @@ impl<'a, T> RichPattern<'a, T> {
match self {
Self::Token(t) => RichPattern::Token(f(t.into_inner()).into()),
Self::Label(l) => RichPattern::Label(l),
Self::Identifier(i) => RichPattern::Identifier(i),
Self::Any => RichPattern::Any,
Self::SomethingElse => RichPattern::SomethingElse,
Self::EndOfInput => RichPattern::EndOfInput,
Expand All @@ -321,6 +327,7 @@ impl<'a, T> RichPattern<'a, T> {
match self {
Self::Token(tok) => RichPattern::Token(tok.into_owned()),
Self::Label(l) => RichPattern::Label(Cow::Owned(l.into_owned())),
Self::Identifier(i) => RichPattern::Identifier(i),
Self::Any => RichPattern::Any,
Self::SomethingElse => RichPattern::SomethingElse,
Self::EndOfInput => RichPattern::EndOfInput,
Expand All @@ -339,6 +346,7 @@ impl<'a, T> RichPattern<'a, T> {
write!(f, "'")
}
Self::Label(l) => write!(f, "{l}"),
Self::Identifier(i) => write!(f, "'{i}'"),
Self::Any => write!(f, "any"),
Self::SomethingElse => write!(f, "something else"),
Self::EndOfInput => write!(f, "end of input"),
Expand Down Expand Up @@ -772,7 +780,11 @@ fn write_token<T>(
tok: Option<&T>,
) -> fmt::Result {
match tok {
Some(tok) => fmt_token(tok, f),
Some(tok) => {
write!(f, "'")?;
fmt_token(tok, f)?;
write!(f, "'")
}
None => write!(f, "end of input"),
}
}
58 changes: 41 additions & 17 deletions src/text.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ pub trait Char: Sized + Copy + PartialEq + fmt::Debug + Sealed + 'static {
/// For [`char`], this is [`str`]. For [`u8`], this is [`[u8]`].
type Str: ?Sized + AsRef<[u8]> + AsRef<Self::Str> + 'static;

/// A type representing an owned version of `Char::Str`.
///
/// For [`char`], this is [`String`]. For [`u8`], this is [`Vec<u8>`].
type OwnedStr: AsRef<Self::Str>;

/// Convert the given ASCII character to this character type.
fn from_ascii(c: u8) -> Self;

Expand Down Expand Up @@ -49,11 +54,15 @@ pub trait Char: Sized + Copy + PartialEq + fmt::Debug + Sealed + 'static {

/// Turn a string of this character type into an iterator over those characters.
fn str_to_chars(s: &Self::Str) -> Self::StrCharIter<'_>;

/// Turn a borrowed string into an owned one.
fn to_owned(s: &Self::Str) -> Self::OwnedStr;
}

impl Sealed for char {}
impl Char for char {
type Str = str;
type OwnedStr = String;

fn from_ascii(c: u8) -> Self {
c as char
Expand Down Expand Up @@ -86,11 +95,16 @@ impl Char for char {
/// Reports whether this character may continue an identifier, as decided by
/// `unicode_ident::is_xid_continue`.
fn is_ident_continue(&self) -> bool {
    let ch = *self;
    unicode_ident::is_xid_continue(ch)
}

/// Copies the borrowed `str` into a freshly allocated `String`.
fn to_owned(s: &Self::Str) -> Self::OwnedStr {
    String::from(s)
}
}

impl Sealed for u8 {}
impl Char for u8 {
type Str = [u8];
type OwnedStr = Vec<u8>;

fn from_ascii(c: u8) -> Self {
c
Expand Down Expand Up @@ -123,6 +137,10 @@ impl Char for u8 {
/// Reports whether this byte may continue an identifier by converting it with
/// `to_char` and deferring to the resulting value's `is_ident_continue`.
fn is_ident_continue(&self) -> bool {
    let as_char = self.to_char();
    as_char.is_ident_continue()
}

/// Copies the borrowed byte slice into a freshly allocated `Vec<u8>`.
fn to_owned(s: &Self::Str) -> Self::OwnedStr {
    Vec::from(s)
}
}

/// A parser that accepts (and ignores) any number of whitespace characters before or after another pattern.
Expand Down Expand Up @@ -150,7 +168,7 @@ where

/// Labels denoting a variety of text-related patterns.
#[non_exhaustive]
pub enum TextExpected {
pub enum TextExpected<C: Char> {
/// Whitespace (for example: spaces, tabs, or newlines).
Whitespace,
/// Inline whitespace (for example: spaces or tabs).
Expand All @@ -164,8 +182,10 @@ pub enum TextExpected {
/// - `Digit(0..10)` implies any base-10 digit
/// - `Digit(1..16)` implies any non-zero hexadecimal digit
Digit(Range<u32>),
/// An identifier, either ASCII or unicode.
Identifier,
/// Part of an identifier, either ASCII or unicode.
IdentifierPart,
/// A specific identifier.
Identifier(C::OwnedStr),
}

/// A parser that accepts (and ignores) any number of whitespace characters.
Expand All @@ -191,7 +211,7 @@ where
I::Token: Char,
I: ValueInput<'src> + StrInput<'src, C>,
E: ParserExtra<'src, I>,
E::Error: LabelError<'src, I, TextExpected>,
E::Error: LabelError<'src, I, TextExpected<C>>,
{
any()
.try_map(|c: C, span| {
Expand Down Expand Up @@ -233,7 +253,7 @@ where
I::Token: Char,
I: ValueInput<'src> + StrInput<'src, C>,
E: ParserExtra<'src, I>,
E::Error: LabelError<'src, I, TextExpected>,
E::Error: LabelError<'src, I, TextExpected<C>>,
{
any()
.try_map(|c: C, span| {
Expand Down Expand Up @@ -286,7 +306,7 @@ where
I: ValueInput<'src>,
I::Token: Char,
E: ParserExtra<'src, I>,
E::Error: LabelError<'src, I, TextExpected>,
E::Error: LabelError<'src, I, TextExpected<I::Token>>,
{
custom(|inp| {
let before = inp.cursor();
Expand Down Expand Up @@ -351,7 +371,7 @@ where
C: Char,
I: ValueInput<'src, Token = C>,
E: ParserExtra<'src, I>,
E::Error: LabelError<'src, I, TextExpected>,
E::Error: LabelError<'src, I, TextExpected<C>>,
{
any()
.try_map(move |c: C, span| {
Expand Down Expand Up @@ -405,7 +425,7 @@ where
C: Char,
I: StrInput<'src, C>,
E: ParserExtra<'src, I>,
E::Error: LabelError<'src, I, TextExpected> + LabelError<'src, I, MaybeRef<'src, C>>,
E::Error: LabelError<'src, I, TextExpected<C>> + LabelError<'src, I, MaybeRef<'src, C>>,
{
any()
.try_map(move |c: C, span| {
Expand Down Expand Up @@ -456,15 +476,15 @@ pub mod ascii {
C: Char,
I: ValueInput<'src> + StrInput<'src, C>,
E: ParserExtra<'src, I>,
E::Error: LabelError<'src, I, TextExpected>,
E::Error: LabelError<'src, I, TextExpected<C>>,
{
any()
.try_map(|c: C, span| {
if c.to_char().is_ascii_alphabetic() || c.to_char() == '_' {
Ok(c)
} else {
Err(LabelError::expected_found(
[TextExpected::Identifier],
[TextExpected::IdentifierPart],
Some(MaybeRef::Val(c)),
span,
))
Expand All @@ -477,7 +497,7 @@ pub mod ascii {
Ok(())
} else {
Err(LabelError::expected_found(
[TextExpected::Identifier],
[TextExpected::IdentifierPart],
Some(MaybeRef::Val(c)),
span,
))
Expand Down Expand Up @@ -517,7 +537,7 @@ pub mod ascii {
Str: AsRef<C::Str> + 'src + Clone,
E: ParserExtra<'src, I> + 'src,
I: ValueInput<'src> + StrInput<'src, C>,
E::Error: LabelError<'src, I, TextExpected> + LabelError<'src, I, Str>,
E::Error: LabelError<'src, I, TextExpected<C>> + LabelError<'src, I, Str>,
{
#[cfg(debug_assertions)]
{
Expand All @@ -536,7 +556,11 @@ pub mod ascii {
if s == keyword.as_ref() {
Ok(())
} else {
Err(LabelError::expected_found([keyword.clone()], None, span))
Err(LabelError::expected_found(
[TextExpected::Identifier(C::to_owned(keyword.as_ref()))],
None,
span,
))
}
})
.to_slice()
Expand All @@ -562,15 +586,15 @@ pub mod unicode {
C: Char,
I: ValueInput<'src> + StrInput<'src, C>,
E: ParserExtra<'src, I>,
E::Error: LabelError<'src, I, TextExpected>,
E::Error: LabelError<'src, I, TextExpected<C>>,
{
any()
.try_map(|c: C, span| {
if c.is_ident_start() {
Ok(c)
} else {
Err(LabelError::expected_found(
[TextExpected::Identifier],
[TextExpected::IdentifierPart],
Some(MaybeRef::Val(c)),
span,
))
Expand All @@ -583,7 +607,7 @@ pub mod unicode {
Ok(c)
} else {
Err(LabelError::expected_found(
[TextExpected::Identifier],
[TextExpected::IdentifierPart],
Some(MaybeRef::Val(c)),
span,
))
Expand Down Expand Up @@ -623,7 +647,7 @@ pub mod unicode {
Str: AsRef<C::Str> + 'src + Clone,
I: ValueInput<'src> + StrInput<'src, C>,
E: ParserExtra<'src, I> + 'src,
E::Error: LabelError<'src, I, TextExpected> + LabelError<'src, I, Str>,
E::Error: LabelError<'src, I, TextExpected<C>> + LabelError<'src, I, Str>,
{
#[cfg(debug_assertions)]
{
Expand Down

0 comments on commit 6e27ebe

Please sign in to comment.