lex: add function with match test
Signed-off-by: FedericoBruzzone <[email protected]>
FedericoBruzzone committed Aug 16, 2024
1 parent 9740e5e commit b5d1de2
Showing 5 changed files with 100 additions and 25 deletions.
2 changes: 1 addition & 1 deletion src/lexer/mod.rs
@@ -129,7 +129,7 @@ mod tests {
     #[test]
     fn identifier() {
         let fs_files = collect_fs_files("./testdata/identifier", true);
-        assert_eq!(fs_files.len(), 25);
+        assert_eq!(fs_files.len(), 26);
 
         for path in fs_files {
             info!("file -> {:?}", path);
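The per-file assertions in this test sit behind the fold above. As a rough sketch only, here is one plausible shape for that loop body, assuming a `Lexer::new` constructor and an iterator of tokens (neither is confirmed by this diff), with `Token` deserializable as the `.tokens.json` fixtures imply:

    for path in fs_files {
        info!("file -> {:?}", path);
        // Lex the .fs source into a token stream (assumed API).
        let tokens: Vec<Token> = Lexer::new(&path).collect();
        // Each fixture pairs a source file with its expected token dump,
        // e.g. id_fun_with_match.fs <-> id_fun_with_match.tokens.json.
        let expected_path = path.with_extension("tokens.json");
        let expected: Vec<Token> =
            serde_json::from_str(&std::fs::read_to_string(&expected_path).unwrap()).unwrap();
        assert_eq!(tokens, expected);
    }
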
9 changes: 5 additions & 4 deletions src/lexer/states.rs
@@ -94,9 +94,9 @@ impl State for StateStart {
                 Box::new(StateWord),
                 TransitionKind::AdvanceOffset,
             )),
-            Some(_) => Err(LexerError::UnexpectedToken(Token::new(
+            Some(c) => Err(LexerError::UnexpectedToken(Token::new(
                 TokenKind::TokenUnknown,
-                cursor.source().content()[cursor.index()..cursor.offset()].to_string(),
+                c.to_string(),
                 cursor.location().clone(),
             ))),
             None => Ok(Lexer::proceed(Box::new(StateEOF), TransitionKind::Consume)),
@@ -130,11 +130,12 @@ impl State for StateString {
                     )),
                 ))
             }
-            _ => Err(LexerError::UnexpectedToken(Token::new(
+            Some(c) => Err(LexerError::UnexpectedToken(Token::new(
                 TokenKind::TokenUnknown,
-                "".to_string(),
+                c.to_string(),
                 cursor.location().clone(),
             ))),
+            None => Ok(Lexer::proceed(Box::new(StateEOF), TransitionKind::Consume)),
         }
     }
 }
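Both hunks in states.rs replace a catch-all error arm with an explicit Some(c)/None split: the UnexpectedToken error now carries the offending character itself (instead of a cursor slice in StateStart, or an empty string in StateString), and running out of input transitions to StateEOF rather than erroring. A minimal standalone sketch of the pattern, deliberately independent of the repository's State trait:

    // Dispatch on the next character: bind it for the error lexeme,
    // and treat exhausted input as a valid EOF transition.
    fn classify(next: Option<char>) -> Result<&'static str, String> {
        match next {
            Some(c) if c.is_alphabetic() => Ok("word"),
            Some(c) => Err(format!("unexpected token: {c}")), // lexeme is the char itself
            None => Ok("eof"), // formerly swallowed by the `_` arm in StateString
        }
    }
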
43 changes: 23 additions & 20 deletions src/lexer/token.rs
@@ -30,6 +30,7 @@ const MULTIPLY: &str = "*";
 const DIVIDE: &str = "/";
 const GREATER: &str = ">";
 const RIGHT_ARROW: &str = "->";
+const RIGHT_DOUBLE_ARROW: &str = "=>";
 
 #[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
 pub enum Literal {
@@ -55,24 +56,25 @@ pub enum TokenKind {
     TokenKeyword(Keyword),
     TokenIdentifier,
     TokenComment,
-    TokenSpace,        // ' '
-    TokenTab,          // \t
-    TokenNewLine,      // \n
-    TokenColon,        // :
-    TokenSemicolon,    // ;
-    TokenAssign,       // =
-    TokenSingleQuote,  // '
-    TokenDoubleQuote,  // "
-    TokenOpenParen,    // (
-    TokenCloseParen,   // )
-    TokenOpenBrace,    // {
-    TokenCloseBrace,   // }
-    TokenOpenBracket,  // [
-    TokenCloseBracket, // ]
-    TokenComma,        // ,
-    TokenGreater,      // >
-    TokenRightArrow,   // ->
-    TokenEOF,          // End of file
+    TokenSpace,            // ' '
+    TokenTab,              // \t
+    TokenNewLine,          // \n
+    TokenColon,            // :
+    TokenSemicolon,        // ;
+    TokenAssign,           // =
+    TokenSingleQuote,      // '
+    TokenDoubleQuote,      // "
+    TokenOpenParen,        // (
+    TokenCloseParen,       // )
+    TokenOpenBrace,        // {
+    TokenCloseBrace,       // }
+    TokenOpenBracket,      // [
+    TokenCloseBracket,     // ]
+    TokenComma,            // ,
+    TokenGreater,          // >
+    TokenRightArrow,       // ->
+    TokenRightDoubleArrow, // =>
+    TokenEOF,              // End of file
     // Operators
     TokenPlus,  // +
     TokenMinus, // -
@@ -125,12 +127,11 @@ impl TokenKind {
             | MULTIPLY
             | DIVIDE
             | NEW_LINE
-            | RIGHT_ARROW
         )
     }
 
     pub fn can_be_followed_by_symbol(c: &str) -> bool {
-        matches!(c, MINUS)
+        matches!(c, MINUS | ASSIGN)
     }
 
     fn match_keyword(lexeme: &str) -> Option<TokenKind> {
@@ -178,6 +179,7 @@ impl TokenKind {
             MULTIPLY => Some(TokenKind::TokenMultiply),
             DIVIDE => Some(TokenKind::TokenDivide),
             RIGHT_ARROW => Some(TokenKind::TokenRightArrow),
+            RIGHT_DOUBLE_ARROW => Some(TokenKind::TokenRightDoubleArrow),
             _ => None,
         }
     }
@@ -394,6 +396,7 @@ impl std::fmt::Display for TokenKind {
             TokenKind::TokenGreater => write!(f, "TokenGreater"),
             TokenKind::TokenComma => write!(f, "TokenComma"),
             TokenKind::TokenRightArrow => write!(f, "TokenRightArrow"),
+            TokenKind::TokenRightDoubleArrow => write!(f, "TokenRightDoubleArrow"),
             TokenKind::TokenEOF => write!(f, "TokenEOF"),
             TokenKind::TokenPlus => write!(f, "TokenPlus"),
             TokenKind::TokenMinus => write!(f, "TokenMinus"),
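Taken together, the token.rs changes route "=>" through the same two-character path as "->": a new constant and TokenKind variant, ASSIGN added to can_be_followed_by_symbol so '=' is allowed to extend, the combined lexeme mapped to TokenRightDoubleArrow, and RIGHT_ARROW dropped from the single-character matches! list (presumably because "->" is only ever formed by extension). A self-contained sketch of the lookahead idea, using simplified stand-ins rather than the crate's real types:

    const MINUS: &str = "-";
    const ASSIGN: &str = "=";

    // Stand-in for TokenKind::can_be_followed_by_symbol after this commit.
    fn can_be_followed_by_symbol(c: &str) -> bool {
        matches!(c, MINUS | ASSIGN)
    }

    // Stand-in for the two-character arms of the symbol lookup.
    fn match_two_char(pair: &str) -> Option<&'static str> {
        match pair {
            "->" => Some("TokenRightArrow"),
            "=>" => Some("TokenRightDoubleArrow"),
            _ => None,
        }
    }

    fn main() {
        // '=' may now begin a two-character symbol, so "=>" fuses into one token...
        assert!(can_be_followed_by_symbol("="));
        assert_eq!(match_two_char("=>"), Some("TokenRightDoubleArrow"));
        // ...while '=' followed by anything else still lexes as TokenAssign.
        assert_eq!(match_two_char("=x"), None);
    }
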
7 changes: 7 additions & 0 deletions testdata/identifier/id_fun_with_match.fs
@@ -0,0 +1,7 @@
f_match: (int) -> str = (a) ->
  match a
    0 => "zero"
    1 => "one"
    _ => "other"
  ;
;
64 changes: 64 additions & 0 deletions testdata/identifier/id_fun_with_match.tokens.json
@@ -0,0 +1,64 @@
[
{ "kind": "TokenIdentifier", "lexeme": "f_match", "location": { "file_path": "", "line": 0, "column_start": 0, "column_end": 7 } },
{ "kind": "TokenColon", "lexeme": ":", "location": { "file_path": "", "line": 0, "column_start": 7, "column_end": 8 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 8, "column_end": 9 } },
{ "kind": "TokenOpenParen", "lexeme": "(", "location": { "file_path": "", "line": 0, "column_start": 9, "column_end": 10 } },
{ "kind": { "TokenKeyword": "IntType" }, "lexeme": "int", "location": { "file_path": "", "line": 0, "column_start": 10, "column_end": 13 } },
{ "kind": "TokenCloseParen", "lexeme": ")", "location": { "file_path": "", "line": 0, "column_start": 13, "column_end": 14 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 14, "column_end": 15 } },
{ "kind": "TokenRightArrow", "lexeme": "->", "location": { "file_path": "", "line": 0, "column_start": 15, "column_end": 17 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 17, "column_end": 18 } },
{ "kind": { "TokenKeyword": "StrType" }, "lexeme": "str", "location": { "file_path": "", "line": 0, "column_start": 18, "column_end": 21 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 21, "column_end": 22 } },
{ "kind": "TokenAssign", "lexeme": "=", "location": { "file_path": "", "line": 0, "column_start": 22, "column_end": 23 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 23, "column_end": 24 } },
{ "kind": "TokenOpenParen", "lexeme": "(", "location": { "file_path": "", "line": 0, "column_start": 24, "column_end": 25 } },
{ "kind": "TokenIdentifier", "lexeme": "a", "location": { "file_path": "", "line": 0, "column_start": 25, "column_end": 26 } },
{ "kind": "TokenCloseParen", "lexeme": ")", "location": { "file_path": "", "line": 0, "column_start": 26, "column_end": 27 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 27, "column_end": 28 } },
{ "kind": "TokenRightArrow", "lexeme": "->", "location": { "file_path": "", "line": 0, "column_start": 28, "column_end": 30 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 0, "column_start": 30, "column_end": 30 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 1, "column_start": 0, "column_end": 1 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 1, "column_start": 1, "column_end": 2 } },
{ "kind": { "TokenKeyword": "Match" }, "lexeme": "match", "location": { "file_path": "", "line": 1, "column_start": 2, "column_end": 7 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 1, "column_start": 7, "column_end": 8 } },
{ "kind": "TokenIdentifier", "lexeme": "a", "location": { "file_path": "", "line": 1, "column_start": 8, "column_end": 9 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 1, "column_start": 9, "column_end": 9 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 2, "column_start": 0, "column_end": 1 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 2, "column_start": 1, "column_end": 2 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 2, "column_start": 2, "column_end": 3 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 2, "column_start": 3, "column_end": 4 } },
{ "kind": { "TokenLiteral": "Int" }, "lexeme": "0", "location": { "file_path": "", "line": 2, "column_start": 4, "column_end": 5 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 2, "column_start": 5, "column_end": 6 } },
{ "kind": "TokenRightDoubleArrow", "lexeme": "=>", "location": { "file_path": "", "line": 2, "column_start": 6, "column_end": 8 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 2, "column_start": 8, "column_end": 9 } },
{ "kind": { "TokenLiteral": "Str" }, "lexeme": "\"zero\"", "location": { "file_path": "", "line": 2, "column_start": 9, "column_end": 15 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 2, "column_start": 15, "column_end": 15 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 3, "column_start": 0, "column_end": 1 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 3, "column_start": 1, "column_end": 2 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 3, "column_start": 2, "column_end": 3 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 3, "column_start": 3, "column_end": 4 } },
{ "kind": { "TokenLiteral": "Int" }, "lexeme": "1", "location": { "file_path": "", "line": 3, "column_start": 4, "column_end": 5 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 3, "column_start": 5, "column_end": 6 } },
{ "kind": "TokenRightDoubleArrow", "lexeme": "=>", "location": { "file_path": "", "line": 3, "column_start": 6, "column_end": 8 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 3, "column_start": 8, "column_end": 9 } },
{ "kind": { "TokenLiteral": "Str" }, "lexeme": "\"one\"", "location": { "file_path": "", "line": 3, "column_start": 9, "column_end": 14 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 3, "column_start": 14, "column_end": 14 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 4, "column_start": 0, "column_end": 1 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 4, "column_start": 1, "column_end": 2 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 4, "column_start": 2, "column_end": 3 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 4, "column_start": 3, "column_end": 4 } },
{ "kind": "TokenIdentifier", "lexeme": "_", "location": { "file_path": "", "line": 4, "column_start": 4, "column_end": 5 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 4, "column_start": 5, "column_end": 6 } },
{ "kind": "TokenRightDoubleArrow", "lexeme": "=>", "location": { "file_path": "", "line": 4, "column_start": 6, "column_end": 8 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 4, "column_start": 8, "column_end": 9 } },
{ "kind": { "TokenLiteral": "Str" }, "lexeme": "\"other\"", "location": { "file_path": "", "line": 4, "column_start": 9, "column_end": 16 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 4, "column_start": 16, "column_end": 16 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 5, "column_start": 0, "column_end": 1 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 5, "column_start": 1, "column_end": 2 } },
{ "kind": "TokenSemicolon", "lexeme": ";", "location": { "file_path": "", "line": 5, "column_start": 2, "column_end": 3 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 5, "column_start": 3, "column_end": 3 } },
{ "kind": "TokenSemicolon", "lexeme": ";", "location": { "file_path": "", "line": 6, "column_start": 0, "column_end": 1 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 6, "column_start": 1, "column_end": 1 } },
{ "kind": "TokenEOF", "lexeme": "", "location": { "file_path": "", "line": 7, "column_start": 0, "column_end": 0 } }
]
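For reference, a hypothetical Token/TokenLocation definition consistent with the records above; the field names are inferred from the fixture and the derives visible in token.rs, not copied from the actual source:

    use serde::{Deserialize, Serialize};

    #[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
    pub struct TokenLocation {
        pub file_path: String,
        pub line: usize,
        pub column_start: usize,
        pub column_end: usize,
    }

    #[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
    pub struct Token {
        pub kind: TokenKind,         // e.g. "TokenIdentifier" or { "TokenKeyword": "Match" }
        pub lexeme: String,          // the matched source text
        pub location: TokenLocation, // zero-based line and column span
    }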
