lex: add function with match test
Signed-off-by: FedericoBruzzone <[email protected]>
FedericoBruzzone committed Aug 16, 2024
1 parent 9740e5e commit b5d1de2
Showing 5 changed files with 100 additions and 25 deletions.
2 changes: 1 addition & 1 deletion src/lexer/mod.rs
@@ -129,7 +129,7 @@ mod tests {
     #[test]
     fn identifier() {
         let fs_files = collect_fs_files("./testdata/identifier", true);
-        assert_eq!(fs_files.len(), 25);
+        assert_eq!(fs_files.len(), 26);
 
         for path in fs_files {
             info!("file -> {:?}", path);
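The per-file assertions in this test sit behind the fold above. As a rough sketch only, here is one plausible shape for that loop body, assuming a `Lexer::new` constructor and an iterator of tokens (neither is confirmed by this diff), with `Token` deserializable as the `.tokens.json` fixtures imply:

    for path in fs_files {
        info!("file -> {:?}", path);
        // Lex the .fs source into a token stream (assumed API).
        let tokens: Vec<Token> = Lexer::new(&path).collect();
        // Each fixture pairs a source file with its expected token dump,
        // e.g. id_fun_with_match.fs <-> id_fun_with_match.tokens.json.
        let expected_path = path.with_extension("tokens.json");
        let expected: Vec<Token> =
            serde_json::from_str(&std::fs::read_to_string(&expected_path).unwrap()).unwrap();
        assert_eq!(tokens, expected);
    }
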
9 changes: 5 additions & 4 deletions src/lexer/states.rs
@@ -94,9 +94,9 @@ impl State for StateStart {
                 Box::new(StateWord),
                 TransitionKind::AdvanceOffset,
             )),
-            Some(_) => Err(LexerError::UnexpectedToken(Token::new(
+            Some(c) => Err(LexerError::UnexpectedToken(Token::new(
                 TokenKind::TokenUnknown,
-                cursor.source().content()[cursor.index()..cursor.offset()].to_string(),
+                c.to_string(),
                 cursor.location().clone(),
             ))),
             None => Ok(Lexer::proceed(Box::new(StateEOF), TransitionKind::Consume)),
@@ -130,11 +130,12 @@ impl State for StateString {
                     )),
                 ))
             }
-            _ => Err(LexerError::UnexpectedToken(Token::new(
+            Some(c) => Err(LexerError::UnexpectedToken(Token::new(
                 TokenKind::TokenUnknown,
-                "".to_string(),
+                c.to_string(),
                 cursor.location().clone(),
             ))),
+            None => Ok(Lexer::proceed(Box::new(StateEOF), TransitionKind::Consume)),
         }
     }
 }
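Both hunks in states.rs replace a catch-all error arm with an explicit Some(c)/None split: the UnexpectedToken error now carries the offending character itself (instead of a cursor slice in StateStart, or an empty string in StateString), and running out of input transitions to StateEOF rather than erroring. A minimal standalone sketch of the pattern, deliberately independent of the repository's State trait:

    // Dispatch on the next character: bind it for the error lexeme,
    // and treat exhausted input as a valid EOF transition.
    fn classify(next: Option<char>) -> Result<&'static str, String> {
        match next {
            Some(c) if c.is_alphabetic() => Ok("word"),
            Some(c) => Err(format!("unexpected token: {c}")), // lexeme is the char itself
            None => Ok("eof"), // formerly swallowed by the `_` arm in StateString
        }
    }
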
43 changes: 23 additions & 20 deletions src/lexer/token.rs
@@ -30,6 +30,7 @@ const MULTIPLY: &str = "*";
 const DIVIDE: &str = "/";
 const GREATER: &str = ">";
 const RIGHT_ARROW: &str = "->";
+const RIGHT_DOUBLE_ARROW: &str = "=>";
 
 #[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
 pub enum Literal {
@@ -55,24 +56,25 @@ pub enum TokenKind {
     TokenKeyword(Keyword),
     TokenIdentifier,
     TokenComment,
-    TokenSpace,        // ' '
-    TokenTab,          // \t
-    TokenNewLine,      // \n
-    TokenColon,        // :
-    TokenSemicolon,    // ;
-    TokenAssign,       // =
-    TokenSingleQuote,  // '
-    TokenDoubleQuote,  // "
-    TokenOpenParen,    // (
-    TokenCloseParen,   // )
-    TokenOpenBrace,    // {
-    TokenCloseBrace,   // }
-    TokenOpenBracket,  // [
-    TokenCloseBracket, // ]
-    TokenComma,        // ,
-    TokenGreater,      // >
-    TokenRightArrow,   // ->
-    TokenEOF,          // End of file
+    TokenSpace,            // ' '
+    TokenTab,              // \t
+    TokenNewLine,          // \n
+    TokenColon,            // :
+    TokenSemicolon,        // ;
+    TokenAssign,           // =
+    TokenSingleQuote,      // '
+    TokenDoubleQuote,      // "
+    TokenOpenParen,        // (
+    TokenCloseParen,       // )
+    TokenOpenBrace,        // {
+    TokenCloseBrace,       // }
+    TokenOpenBracket,      // [
+    TokenCloseBracket,     // ]
+    TokenComma,            // ,
+    TokenGreater,          // >
+    TokenRightArrow,       // ->
+    TokenRightDoubleArrow, // =>
+    TokenEOF,              // End of file
     // Operators
     TokenPlus,  // +
     TokenMinus, // -
@@ -125,12 +127,11 @@ impl TokenKind {
             | MULTIPLY
             | DIVIDE
             | NEW_LINE
-            | RIGHT_ARROW
         )
     }
 
     pub fn can_be_followed_by_symbol(c: &str) -> bool {
-        matches!(c, MINUS)
+        matches!(c, MINUS | ASSIGN)
     }
 
     fn match_keyword(lexeme: &str) -> Option<TokenKind> {
@@ -178,6 +179,7 @@ impl TokenKind {
             MULTIPLY => Some(TokenKind::TokenMultiply),
             DIVIDE => Some(TokenKind::TokenDivide),
             RIGHT_ARROW => Some(TokenKind::TokenRightArrow),
+            RIGHT_DOUBLE_ARROW => Some(TokenKind::TokenRightDoubleArrow),
             _ => None,
         }
     }
@@ -394,6 +396,7 @@ impl std::fmt::Display for TokenKind {
             TokenKind::TokenGreater => write!(f, "TokenGreater"),
             TokenKind::TokenComma => write!(f, "TokenComma"),
             TokenKind::TokenRightArrow => write!(f, "TokenRightArrow"),
+            TokenKind::TokenRightDoubleArrow => write!(f, "TokenRightDoubleArrow"),
             TokenKind::TokenEOF => write!(f, "TokenEOF"),
             TokenKind::TokenPlus => write!(f, "TokenPlus"),
             TokenKind::TokenMinus => write!(f, "TokenMinus"),
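Taken together, the token.rs changes route "=>" through the same two-character path as "->": a new constant and TokenKind variant, ASSIGN added to can_be_followed_by_symbol so '=' is allowed to extend, the combined lexeme mapped to TokenRightDoubleArrow, and RIGHT_ARROW dropped from the single-character matches! list (presumably because "->" is only ever formed by extension). A self-contained sketch of the lookahead idea, using simplified stand-ins rather than the crate's real types:

    const MINUS: &str = "-";
    const ASSIGN: &str = "=";

    // Stand-in for TokenKind::can_be_followed_by_symbol after this commit.
    fn can_be_followed_by_symbol(c: &str) -> bool {
        matches!(c, MINUS | ASSIGN)
    }

    // Stand-in for the two-character arms of the symbol lookup.
    fn match_two_char(pair: &str) -> Option<&'static str> {
        match pair {
            "->" => Some("TokenRightArrow"),
            "=>" => Some("TokenRightDoubleArrow"),
            _ => None,
        }
    }

    fn main() {
        // '=' may now begin a two-character symbol, so "=>" fuses into one token...
        assert!(can_be_followed_by_symbol("="));
        assert_eq!(match_two_char("=>"), Some("TokenRightDoubleArrow"));
        // ...while '=' followed by anything else still lexes as TokenAssign.
        assert_eq!(match_two_char("=x"), None);
    }
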
7 changes: 7 additions & 0 deletions testdata/identifier/id_fun_with_match.fs
@@ -0,0 +1,7 @@
f_match: (int) -> str = (a) ->
  match a
    0 => "zero"
    1 => "one"
    _ => "other"
  ;
;
64 changes: 64 additions & 0 deletions testdata/identifier/id_fun_with_match.tokens.json
@@ -0,0 +1,64 @@
[
{ "kind": "TokenIdentifier", "lexeme": "f_match", "location": { "file_path": "", "line": 0, "column_start": 0, "column_end": 7 } },
{ "kind": "TokenColon", "lexeme": ":", "location": { "file_path": "", "line": 0, "column_start": 7, "column_end": 8 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 8, "column_end": 9 } },
{ "kind": "TokenOpenParen", "lexeme": "(", "location": { "file_path": "", "line": 0, "column_start": 9, "column_end": 10 } },
{ "kind": { "TokenKeyword": "IntType" }, "lexeme": "int", "location": { "file_path": "", "line": 0, "column_start": 10, "column_end": 13 } },
{ "kind": "TokenCloseParen", "lexeme": ")", "location": { "file_path": "", "line": 0, "column_start": 13, "column_end": 14 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 14, "column_end": 15 } },
{ "kind": "TokenRightArrow", "lexeme": "->", "location": { "file_path": "", "line": 0, "column_start": 15, "column_end": 17 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 17, "column_end": 18 } },
{ "kind": { "TokenKeyword": "StrType" }, "lexeme": "str", "location": { "file_path": "", "line": 0, "column_start": 18, "column_end": 21 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 21, "column_end": 22 } },
{ "kind": "TokenAssign", "lexeme": "=", "location": { "file_path": "", "line": 0, "column_start": 22, "column_end": 23 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 23, "column_end": 24 } },
{ "kind": "TokenOpenParen", "lexeme": "(", "location": { "file_path": "", "line": 0, "column_start": 24, "column_end": 25 } },
{ "kind": "TokenIdentifier", "lexeme": "a", "location": { "file_path": "", "line": 0, "column_start": 25, "column_end": 26 } },
{ "kind": "TokenCloseParen", "lexeme": ")", "location": { "file_path": "", "line": 0, "column_start": 26, "column_end": 27 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 27, "column_end": 28 } },
{ "kind": "TokenRightArrow", "lexeme": "->", "location": { "file_path": "", "line": 0, "column_start": 28, "column_end": 30 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 0, "column_start": 30, "column_end": 30 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 1, "column_start": 0, "column_end": 1 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 1, "column_start": 1, "column_end": 2 } },
{ "kind": { "TokenKeyword": "Match" }, "lexeme": "match", "location": { "file_path": "", "line": 1, "column_start": 2, "column_end": 7 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 1, "column_start": 7, "column_end": 8 } },
{ "kind": "TokenIdentifier", "lexeme": "a", "location": { "file_path": "", "line": 1, "column_start": 8, "column_end": 9 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 1, "column_start": 9, "column_end": 9 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 2, "column_start": 0, "column_end": 1 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 2, "column_start": 1, "column_end": 2 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 2, "column_start": 2, "column_end": 3 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 2, "column_start": 3, "column_end": 4 } },
{ "kind": { "TokenLiteral": "Int" }, "lexeme": "0", "location": { "file_path": "", "line": 2, "column_start": 4, "column_end": 5 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 2, "column_start": 5, "column_end": 6 } },
{ "kind": "TokenRightDoubleArrow", "lexeme": "=>", "location": { "file_path": "", "line": 2, "column_start": 6, "column_end": 8 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 2, "column_start": 8, "column_end": 9 } },
{ "kind": { "TokenLiteral": "Str" }, "lexeme": "\"zero\"", "location": { "file_path": "", "line": 2, "column_start": 9, "column_end": 15 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 2, "column_start": 15, "column_end": 15 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 3, "column_start": 0, "column_end": 1 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 3, "column_start": 1, "column_end": 2 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 3, "column_start": 2, "column_end": 3 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 3, "column_start": 3, "column_end": 4 } },
{ "kind": { "TokenLiteral": "Int" }, "lexeme": "1", "location": { "file_path": "", "line": 3, "column_start": 4, "column_end": 5 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 3, "column_start": 5, "column_end": 6 } },
{ "kind": "TokenRightDoubleArrow", "lexeme": "=>", "location": { "file_path": "", "line": 3, "column_start": 6, "column_end": 8 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 3, "column_start": 8, "column_end": 9 } },
{ "kind": { "TokenLiteral": "Str" }, "lexeme": "\"one\"", "location": { "file_path": "", "line": 3, "column_start": 9, "column_end": 14 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 3, "column_start": 14, "column_end": 14 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 4, "column_start": 0, "column_end": 1 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 4, "column_start": 1, "column_end": 2 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 4, "column_start": 2, "column_end": 3 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 4, "column_start": 3, "column_end": 4 } },
{ "kind": "TokenIdentifier", "lexeme": "_", "location": { "file_path": "", "line": 4, "column_start": 4, "column_end": 5 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 4, "column_start": 5, "column_end": 6 } },
{ "kind": "TokenRightDoubleArrow", "lexeme": "=>", "location": { "file_path": "", "line": 4, "column_start": 6, "column_end": 8 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 4, "column_start": 8, "column_end": 9 } },
{ "kind": { "TokenLiteral": "Str" }, "lexeme": "\"other\"", "location": { "file_path": "", "line": 4, "column_start": 9, "column_end": 16 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 4, "column_start": 16, "column_end": 16 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 5, "column_start": 0, "column_end": 1 } },
{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 5, "column_start": 1, "column_end": 2 } },
{ "kind": "TokenSemicolon", "lexeme": ";", "location": { "file_path": "", "line": 5, "column_start": 2, "column_end": 3 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 5, "column_start": 3, "column_end": 3 } },
{ "kind": "TokenSemicolon", "lexeme": ";", "location": { "file_path": "", "line": 6, "column_start": 0, "column_end": 1 } },
{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 6, "column_start": 1, "column_end": 1 } },
{ "kind": "TokenEOF", "lexeme": "", "location": { "file_path": "", "line": 7, "column_start": 0, "column_end": 0 } }
]
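For reference, a hypothetical Token/TokenLocation definition consistent with the records above; the field names are inferred from the fixture and the derives visible in token.rs, not copied from the actual source:

    use serde::{Deserialize, Serialize};

    #[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
    pub struct TokenLocation {
        pub file_path: String,
        pub line: usize,
        pub column_start: usize,
        pub column_end: usize,
    }

    #[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
    pub struct Token {
        pub kind: TokenKind,         // e.g. "TokenIdentifier" or { "TokenKeyword": "Match" }
        pub lexeme: String,          // the matched source text
        pub location: TokenLocation, // zero-based line and column span
    }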
