lex: advancement in lexer implementation #13

Merged · 5 commits · Aug 16, 2024

3 changes: 1 addition & 2 deletions examples/first.fs
@@ -7,6 +7,5 @@ _x_int: int = 0
 _x_float: float = 0.1
 _x_bool: bool = true
 _x_bool2: bool = false
-c_char: char = 'c'
-c_char2: char = 'c'
 x_str: str = "hello"
+x_list: [int] = [1, 2, 3]
2 changes: 1 addition & 1 deletion examples/test.fs
@@ -1 +1 @@
-x: str = "hello"
+x_list: [int] = [1, 2, 3]
2 changes: 1 addition & 1 deletion src/lexer/mod.rs
@@ -129,7 +129,7 @@ mod tests {
     #[test]
     fn identifier() {
         let fs_files = collect_fs_files("./testdata/identifier", true);
-        assert_eq!(fs_files.len(), 16);
+        assert_eq!(fs_files.len(), 17);

         for path in fs_files {
             info!("file -> {:?}", path);
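The count goes from 16 to 17 because of the new `id_list_assign` fixture added under `testdata/identifier` below. The comparison harness itself is outside this hunk; as a rough sketch of the snapshot pattern the testdata implies, each `.fs` source sits beside a `.tokens.json` file that can be deserialized and checked against the lexer's output. The struct and helper below are illustrative assumptions, not this crate's real types:

use serde::Deserialize;
use std::path::Path;

// Hypothetical stand-in for the expected-token records in *.tokens.json.
// Unknown JSON fields (like "location") are ignored by serde's default.
#[derive(Debug, PartialEq, Deserialize)]
struct ExpectedToken {
    kind: serde_json::Value, // either a plain string or { "TokenLiteral": "Int" }
    lexeme: String,
}

// Loads the snapshot sitting next to a source file:
// `id_list_assign.fs` -> `id_list_assign.tokens.json`.
fn load_snapshot(fs_path: &Path) -> Vec<ExpectedToken> {
    let json_path = fs_path.with_extension("tokens.json");
    let json = std::fs::read_to_string(json_path).expect("snapshot file should exist");
    serde_json::from_str(&json).expect("snapshot should be a JSON token array")
}

With that in place, the test only has to lex each collected `.fs` file and `assert_eq!` the produced tokens against the deserialized snapshot.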
34 changes: 25 additions & 9 deletions src/lexer/states.rs
@@ -4,6 +4,9 @@ use super::Lexer;
 use super::LexerError;
 use crate::lexer::token::Token;
 use crate::lexer::token::TokenKind;
+use crate::lexer::token::TokenKind::TokenCloseBrace;
+use crate::lexer::token::TokenKind::TokenCloseBracket;
+use crate::lexer::token::TokenKind::TokenCloseParen;
 use crate::lexer::token::TokenKind::TokenDoubleQuote;
 use crate::lexer::token::TokenKind::TokenSingleQuote;
 use std::fmt::Debug;
@@ -98,7 +101,7 @@ impl State for StateStart {
             )),
             Some(_) => Err(LexerError::UnexpectedToken(Token::new(
                 TokenKind::TokenUnknown,
-                "".to_string(),
+                cursor.source().content()[cursor.index()..cursor.offset()].to_string(),
                 cursor.location().clone(),
             ))),
             None => Ok(Lexer::proceed(Box::new(StateEOF), TransitionKind::Consume)),
@@ -112,6 +115,11 @@ pub struct StateString;
 impl State for StateString {
     fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError> {
         match cursor.peek() {
+            Some(c) if c.eq(&'\n') => Err(LexerError::UnexpectedToken(Token::new(
+                TokenKind::TokenUnknown,
+                "\\n".to_string(),
+                cursor.location().clone(),
+            ))),
             Some(c) if c.ne(&'"') => Ok(Lexer::proceed(
                 Box::new(StateString),
                 TransitionKind::AdvanceOffset,
@@ -172,12 +180,10 @@ impl State for StateNumber {
             let lexeme = cursor.source().content()[cursor.index()..cursor.offset()].to_string();
             let location = cursor.location().clone();
             let token_kind = TokenKind::from(&lexeme);
-            Ok(Transition {
-                state: Box::new(StateStart),
-                transition_kind: TransitionKind::EmitToken(Token::new(
-                    token_kind, lexeme, location,
-                )),
-            })
+            Ok(Lexer::proceed(
+                Box::new(StateStart),
+                TransitionKind::EmitToken(Token::new(token_kind, lexeme, location)),
+            ))
         }
     }
 }
@@ -214,7 +220,10 @@ pub struct StateSymbol;

 impl StateSymbol {
     fn is_symbol(c: char) -> bool {
-        matches!(c, ':' | '=' | '\n')
+        matches!(
+            c,
+            ':' | '=' | '\n' | '(' | ')' | '{' | '}' | '[' | ']' | ','
+        )
     }
 }

@@ -227,7 +236,14 @@ impl State for StateSymbol {

         // NOTE: if a '\n' is found while another "symbol" token was being scanned, the previous token would be mangled and only the '\n' emitted,
         // so we need to handle the previous token here, since it can sit at the end of the line
-        if [TokenSingleQuote, TokenDoubleQuote].contains(&token_kind) {
+        let valid_last_token = vec![
+            TokenCloseBracket,
+            TokenCloseParen,
+            TokenCloseBrace,
+            TokenDoubleQuote,
+            TokenSingleQuote,
+        ];
+        if valid_last_token.contains(&token_kind) {
             return Ok(Lexer::proceed(
                 Box::new(StateStart),
                 TransitionKind::EmitToken(Token::new(
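Two things change in this file: `is_symbol` now recognizes the paren, brace, bracket, and comma characters, and the end-of-line handling treats any pending closing delimiter (not just quotes) as a valid last token before '\n'. A standalone illustration of why that matters, using a toy `split_symbols` helper that is not part of the crate:

// A line like `x_list: [int] = [1, 2, 3]` ends with `]` immediately
// followed by '\n', so the symbol state sees two symbols back to back
// and must emit the pending `]` before the newline token.
fn is_symbol(c: char) -> bool {
    matches!(c, ':' | '=' | '\n' | '(' | ')' | '{' | '}' | '[' | ']' | ',')
}

// Toy helper: pull out each symbol as its own one-character token,
// rather than letting `]` and '\n' collapse into one mangled lexeme.
fn split_symbols(line: &str) -> Vec<String> {
    line.chars()
        .filter(|c| is_symbol(*c))
        .map(|c| c.to_string())
        .collect()
}

fn main() {
    assert_eq!(
        split_symbols("x_list: [int] = [1, 2, 3]\n"),
        vec![":", "[", "]", "=", "[", ",", ",", "]", "\n"]
    );
}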
44 changes: 36 additions & 8 deletions src/lexer/token.rs
@@ -12,6 +12,13 @@ const COLON: &str = ":";
 const ASSIGN: &str = "=";
 const SINGLE_QUOTE: &str = "'";
 const DOUBLE_QUOTE: &str = "\"";
+const OPEN_PAREN: &str = "(";
+const CLOSE_PAREN: &str = ")";
+const OPEN_BRACKET: &str = "{";
+const CLOSE_BRACKET: &str = "}";
+const OPEN_BRACE: &str = "[";
+const CLOSE_BRACE: &str = "]";
+const COMMA: &str = ",";

 #[derive(Debug, Clone, PartialEq, Deserialize, Serialize)]
 pub enum Literal {
@@ -28,14 +35,21 @@ pub enum TokenKind {
     TokenKeyword,
     TokenType,
     TokenComment,
-    TokenSpace,       // ' '
-    TokenTab,         // \t
-    TokenNewLine,     // \n
-    TokenColon,       // :
-    TokenAssign,      // =
-    TokenSingleQuote, // '
-    TokenDoubleQuote, // "
-    TokenEOF,         // End of file
+    TokenSpace,        // ' '
+    TokenTab,          // \t
+    TokenNewLine,      // \n
+    TokenColon,        // :
+    TokenAssign,       // =
+    TokenSingleQuote,  // '
+    TokenDoubleQuote,  // "
+    TokenOpenParen,    // (
+    TokenCloseParen,   // )
+    TokenOpenBrace,    // {
+    TokenCloseBrace,   // }
+    TokenOpenBracket,  // [
+    TokenCloseBracket, // ]
+    TokenComma,        // ,
+    TokenEOF,          // End of file
     TokenUnknown,
 }

@@ -70,6 +84,13 @@ impl TokenKind {
             ASSIGN => Some(TokenKind::TokenAssign),
             SINGLE_QUOTE => Some(TokenKind::TokenSingleQuote),
             DOUBLE_QUOTE => Some(TokenKind::TokenDoubleQuote),
+            OPEN_PAREN => Some(TokenKind::TokenOpenParen),
+            CLOSE_PAREN => Some(TokenKind::TokenCloseParen),
+            OPEN_BRACE => Some(TokenKind::TokenOpenBrace),
+            CLOSE_BRACE => Some(TokenKind::TokenCloseBrace),
+            OPEN_BRACKET => Some(TokenKind::TokenOpenBracket),
+            CLOSE_BRACKET => Some(TokenKind::TokenCloseBracket),
+            COMMA => Some(TokenKind::TokenComma),
             _ => None,
         }
     }
@@ -264,6 +285,13 @@ impl std::fmt::Display for TokenKind {
             TokenKind::TokenAssign => write!(f, "TokenAssign"),
             TokenKind::TokenSingleQuote => write!(f, "TokenTick"),
             TokenKind::TokenDoubleQuote => write!(f, "TokenDoubleTick"),
+            TokenKind::TokenOpenParen => write!(f, "TokenOpenParen"),
+            TokenKind::TokenCloseParen => write!(f, "TokenCloseParen"),
+            TokenKind::TokenOpenBrace => write!(f, "TokenOpenBrace"),
+            TokenKind::TokenCloseBrace => write!(f, "TokenCloseBrace"),
+            TokenKind::TokenOpenBracket => write!(f, "TokenOpenBracket"),
+            TokenKind::TokenCloseBracket => write!(f, "TokenCloseBracket"),
+            TokenKind::TokenComma => write!(f, "TokenComma"),
             TokenKind::TokenEOF => write!(f, "TokenEOF"),
             TokenKind::TokenUnknown => write!(f, "TokenUnknown"),
         }
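Worth noting when reading the new constants: the PR maps "["/"]" to OPEN_BRACE/CLOSE_BRACE and "{"/"}" to OPEN_BRACKET/CLOSE_BRACKET, the reverse of the more common naming, and the testdata below follows the same convention ("[" lexes as TokenOpenBrace). A self-contained sketch of the lookup that mirrors the diff's naming, with simplified enum variants rather than the crate's own:

#[derive(Debug, PartialEq)]
enum Kind {
    OpenParen,    // (
    CloseParen,   // )
    OpenBracket,  // { (per the PR's naming)
    CloseBracket, // } (per the PR's naming)
    OpenBrace,    // [ (per the PR's naming)
    CloseBrace,   // ] (per the PR's naming)
    Comma,        // ,
}

// Mirrors the new match arms: a one-character lexeme either maps to a
// symbol kind or falls through to None.
fn symbol_kind(lexeme: &str) -> Option<Kind> {
    match lexeme {
        "(" => Some(Kind::OpenParen),
        ")" => Some(Kind::CloseParen),
        "{" => Some(Kind::OpenBracket),
        "}" => Some(Kind::CloseBracket),
        "[" => Some(Kind::OpenBrace),
        "]" => Some(Kind::CloseBrace),
        "," => Some(Kind::Comma),
        _ => None,
    }
}

fn main() {
    // The delimiters of `[int]` in `x_list: [int] = [1, 2, 3]` come out
    // as OpenBrace/CloseBrace under this naming.
    assert_eq!(symbol_kind("["), Some(Kind::OpenBrace));
    assert_eq!(symbol_kind("@"), None);
}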
1 change: 1 addition & 0 deletions testdata/identifier/id_list_assign.fs
@@ -0,0 +1 @@
+x_list: [int] = [1, 2, 3]
22 changes: 22 additions & 0 deletions testdata/identifier/id_list_assign.tokens.json
@@ -0,0 +1,22 @@
+[
+{ "kind": "TokenIdentifier", "lexeme": "x_list", "location": { "file_path": "", "line": 0, "column_start": 0, "column_end": 6 } },
+{ "kind": "TokenColon", "lexeme": ":", "location": { "file_path": "", "line": 0, "column_start": 6, "column_end": 7 } },
+{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 7, "column_end": 8 } },
+{ "kind": "TokenOpenBrace", "lexeme": "[", "location": { "file_path": "", "line": 0, "column_start": 8, "column_end": 9 } },
+{ "kind": "TokenType", "lexeme": "int", "location": { "file_path": "", "line": 0, "column_start": 9, "column_end": 12 } },
+{ "kind": "TokenCloseBrace", "lexeme": "]", "location": { "file_path": "", "line": 0, "column_start": 12, "column_end": 13 } },
+{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 13, "column_end": 14 } },
+{ "kind": "TokenAssign", "lexeme": "=", "location": { "file_path": "", "line": 0, "column_start": 14, "column_end": 15 } },
+{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 15, "column_end": 16 } },
+{ "kind": "TokenOpenBrace", "lexeme": "[", "location": { "file_path": "", "line": 0, "column_start": 16, "column_end": 17 } },
+{ "kind": { "TokenLiteral": "Int" }, "lexeme": "1", "location": { "file_path": "", "line": 0, "column_start": 17, "column_end": 18 } },
+{ "kind": "TokenComma", "lexeme": ",", "location": { "file_path": "", "line": 0, "column_start": 18, "column_end": 19 } },
+{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 19, "column_end": 20 } },
+{ "kind": { "TokenLiteral": "Int" }, "lexeme": "2", "location": { "file_path": "", "line": 0, "column_start": 20, "column_end": 21 } },
+{ "kind": "TokenComma", "lexeme": ",", "location": { "file_path": "", "line": 0, "column_start": 21, "column_end": 22 } },
+{ "kind": "TokenSpace", "lexeme": " ", "location": { "file_path": "", "line": 0, "column_start": 22, "column_end": 23 } },
+{ "kind": { "TokenLiteral": "Int" }, "lexeme": "3", "location": { "file_path": "", "line": 0, "column_start": 23, "column_end": 24 } },
+{ "kind": "TokenCloseBrace", "lexeme": "]", "location": { "file_path": "", "line": 0, "column_start": 24, "column_end": 25 } },
+{ "kind": "TokenNewLine", "lexeme": "\\n", "location": { "file_path": "", "line": 0, "column_start": 25, "column_end": 25 } },
+{ "kind": "TokenEOF", "lexeme": "", "location": { "file_path": "", "line": 1, "column_start": 0, "column_end": 0 } }
+]
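As the snapshot shows, locations use 0-indexed lines and end-exclusive columns: `x_list` spans columns 0 to 6, the newline token is recorded with zero width at column 25, and the EOF token lands at the start of the following line.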