Skip to content

Commit

Permalink
lex: Handle the char variable initialization
Browse files Browse the repository at this point in the history
There was also a bug when a symbol is at the end of the line; it is now
handled correctly for the tick symbol.

Signed-off-by: Federico Guerinoni <[email protected]>
  • Loading branch information
guerinoni authored and FedericoBruzzone committed Aug 13, 2024
1 parent c50fe9e commit e0e8cda
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 10 deletions.
8 changes: 4 additions & 4 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,13 @@ impl Iterator for Lexer {
Ok(transition) => transition,
Err(err) => {
self.errors.push(err.clone());
match err {
return match err {
LexerError::UnexpectedToken(token) => {
error!("Unexpected token: {}", token);
// TODO: return a transition to continue lexing (for error recovery)
return None;
None
}
}
};
}
};
let (state, transition_kind) = transition.into_parts();
Expand Down Expand Up @@ -133,7 +133,7 @@ mod tests {
#[test]
fn identifier() {
let fs_files = collect_fs_files("./testdata/identifier", true);
assert_eq!(fs_files.len(), 17);
assert_eq!(fs_files.len(), 18);

for path in fs_files {
info!("file -> {:?}", path);
Expand Down
20 changes: 18 additions & 2 deletions src/lexer/states.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use super::Lexer;
use super::LexerError;
use crate::lexer::token::Token;
use crate::lexer::token::TokenKind;
use crate::lexer::token::TokenKind::TokenTick;
use std::fmt::Debug;

pub trait State: Debug {
Expand Down Expand Up @@ -137,7 +138,7 @@ pub struct StateWord;
impl State for StateWord {
fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError> {
match cursor.peek() {
Some(c) if c.is_alphanumeric() || c.eq(&'_') => Ok(Lexer::proceed(
Some(c) if c.is_alphabetic() || c.eq(&'_') => Ok(Lexer::proceed(
Box::new(StateWord),
TransitionKind::AdvanceOffset,
)),
Expand All @@ -162,14 +163,29 @@ pub struct StateSymbol;

impl StateSymbol {
fn is_symbol(c: char) -> bool {
matches!(c, ':' | '=' | '\n')
matches!(c, ':' | '=' | '\n' | '\'')
}
}

impl State for StateSymbol {
fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError> {
match cursor.peek() {
Some('\n') => {
let lexeme = cursor.source().content()[cursor.index()..cursor.offset()].to_string();
let token_kind = TokenKind::from(&lexeme);
// NOTE: if a '\n' is found while another "symbol" token is being scanned, the previous token is lost and only the '\n' is emitted.
// Right now we only need to handle TokenTick, since it can appear at the end of a line, but this can be extended to other symbols.
if token_kind == TokenTick {
return Ok(Lexer::proceed(
Box::new(StateStart),
TransitionKind::EmitToken(Token::new(
token_kind,
lexeme,
cursor.location().clone(),
)),
));
}

let transition = Lexer::proceed(
Box::new(StateStart),
TransitionKind::EmitToken(Token::new(
Expand Down
15 changes: 11 additions & 4 deletions src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::path::{Path, PathBuf};
const KEYWORD_INT: &str = "int";
const KEYWORD_FLOAT: &str = "float";
const KEYWORD_BOOL: &str = "bool";
const KEYWORD_CHAR: &str = "char";
const KEYWORD_BOOL_TRUE: &str = "true";
const KEYWORD_BOOL_FALSE: &str = "false";
const SEPARATOR_COLON: &str = ":";
Expand All @@ -15,6 +16,7 @@ pub enum Literal {
Int(i64),
Float(f64),
Bool(bool),
Char(char),
}

#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
Expand All @@ -25,6 +27,7 @@ pub enum TokenKind {
TokenNewLine, // \n
TokenColon, // :
TokenAssign, // =
TokenTick, // '
TokenEOF, // End of file
}

Expand All @@ -34,6 +37,7 @@ impl TokenKind {
KEYWORD_INT => Some(TokenKind::TokenKeyword),
KEYWORD_FLOAT => Some(TokenKind::TokenKeyword),
KEYWORD_BOOL => Some(TokenKind::TokenKeyword),
KEYWORD_CHAR => Some(TokenKind::TokenKeyword),
KEYWORD_BOOL_TRUE => Some(TokenKind::TokenLiteral(Literal::Bool(true))),
KEYWORD_BOOL_FALSE => Some(TokenKind::TokenLiteral(Literal::Bool(false))),
_ => None,
Expand All @@ -42,9 +46,10 @@ impl TokenKind {

fn match_number(lexeme: &str) -> Option<TokenKind> {
if lexeme.chars().all(char::is_numeric) {
return Some(TokenKind::TokenLiteral(Literal::Int(
lexeme.parse().unwrap(),
)));
return match lexeme.parse() {
Ok(value) => Some(TokenKind::TokenLiteral(Literal::Int(value))),
Err(_) => None,
};
}

if lexeme.contains('.') {
Expand All @@ -60,6 +65,7 @@ impl TokenKind {
match lexeme {
SEPARATOR_COLON => Some(TokenKind::TokenColon),
SEPARATOR_ASSIGN => Some(TokenKind::TokenAssign),
"'" => Some(TokenKind::TokenTick),
_ => None,
}
}
Expand All @@ -86,7 +92,6 @@ impl From<&String> for TokenKind {
TokenKind::TokenIdentifier
}
}

/// The location of a token in the source code in a human-readable format
#[derive(Debug, Clone, Eq, PartialEq, Deserialize, Serialize)]
pub struct TokenLocation {
Expand Down Expand Up @@ -231,6 +236,7 @@ impl std::fmt::Display for Literal {
Literal::Int(value) => write!(f, "Int({})", value),
Literal::Float(value) => write!(f, "Float({})", value),
Literal::Bool(value) => write!(f, "Bool({})", value),
Literal::Char(value) => write!(f, "Char({})", value),
}
}
}
Expand All @@ -243,6 +249,7 @@ impl std::fmt::Display for TokenKind {
TokenKind::TokenNewLine => write!(f, "TokenNewLine"),
TokenKind::TokenColon => write!(f, "TokenColon"),
TokenKind::TokenAssign => write!(f, "TokenAssign"),
TokenKind::TokenTick => write!(f, "TokenTick"),
TokenKind::TokenEOF => write!(f, "TokenEOF"),
}
}
Expand Down
1 change: 1 addition & 0 deletions testdata/identifier/id_char_assign.fs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
my_char: char = 'a'
11 changes: 11 additions & 0 deletions testdata/identifier/id_char_assign.tokens
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[
{"kind": "TokenIdentifier","lexeme": "my_char","location": {"file_path": "","line": 0,"column_start": 0,"column_end": 7}},
{"kind": "TokenColon","lexeme": ":","location": {"file_path": "","line": 0,"column_start": 7,"column_end": 8}},
{"kind": "TokenKeyword","lexeme": "char","location": {"file_path": "","line": 0,"column_start": 9,"column_end": 13}},
{"kind": "TokenAssign","lexeme": "=","location": {"file_path": "","line": 0,"column_start": 14,"column_end": 15}},
{"kind": "TokenTick","lexeme": "'","location": {"file_path": "","line": 0,"column_start": 16,"column_end": 17}},
{"kind": "TokenIdentifier","lexeme": "a","location": {"file_path": "","line": 0,"column_start": 17,"column_end": 18}},
{"kind": "TokenTick","lexeme": "'","location": {"file_path": "","line": 0,"column_start": 18,"column_end": 19}},
{"kind": "TokenNewLine","lexeme": "\\n","location": {"file_path": "","line": 0,"column_start": 19,"column_end": 19}},
{"kind": "TokenEOF","lexeme": "","location": {"file_path": "","line": 1,"column_start": 0,"column_end": 0}}
]

0 comments on commit e0e8cda

Please sign in to comment.