Skip to content

Commit

Permalink
lex: small refactor using Result to handle lexer error
Browse files Browse the repository at this point in the history
Signed-off-by: FedericoBruzzone <[email protected]>
  • Loading branch information
FedericoBruzzone committed Aug 6, 2024
1 parent 55297cf commit f6f85b2
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 67 deletions.
2 changes: 1 addition & 1 deletion examples/first.fs
Original file line number Diff line number Diff line change
@@ -1 +1 @@
_x_int: )int = 0
_x_int: int) = 0
42 changes: 29 additions & 13 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ impl Lexer {
lexer
}

pub fn proceed(state: Box<dyn State>, consume_kind: TransitionKind) -> Transition {
Transition::new(state, consume_kind)
pub fn proceed(state: Box<dyn State>, transition_kind: TransitionKind) -> Transition {
Transition::new(state, transition_kind)
}
}

Expand All @@ -33,25 +33,34 @@ impl Iterator for Lexer {

fn next(&mut self) -> Option<Self::Item> {
loop {
let transition = self.state.visit(&mut self.cursor);
let transition = match self.state.visit(&mut self.cursor) {
Ok(transition) => transition,
Err(err) => match err {
LexerError::UnexpectedToken(token) => {
error!("Unexpected token: {}", token);
eprintln!("Unexpected token: {}", token);
return None;
}
},
};
self.state = transition.state;
transition.consume_kind.apply(&mut self.cursor);
if let TransitionKind::EmitToken(token) = transition.consume_kind {
transition.transition_kind.apply(&mut self.cursor);
if let TransitionKind::EmitToken(token) = transition.transition_kind {
info!("Emitting token - {}", token);
return Some(token);
}
if let TransitionKind::ErrorToken(token) = transition.consume_kind {
error!("Unexpected token: {}", token);
eprintln!("Unexpected token: {}", token);
return Some(token);
}
if let TransitionKind::End = transition.consume_kind {
if let TransitionKind::End = transition.transition_kind {
return None;
}
}
}
}

/// Errors that a lexer state can report instead of producing a transition.
///
/// State `visit` implementations return this in the `Err` arm of their
/// `Result`; the lexer's iterator logs the error and stops yielding tokens.
#[derive(Debug)]
pub enum LexerError {
    /// The cursor is positioned on a character that no state accepts.
    /// Carries a token built from that character and the cursor location.
    UnexpectedToken(Token),
}

impl std::fmt::Display for LexerError {
    /// Formats the error with the same wording the lexer uses when logging it.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            LexerError::UnexpectedToken(token) => write!(f, "Unexpected token: {}", token),
        }
    }
}

// Implementing `Error` lets callers propagate this type with `?` into
// `Box<dyn std::error::Error>` and other generic error containers.
impl std::error::Error for LexerError {}

#[cfg(test)]
mod tests {
use token::TokenLocation;
Expand All @@ -62,9 +71,16 @@ mod tests {
use crate::utils::file_handler::{create_tmp_file, remove_tmp_file};
use std::path::Path;

// Fixture for lexing a source that contains a character (`£`) which is
// presumably rejected by the lexer — TODO confirm and assert on the result.
#[test]
fn test_lexer_unexpected_token() {
    let file_path = "test_lex_unexpected_token.tmp";
    let file_content = " _x_int: int £ = 0 ";
    create_tmp_file(file_path, file_content);
    // NOTE(review): no lexer run or assertion happens here yet; the test only
    // exercises fixture creation.
    // Remove the fixture so repeated test runs do not leave stray files behind.
    remove_tmp_file(file_path);
}

#[test]
fn test_lexer_tokenize_var_int_with_spaces() {
let file_path = "test_var_int_with_spaces.tmp";
let file_path = "test_lexer_var_int_with_spaces.tmp";
let file_content = " _x_int: int = 0 ";
create_tmp_file(file_path, file_content);
let source = Source::new(file_path);
Expand Down Expand Up @@ -232,7 +248,7 @@ mod tests {

#[test]
fn test_lexer_token_identifier_file() {
let file_path = "test_token_identifier.tmp";
let file_path = "test_lexer_token_identifier.tmp";
let file_content = "test_id";
create_tmp_file(file_path, file_content);
let source = Source::new(file_path);
Expand Down
106 changes: 53 additions & 53 deletions src/lexer/states.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
use super::cursor::Cursor;
use super::token::Literal;
use super::Lexer;
use super::LexerError;
use crate::lexer::token::Token;
use crate::lexer::token::TokenKind;
use std::fmt::Debug;

pub trait State: Debug {
fn visit(&self, cursor: &mut Cursor) -> Transition;
fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError>;
}

#[derive(Debug)]
pub enum TransitionKind {
Consume,
Advance,
EmitToken(Token),
ErrorToken(Token),
End,
}

Expand All @@ -28,7 +28,6 @@ impl TransitionKind {
cursor.advance();
}
TransitionKind::EmitToken(_) => cursor.align(),
TransitionKind::ErrorToken(_) => cursor.align(),
TransitionKind::End => {}
}
}
Expand All @@ -38,14 +37,14 @@ impl TransitionKind {
// TODO: Remove pub from fields
pub struct Transition {
pub state: Box<dyn State>,
pub consume_kind: TransitionKind,
pub transition_kind: TransitionKind,
}

impl Transition {
pub fn new(state: Box<dyn State>, consume_kind: TransitionKind) -> Transition {
Transition {
state,
consume_kind,
transition_kind: consume_kind,
}
}
}
Expand All @@ -54,30 +53,29 @@ impl Transition {
pub struct StateStart;

impl State for StateStart {
fn visit(&self, cursor: &mut Cursor) -> Transition {
fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError> {
match cursor.peek() {
Some(c) if c.is_whitespace() => {
Lexer::proceed(Box::new(StateStart), TransitionKind::Consume)
}
Some(c) if c.is_ascii_digit() => {
Lexer::proceed(Box::new(StateNumber), TransitionKind::Advance)
}
Some(c) if c.is_whitespace() => Ok(Lexer::proceed(
Box::new(StateStart),
TransitionKind::Consume,
)),
Some(c) if c.is_ascii_digit() => Ok(Lexer::proceed(
Box::new(StateNumber),
TransitionKind::Advance,
)),
Some(c) if c.is_alphabetic() || c.eq(&'_') => {
Lexer::proceed(Box::new(StateWord), TransitionKind::Advance)
Ok(Lexer::proceed(Box::new(StateWord), TransitionKind::Advance))
}
Some(c) if StateSymbol::is_symbol(c) => {
Lexer::proceed(Box::new(StateSymbol), TransitionKind::Advance)
}
Some(c) => Lexer::proceed(
Box::new(StateEnd), // TODO: Consider to return to the StartState to continue the
// lexing
TransitionKind::ErrorToken(Token::new(
TokenKind::from(&c.to_string()),
c.to_string(),
cursor.location().clone(),
)),
),
None => Lexer::proceed(Box::new(StateEOF), TransitionKind::Consume),
Some(c) if StateSymbol::is_symbol(c) => Ok(Lexer::proceed(
Box::new(StateSymbol),
TransitionKind::Advance,
)),
Some(c) => Err(LexerError::UnexpectedToken(Token::new(
TokenKind::from(&c.to_string()),
c.to_string(),
cursor.location().clone(),
))),
None => Ok(Lexer::proceed(Box::new(StateEOF), TransitionKind::Consume)),
}
}
}
Expand All @@ -86,24 +84,25 @@ impl State for StateStart {
pub struct StateNumber;

impl State for StateNumber {
fn visit(&self, cursor: &mut Cursor) -> Transition {
fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError> {
match cursor.peek() {
Some(c) if c.is_ascii_digit() => {
Lexer::proceed(Box::new(StateNumber), TransitionKind::Advance)
}
Some(c) if c.is_ascii_digit() => Ok(Lexer::proceed(
Box::new(StateNumber),
TransitionKind::Advance,
)),
_ => {
let lexeme = cursor.source().content()
[cursor.location().column_start()..cursor.location().column_end()]
.to_string();
let location = cursor.location().clone();
Transition {
Ok(Transition {
state: Box::new(StateStart),
consume_kind: TransitionKind::EmitToken(Token::new(
transition_kind: TransitionKind::EmitToken(Token::new(
TokenKind::TokenLiteral(Literal::Int(lexeme.parse().unwrap())),
lexeme,
location,
)),
}
})
}
}
}
Expand All @@ -113,10 +112,10 @@ impl State for StateNumber {
pub struct StateWord;

impl State for StateWord {
fn visit(&self, cursor: &mut Cursor) -> Transition {
fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError> {
match cursor.peek() {
Some(c) if c.is_alphanumeric() || c.eq(&'_') => {
Lexer::proceed(Box::new(StateWord), TransitionKind::Advance)
Ok(Lexer::proceed(Box::new(StateWord), TransitionKind::Advance))
}
_ => {
// Emit token when we encounter a non-alphabetic character
Expand All @@ -125,12 +124,12 @@ impl State for StateWord {
.to_string();
let token_kind = TokenKind::from(&lexeme);
let location = cursor.location().clone();
Transition {
Ok(Transition {
state: Box::new(StateStart),
consume_kind: TransitionKind::EmitToken(Token::new(
transition_kind: TransitionKind::EmitToken(Token::new(
token_kind, lexeme, location,
)),
}
})
}
}
}
Expand All @@ -146,23 +145,24 @@ impl StateSymbol {
}

impl State for StateSymbol {
fn visit(&self, cursor: &mut Cursor) -> Transition {
fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError> {
match cursor.peek() {
Some(c) if StateSymbol::is_symbol(c) => {
Lexer::proceed(Box::new(StateSymbol), TransitionKind::Advance)
}
Some(c) if StateSymbol::is_symbol(c) => Ok(Lexer::proceed(
Box::new(StateSymbol),
TransitionKind::Advance,
)),
_ => {
let lexeme = cursor.source().content()
[cursor.location().column_start()..cursor.location().column_end()]
.to_string();
let token_kind = TokenKind::from(&lexeme);
let location = cursor.location().clone();
Transition {
Ok(Transition {
state: Box::new(StateStart),
consume_kind: TransitionKind::EmitToken(Token::new(
transition_kind: TransitionKind::EmitToken(Token::new(
token_kind, lexeme, location,
)),
}
})
}
}
}
Expand All @@ -172,27 +172,27 @@ impl State for StateSymbol {
pub struct StateEOF;

impl State for StateEOF {
fn visit(&self, cursor: &mut Cursor) -> Transition {
fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError> {
cursor.align();
Transition {
Ok(Transition {
state: Box::new(StateEnd),
consume_kind: TransitionKind::EmitToken(Token::new(
transition_kind: TransitionKind::EmitToken(Token::new(
TokenKind::TokenEOF,
"".to_string(),
cursor.location().clone(),
)),
}
})
}
}

/// Final state of the lexer state machine.
///
/// Its `visit` implementation always returns a transition back to `StateEnd`
/// with `TransitionKind::End`, which makes the lexer's iterator return `None`.
#[derive(Debug)]
pub struct StateEnd;

impl State for StateEnd {
fn visit(&self, _cursor: &mut Cursor) -> Transition {
Transition {
fn visit(&self, _cursor: &mut Cursor) -> Result<Transition, LexerError> {
Ok(Transition {
state: Box::new(StateEnd),
consume_kind: TransitionKind::End,
}
transition_kind: TransitionKind::End,
})
}
}

0 comments on commit f6f85b2

Please sign in to comment.