lex: add test for unexpected token and update cursor
Signed-off-by: FedericoBruzzone <[email protected]>
FedericoBruzzone committed Aug 6, 2024
1 parent 3d38a40 commit 335f375
Showing 5 changed files with 224 additions and 40 deletions.
6 changes: 5 additions & 1 deletion src/lexer/cursor.rs
@@ -24,6 +24,10 @@ impl Cursor {
&self.location
}

pub fn mut_location(&mut self) -> &mut TokenLocation {
&mut self.location
}

pub fn is_eof(&self) -> bool {
self.index >= self.source.content().len()
}
@@ -100,7 +104,7 @@ impl From<&Source> for Cursor {
fn from(source: &Source) -> Cursor {
Cursor {
source: source.clone(),
location: TokenLocation::new(source.file_path()),
location: TokenLocation::from(source.file_path()),
index: 0,
}
}
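
The new `mut_location` accessor gives lexer states write access to the cursor's current span. A minimal sketch of how it could be used (not part of this commit; it assumes `TokenLocation` is `Clone` and that the `with_*` builders consume and return `self`, as their use in the tests suggests):

    use crate::lexer::cursor::Cursor;

    fn reset_span(cursor: &mut Cursor, line: usize, start: usize, end: usize) {
        // Rebuild the span from the current location with the builder methods...
        let span = cursor
            .location()
            .clone()
            .with_line(line)
            .with_column_start(start)
            .with_column_end(end);
        // ...then write it back in place through the new mutable accessor.
        *cursor.mut_location() = span;
    }
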
219 changes: 189 additions & 30 deletions src/lexer/mod.rs
@@ -2,30 +2,43 @@ pub mod cursor;
pub mod states;
pub mod token;

use crate::lexer::token::Token;
use crate::source::Source;
use crate::{lexer::token::Token, utils::color};
use cursor::Cursor;
use states::{State, StateStart, Transition, TransitionKind};
use tracing::{error, info};

pub struct Lexer {
cursor: Cursor,
state: Box<dyn State>,
errors: Vec<LexerError>,
}

impl Lexer {
pub fn new(source: &Source) -> Lexer {
let lexer = Lexer {
cursor: Cursor::from(source),
state: Box::new(StateStart),
errors: Vec::new(),
};
info!("Created Lexer");
lexer
}

pub fn proceed(state: Box<dyn State>, transition_kind: TransitionKind) -> Transition {
fn proceed(state: Box<dyn State>, transition_kind: TransitionKind) -> Transition {
Transition::new(state, transition_kind)
}

pub fn errors(&self) -> &Vec<LexerError> {
&self.errors
}

pub fn emit_errors(&self) {
eprintln!("{}", color::red("Errors:"));
for error in &self.errors {
eprintln!(" {}", error);
}
}
}

impl Iterator for Lexer {
@@ -35,13 +48,12 @@ impl Iterator for Lexer {
loop {
let transition = match self.state.visit(&mut self.cursor) {
Ok(transition) => transition,
Err(err) =>
// TODO: return a transition to continue lexing (for error recovery)
{
Err(err) => {
self.errors.push(err.clone());
match err {
LexerError::UnexpectedToken(token) => {
error!("Unexpected token: {}", token);
eprintln!("Unexpected token: {}", token);
// TODO: return a transition to continue lexing (for error recovery)
return None;
}
}
@@ -62,11 +74,19 @@
}
}

#[derive(Debug)]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LexerError {
UnexpectedToken(Token),
}

impl std::fmt::Display for LexerError {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
LexerError::UnexpectedToken(token) => write!(f, "Unexpected token: {}", token),
}
}
}
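
Taken together, `errors`, `emit_errors`, and the new `Display` impl let a caller drain the lexer first and report failures afterwards. A sketch of that flow (not from this diff; `lex_file` is an invented helper, and it assumes the iterator yields `Token`s, as the tests below suggest):

    use crate::lexer::{token::Token, Lexer};
    use crate::source::Source;

    fn lex_file(path: &str) -> Vec<Token> {
        let source = Source::new(path);
        let mut lexer = Lexer::new(&source);
        // Collecting through a mutable reference leaves `lexer` alive, so the
        // errors it accumulated are still inspectable afterwards.
        let tokens: Vec<Token> = (&mut lexer).collect();
        if !lexer.errors().is_empty() {
            lexer.emit_errors(); // prints each error via its Display impl
        }
        tokens
    }
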

#[cfg(test)]
mod tests {
use token::TokenLocation;
@@ -75,13 +95,152 @@ mod tests {
use crate::lexer::token::TokenKind;
use crate::source::Source;
use crate::utils::file_handler::{create_tmp_file, remove_tmp_file};
use std::path::Path;
use std::path::PathBuf;

#[test]
// TODO: The column_end should equal column_start + 1, but this is fine for now. We return
// only the position (in this case (18, 18)) where the first unexpected character is found.
fn test_lexer_unexpected_token() {
let file_path = "test_lex_unexpected_token.tmp";
let file_content = " _x_int: int £ = 0 ";
create_tmp_file(file_path, file_content);

let source = Source::new(file_path);
let mut lexer = Lexer::new(&source);
let tokens = (&mut lexer).collect::<Vec<_>>();
let errors = lexer.errors();
assert_eq!(tokens.len(), 3);
assert_eq!(
(&tokens[0].kind, &tokens[1].kind, &tokens[2].kind,),
(
&TokenKind::TokenIdentifier,
&TokenKind::TokenColon,
&TokenKind::TokenKeyword,
)
);
assert_eq!(
(&tokens[0].lexeme, &tokens[1].lexeme, &tokens[2].lexeme,),
(&"_x_int".to_string(), &":".to_string(), &"int".to_string())
);
assert_eq!(
(
&tokens[0].location,
&tokens[1].location,
&tokens[2].location,
),
(
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(4)
.with_column_end(10),
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(10)
.with_column_end(11),
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(14)
.with_column_end(17),
)
);
assert_eq!(errors.len(), 1);
assert_eq!(
&errors[0],
&LexerError::UnexpectedToken(Token::new(
TokenKind::TokenIdentifier,
"£".to_string(),
TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(18)
.with_column_end(18)
))
);

remove_tmp_file(file_path);
}

#[test]
fn test_lexer_identifier_with_length_one() {
let file_path = "test_lexer_identifier_with_length_one.tmp";
let file_content = "i: int = 1";
create_tmp_file(file_path, file_content);
let source = Source::new(file_path);
let lexer = Lexer::new(&source);
let tokens = lexer.collect::<Vec<_>>();
assert_eq!(tokens.len(), 6);
assert_eq!(
(
&tokens[0].kind,
&tokens[1].kind,
&tokens[2].kind,
&tokens[3].kind,
&tokens[4].kind,
&tokens[5].kind
),
(
&TokenKind::TokenIdentifier,
&TokenKind::TokenColon,
&TokenKind::TokenKeyword,
&TokenKind::TokenAssign,
&TokenKind::TokenLiteral(token::Literal::Int(1)),
&TokenKind::TokenEOF
)
);
assert_eq!(
(
&tokens[0].lexeme,
&tokens[1].lexeme,
&tokens[2].lexeme,
&tokens[3].lexeme,
&tokens[4].lexeme,
&tokens[5].lexeme
),
(
&"i".to_string(),
&":".to_string(),
&"int".to_string(),
&"=".to_string(),
&"1".to_string(),
&"".to_string()
)
);
assert_eq!(
(
&tokens[0].location,
&tokens[1].location,
&tokens[2].location,
&tokens[3].location,
&tokens[4].location,
&tokens[5].location
),
(
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(0)
.with_column_end(1),
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(1)
.with_column_end(2),
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(3)
.with_column_end(6),
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(7)
.with_column_end(8),
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(9)
.with_column_end(10),
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(10)
.with_column_end(10)
)
);
remove_tmp_file(file_path);
}

#[test]
@@ -139,27 +298,27 @@
&tokens[5].location
),
(
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(4)
.with_column_end(10),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(10)
.with_column_end(11),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(14)
.with_column_end(17),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(19)
.with_column_end(20),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(22)
.with_column_end(23),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(26)
.with_column_end(26)
@@ -223,27 +382,27 @@
&tokens[5].location
),
(
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(0)
.with_column_end(6),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(6)
.with_column_end(7),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(8)
.with_column_end(11),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(12)
.with_column_end(13),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(14)
.with_column_end(15),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(15)
.with_column_end(15)
@@ -272,11 +431,11 @@
assert_eq!(
(&tokens[0].location, &tokens[1].location),
(
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(0)
.with_column_end(7),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(7)
.with_column_end(7)
@@ -305,11 +464,11 @@
assert_eq!(
(&tokens[0].location, &tokens[1].location),
(
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(9)
.with_column_end(18),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(18)
.with_column_end(18)
@@ -338,11 +497,11 @@
assert_eq!(
(&tokens[0].location, &tokens[1].location),
(
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(0)
.with_column_end(9),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(18)
.with_column_end(18)
@@ -371,11 +530,11 @@
assert_eq!(
(&tokens[0].location, &tokens[1].location),
(
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(9)
.with_column_end(18),
&TokenLocation::new(&Path::new(file_path))
&TokenLocation::from(&PathBuf::from(file_path))
.with_line(0)
.with_column_start(27)
.with_column_end(27)
@@ -456,11 +615,11 @@
assert_eq!(
(&tokens[0].location, &tokens[1].location),
(
&TokenLocation::new(&Path::new(""))
&TokenLocation::from(&PathBuf::from(""))
.with_line(0)
.with_column_start(0)
.with_column_end(7),
&TokenLocation::new(&Path::new(""))
&TokenLocation::from(&PathBuf::from(""))
.with_line(0)
.with_column_start(7)
.with_column_end(7)
