diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index f6cd4e0..8d1d4fb 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -25,6 +25,10 @@ impl Lexer {
         lexer
     }
 
+    pub fn cursor(&self) -> &Cursor {
+        &self.cursor
+    }
+
     fn proceed(state: Box<dyn State>, transition_kind: TransitionKind) -> Transition {
         Transition::new(state, transition_kind)
     }
diff --git a/src/lexer/states.rs b/src/lexer/states.rs
index 678a991..d8f31a7 100644
--- a/src/lexer/states.rs
+++ b/src/lexer/states.rs
@@ -268,14 +268,11 @@ impl State for StateSymbol {
                     TransitionKind::EmitToken(Token::new(token_kind, lexeme, location)),
                 ))
             }
-            Some(c) => Ok(Lexer::proceed(
-                Box::new(StateStart),
-                TransitionKind::EmitToken(Token::new(
-                    TokenKind::TokenUnknown,
-                    c.to_string(),
-                    cursor.location().clone(),
-                )),
-            )),
+            Some(c) => Err(LexerError::UnexpectedToken(Token::new(
+                TokenKind::TokenUnknown,
+                c.to_string(),
+                cursor.location().clone(),
+            ))),
             None => Ok(Lexer::proceed(Box::new(StateEOF), TransitionKind::Consume)),
         }
     }
diff --git a/src/lexer/token.rs b/src/lexer/token.rs
index 4506e46..1bc425e 100644
--- a/src/lexer/token.rs
+++ b/src/lexer/token.rs
@@ -1,4 +1,3 @@
-use crate::utils::color;
 use serde::{Deserialize, Serialize};
 use std::path::{Path, PathBuf};
 
@@ -320,7 +319,7 @@ impl From<&PathBuf> for TokenLocation {
 #[derive(Clone, Debug, PartialEq, serde::Deserialize, serde::Serialize)]
 pub struct Token {
     /// The kind of the token
-    kind: TokenKind,
+    pub kind: TokenKind,
     /// The lexeme is the string representation of the token
     ///
     /// For example:
@@ -435,13 +434,14 @@ impl std::fmt::Display for TokenLocation {
 
 impl std::fmt::Display for Token {
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        // use crate::utils::color;
         write!(
             f,
-            "{} {{ {}, \"{}\", {} }}",
-            color::cyan("Token"),
-            color::yellow(&format!("{}", self.kind)),
-            color::magenta(&self.lexeme),
-            color::blue(&format!("{}", self.location))
+            "Token {{ {}, \"{}\", {} }}",
+            // color::cyan("Token"),
+            self.kind, // color::yellow(&format!("{}", self.kind)),
+            self.lexeme, // color::magenta(&self.lexeme),
+            self.location, // color::blue(&format!("{}", self.location))
         )
     }
 }
diff --git a/src/main.rs b/src/main.rs
index 601986d..97596b8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,10 +1,12 @@
 pub mod lexer;
 pub mod logger;
+pub mod parser;
 pub mod source;
 pub mod utils;
 
-use lexer::{token::Token, Lexer};
+use lexer::Lexer;
 use logger::Logger;
+use parser::Parser;
 use source::Source;
 use std::{env, path::PathBuf};
 
@@ -29,7 +31,14 @@ fn main() {
     let file_path: &str = &args[0];
     let source = Source::new(file_path);
 
-    let mut lexer = Lexer::new(&source);
-    let _tokens = (&mut lexer).collect::<Vec<Token>>();
-    lexer.emit_errors();
+    let lexer = Lexer::new(&source);
+    // let _tokens = (&mut lexer).collect::<Vec<Token>>();
+    // if lexer.errors().is_empty() {
+    //     println!("No errors found");
+    // } else {
+    //     lexer.emit_errors();
+    // }
+    let mut parser = Parser::new(lexer);
+    // let _ast = (&mut parser).collect::<Vec<_>>();
+    println!("{}", parser.parse());
 }
diff --git a/src/parser/ast.rs b/src/parser/ast.rs
new file mode 100644
index 0000000..d95c796
--- /dev/null
+++ b/src/parser/ast.rs
@@ -0,0 +1,119 @@
+use crate::source::Source;
+
+pub struct Ast {
+    pub source: Source,
+    pub root: Block,
+}
+
+#[derive(Debug)]
+pub struct Block {
+    pub stmts: Box<[Stmt]>,
+}
+
+#[derive(Debug)]
+pub enum Stmt {
+    Assign {
+        ident: Identifier,
+        type_: Type,
+        expr: Expr,
+    },
+    Expr(Expr),
+}
+
+#[derive(Debug)]
+pub enum Type {
+    Int,
+    Float,
+    Bool,
+    Str,
+}
+
+#[derive(Debug)]
+pub enum Expr {
+    Literal(Literal),
+    Identifier(Identifier),
+}
+
+#[derive(Debug)]
+pub enum Literal {
+    Int(i64),
+    Float(f64),
+    Bool(bool),
+    Str(String),
+}
+
+#[derive(Debug)]
+pub struct Identifier {
+    pub name: String,
+}
+
+impl Ast {
+    pub fn new(source: Source, root: Block) -> Ast {
+        Ast { source, root }
+    }
+}
+
+impl std::fmt::Display for Ast {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "Block [{}]", self.root)
+    }
+}
+
+impl std::fmt::Display for Block {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        for stmt in self.stmts.iter() {
+            write!(f, " {} ", stmt)?;
+        }
+        Ok(())
+    }
+}
+
+impl std::fmt::Display for Stmt {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            Stmt::Assign { ident, type_, expr } => write!(
+                f,
+                "Stmt::Assign {{ ident: {}, type: {}, expr: {} }}",
+                ident, type_, expr
+            ),
+            Stmt::Expr(expr) => write!(f, "Stmt::Expr({})", expr),
+        }
+    }
+}
+
+impl std::fmt::Display for Type {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            Type::Int => write!(f, "int"),
+            Type::Float => write!(f, "float"),
+            Type::Bool => write!(f, "bool"),
+            Type::Str => write!(f, "str"),
+        }
+    }
+}
+
+impl std::fmt::Display for Expr {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            Expr::Literal(literal) => write!(f, "{}", literal),
+            Expr::Identifier(ident) => write!(f, "{}", ident),
+        }
+    }
+}
+
+impl std::fmt::Display for Literal {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            Literal::Int(int) => write!(f, "{}", int),
+            Literal::Float(float) => write!(f, "{}", float),
+            Literal::Bool(bool) => write!(f, "{}", bool),
+            Literal::Str(string) => write!(f, "{}", string),
+        }
+    }
+}
+
+impl std::fmt::Display for Identifier {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(f, "{}", self.name)
+    }
+}
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
new file mode 100644
index 0000000..9c3a78f
--- /dev/null
+++ b/src/parser/mod.rs
@@ -0,0 +1,206 @@
+pub mod ast;
+
+use crate::{
+    lexer::{
+        token::{Keyword, Literal, Token, TokenKind},
+        Lexer,
+    },
+    source::Source,
+};
+use std::iter::Peekable;
+use tracing::info;
+
+pub struct Parser {
+    lexer: Peekable<Lexer>,
+    curr_token: Option<Token>,
+    source: Source,
+}
+
+impl Parser {
+    pub fn new<I: IntoIterator<Item = Token, IntoIter = Lexer>>(lexer: I) -> Parser {
+        let mut lexer: Lexer = lexer.into_iter();
+        let source = lexer.cursor().source().clone();
+        info!("Created Parser");
+        let curr_token = lexer.next();
+        Parser {
+            lexer: lexer.peekable(),
+            curr_token,
+            source,
+        }
+    }
+
+    fn consume(&mut self) {
+        self.curr_token = self.lexer.next();
+    }
+
+    fn consume_until(&mut self, kinds: &[TokenKind]) {
+        while let Some(token) = self.curr_token.clone() {
+            if kinds.contains(&token.kind) {
+                self.consume();
+            } else {
+                break;
+            }
+        }
+    }
+
+    // The skippable tokens are space and tab for now
+    // TODO: Add continuation character `\` as skippable
+    fn skip_skippable(&mut self) {
+        self.consume_until(&[TokenKind::TokenSpace, TokenKind::TokenTab]);
+    }
+
+    pub fn parse(&mut self) -> ast::Ast {
+        let source = self.source.clone();
+        let ast::Block { stmts } = self.parse_block().unwrap();
+        let root = ast::Block { stmts };
+        ast::Ast::new(source, root)
+    }
+
+    fn parse_block(&mut self) -> Option<ast::Block> {
+        let mut stmts = Vec::new();
+        let stmt = self.parse_stmt();
+        match stmt {
+            Some(stmt) => stmts.push(stmt),
+            None => return None,
+        }
+        Some(ast::Block {
+            stmts: stmts.into_boxed_slice(),
+        })
+    }
+
+    fn parse_stmt(&mut self) -> Option<ast::Stmt> {
+        match self.curr_token {
+            Some(Token {
+                kind: TokenKind::TokenIdentifier,
+                ..
+            }) => {
+                let stms = self.parse_identifier_stmt();
+                info!("Parsed identifier - {}", stms);
+                Some(stms)
+            }
+            Some(Token {
+                kind: TokenKind::TokenEOF,
+                ..
+            }) => None,
+            _ => todo!(),
+        }
+    }
+
+    fn parse_identifier_stmt(&mut self) -> ast::Stmt {
+        let token = self.curr_token.clone().unwrap(); // Safe to unwrap because we checked for Some
+                                                      // in parse_stmt
+
+        self.consume();
+        self.skip_skippable();
+
+        match self.curr_token {
+            Some(Token {
+                kind: TokenKind::TokenColon,
+                ..
+            }) => {
+                self.consume();
+                self.skip_skippable();
+                match self.curr_token {
+                    Some(Token {
+                        kind: TokenKind::TokenKeyword(_),
+                        ..
+                    }) => {
+                        let type_ = self.parse_type();
+                        self.consume();
+                        self.skip_skippable();
+                        match self.curr_token {
+                            Some(Token {
+                                kind: TokenKind::TokenAssign,
+                                ..
+                            }) => {
+                                self.consume();
+                                self.skip_skippable();
+                                let expr = self.parse_expr();
+                                self.consume();
+                                self.skip_skippable();
+                                match self.curr_token {
+                                    Some(Token {
+                                        kind: TokenKind::TokenNewLine,
+                                        ..
+                                    }) => {
+                                        self.consume();
+                                        self.skip_skippable();
+                                        ast::Stmt::Assign {
+                                            ident: ast::Identifier {
+                                                name: token.lexeme().to_string(),
+                                            },
+                                            type_,
+                                            expr,
+                                        }
+                                    }
+                                    _ => todo!(),
+                                }
+                            }
+                            _ => todo!(),
+                        }
+                    }
+                    _ => todo!(), // Match `(` and parse a function
+                }
+            }
+            _ => todo!(),
+        }
+    }
+
+    fn parse_type(&mut self) -> ast::Type {
+        match &self.curr_token {
+            Some(Token {
+                kind: TokenKind::TokenKeyword(keyword),
+                ..
+            }) => match keyword {
+                Keyword::IntType => ast::Type::Int,
+                Keyword::FloatType => ast::Type::Float,
+                Keyword::BoolType => ast::Type::Bool,
+                Keyword::StrType => ast::Type::Str,
+                _ => todo!(), // Error of invalid type
+            },
+            _ => todo!(), // Error of unexpected token
+        }
+    }
+
+    fn parse_expr(&mut self) -> ast::Expr {
+        match self.curr_token {
+            Some(Token {
+                kind: TokenKind::TokenLiteral(_),
+                ..
+            }) => self.parse_literal_expr(),
+            // Some(Token {
+            //     kind: TokenKind::TokenIdentifier,
+            //     ..
+            // }) => self.parse_identifier_expr(),
+            _ => todo!(),
+        }
+    }
+
+    fn parse_literal_expr(&mut self) -> ast::Expr {
+        let token = self.curr_token.clone().unwrap(); // Safe to unwrap
+        match &self.curr_token {
+            Some(Token {
+                kind: TokenKind::TokenLiteral(literal),
+                ..
+            }) => match literal {
+                Literal::Int => {
+                    let int = match token.lexeme().parse::<i64>() {
+                        Ok(int) => int,
+                        Err(_) => todo!(), // Error of invalid integer
+                    };
+                    ast::Expr::Literal(ast::Literal::Int(int))
+                }
+                _ => todo!(),
+            },
+            _ => todo!(),
+        }
+    }
+}
+
+impl Iterator for Parser {
+    type Item = ast::Block;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.parse_block()
+    }
+}
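As a quick sanity check, here is a minimal sketch of driving the new parser end to end from src/main.rs, using only the APIs touched or introduced in this diff (Source::new, Lexer::new, Parser::new, Parser::parse, and the Display impl on ast::Ast). The run_parser helper, the sample path, and the sample statement are hypothetical and not part of the patch:

    use lexer::Lexer;
    use parser::Parser;
    use source::Source;

    // Hypothetical helper: lex a source file, parse it, and print the AST.
    fn run_parser(file_path: &str) {
        let source = Source::new(file_path);
        let lexer = Lexer::new(&source);
        // Parser::new consumes the lexer, pulls the first token into
        // curr_token, and keeps the rest behind a Peekable iterator.
        let mut parser = Parser::new(lexer);
        // parse() returns an ast::Ast, which implements Display.
        println!("{}", parser.parse());
    }

    // e.g. run_parser("examples/assign.txt"), where the file holds a single
    // assignment along the lines of `x: int = 5` (the exact type-keyword
    // spelling depends on the lexer's Keyword definitions).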