parser: first implementation of error recovery #17

Merged · 4 commits · Sep 1, 2024
Changes from 3 commits
8 changes: 8 additions & 0 deletions .github/workflows/ci.yml
@@ -9,6 +9,14 @@ on:
       - 'rustfmt.toml'
       - 'config/**'
       - '.github/workflows/**'
+  pull_request:
+    paths:
+      - 'src/**'
+      - 'Cargo.lock'
+      - 'Cargo.toml'
+      - 'rustfmt.toml'
+      - 'config/**'
+      - '.github/workflows/**'
 
 env:
   CARGO_TERM_COLOR: always
6 changes: 3 additions & 3 deletions src/lexer/mod.rs
@@ -214,9 +214,9 @@ pub mod tests {
     }
 
     #[test]
-    fn test_lexer_panics() {
-        let fs_files = collect_fs_files("./testdata/panics", true);
-        assert_eq!(fs_files.len(), 1);
+    fn test_lexer_errors() {
+        let fs_files = collect_fs_files("./testdata/errors", true);
+        assert_eq!(fs_files.len(), 2);
 
         for path in fs_files {
             info!("file -> {:?}", path);
22 changes: 20 additions & 2 deletions src/parser/ast.rs
@@ -1,6 +1,24 @@
-use crate::{lexer::token::TokenLocation, source::Source};
+use crate::{
+    lexer::token::{Token, TokenLocation},
+    source::Source,
+};
 use serde::{Deserialize, Serialize};
 
+#[derive(Debug, Deserialize, Serialize, PartialEq)]
+pub enum AstError {
+    UnexpectedToken { token: Token },
+}
+
+impl std::fmt::Display for AstError {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        match self {
+            AstError::UnexpectedToken { token } => {
+                write!(f, "Unexpected token: {}", token)
+            }
+        }
+    }
+}
+
 #[derive(Debug, Deserialize, Serialize, PartialEq)]
 pub struct Ast {
     pub source: Source,
@@ -17,7 +35,7 @@ pub enum Stmt {
     Assign {
         lhs: Expr,
         type_: Type,
-        rhs: Expr,
+        rhs: Result<Expr, AstError>,
     },
     Expr(Expr),
     Comment {
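With this change, recovery errors live in the tree itself: an `Assign` whose right-hand side failed to parse carries `Err(AstError)` instead of aborting the whole parse. A minimal sketch of how a caller might surface the embedded errors afterwards (`collect_errors` is a hypothetical helper, not part of this PR; it assumes `Ast` exposes a public `root: Block`, as the tests further down suggest):

```rust
use crate::parser::ast::{Ast, AstError, Stmt};

// Hypothetical helper (not in this PR): gather every error the parser
// recorded while recovering, so diagnostics can be reported in one pass.
fn collect_errors(ast: &Ast) -> Vec<&AstError> {
    ast.root
        .stmts
        .iter()
        .filter_map(|stmt| match stmt {
            // Assign is the only Stmt variant carrying a Result rhs so far.
            Stmt::Assign { rhs: Err(err), .. } => Some(err),
            _ => None,
        })
        .collect()
}
```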
88 changes: 57 additions & 31 deletions src/parser/mod.rs
@@ -3,6 +3,7 @@ use crate::{
     lexer::token::{Keyword, Literal, Token, TokenKind},
     source::Source,
 };
+use tracing::error;
 use tracing::info;
 
 pub struct Parser<I: IntoIterator> {
@@ -15,8 +16,8 @@ impl<I: IntoIterator<Item = Token>> Parser<I> {
     pub fn new(source: Source, lexer: I) -> Parser<I> {
         let mut lexer = lexer.into_iter();
         let source = source.clone();
-        info!("Created Parser");
         let curr_token = lexer.next();
+        info!("Created Parser");
         Parser {
             lexer,
             curr_token,
@@ -30,12 +31,12 @@ impl<I: IntoIterator<Item = Token>> Parser<I> {
 
     pub fn parse(&mut self) -> ast::Ast {
         let source = self.source.clone();
-        let ast::Block { stmts } = self.parse_block().unwrap();
+        let ast::Block { stmts } = self.parse_block();
         let root = ast::Block { stmts };
         ast::Ast::new(source, root)
     }
 
-    fn parse_block(&mut self) -> Option<ast::Block> {
+    fn parse_block(&mut self) -> ast::Block {
         let mut stmts = Vec::new();
         loop {
             match self.curr_token {
@@ -45,38 +46,35 @@ impl<I: IntoIterator<Item = Token>> Parser<I> {
                 }) => break,
                 Some(_) => {
                     let stmt = self.parse_stmt();
-                    match stmt {
-                        Some(stmt) => stmts.push(stmt),
-                        None => break,
-                    }
+                    stmts.push(stmt);
                 }
                 _ => (),
             }
         }
-        Some(ast::Block {
+        ast::Block {
             stmts: stmts.into_boxed_slice(),
-        })
+        }
     }
 
-    fn parse_stmt(&mut self) -> Option<ast::Stmt> {
-        match self.curr_token {
+    fn parse_stmt(&mut self) -> ast::Stmt {
+        match &self.curr_token {
             Some(Token {
                 kind: TokenKind::TokenIdentifier,
                 ..
             }) => {
                 let stms = self.parse_identifier_stmt();
                 info!("Parsed identifier - {:?}", stms);
-                Some(stms)
+                stms
             }
             Some(Token {
                 kind: TokenKind::TokenComment,
                 ..
             }) => {
                 let comment = self.parse_comment_stmt();
                 info!("Parsed comment - {:?}", comment);
-                Some(comment)
+                comment
             }
-            _ => todo!(),
+            c => todo!("{:?}", c),
         }
     }
 
@@ -103,6 +101,10 @@ impl<I: IntoIterator<Item = Token>> Parser<I> {
     }
 
     fn parse_assign_stmt(&mut self, lhs: Token) -> ast::Stmt {
+        let lhs = ast::Expr::Identifier {
+            name: lhs.lexeme,
+            location: lhs.location,
+        };
         let type_ = self.parse_type();
         info!("Parsed type - {:?}", type_);
         self.consume();
@@ -122,12 +124,9 @@ impl<I: IntoIterator<Item = Token>> Parser<I> {
                 }) => {
                     self.consume();
                     ast::Stmt::Assign {
-                        lhs: ast::Expr::Identifier {
-                            name: lhs.lexeme,
-                            location: lhs.location,
-                        },
+                        lhs,
                         type_,
-                        rhs,
+                        rhs: Ok(rhs),
                     }
                 }
                 _ => todo!(),
@@ -136,7 +135,20 @@ impl<I: IntoIterator<Item = Token>> Parser<I> {
             Some(Token {
                 kind: TokenKind::TokenUnknown,
                 ..
-            }) => panic!("Unexpected token {:?}", self.curr_token),
+            }) => {
+                let err = ast::AstError::UnexpectedToken {
+                    token: self.curr_token.clone().unwrap(),
+                };
+                error!("{}", err);
+
+                self.consume_until_new_statement();
+
+                ast::Stmt::Assign {
+                    lhs,
+                    type_,
+                    rhs: Err(err),
+                }
+            }
             _ => todo!(),
         }
     }
@@ -237,6 +249,21 @@ impl<I: IntoIterator<Item = Token>> Parser<I> {
             _ => todo!(),
         }
     }
+
+    fn consume_until_new_statement(&mut self) {
+        // Consume all tokens until a newline token is found
+        while self.curr_token.is_some() {
+            if let Some(Token {
+                kind: TokenKind::TokenNewLine,
+                ..
+            }) = self.curr_token
+            {
+                self.consume();
+                break;
+            }
+            self.consume();
+        }
+    }
 }
 
 #[cfg(test)]
@@ -287,10 +314,9 @@ pub mod tests {
     }
 
     #[test]
-    #[should_panic]
-    fn test_parser_panics() {
-        let fs_files = collect_fs_files("./testdata/panics", true);
-        assert_eq!(fs_files.len(), 1);
+    fn test_parser_errors() {
+        let fs_files = collect_fs_files("./testdata/errors", true);
+        assert_eq!(fs_files.len(), 2);
 
         for path in fs_files {
             info!("file -> {:?}", path);
@@ -301,14 +327,14 @@ pub mod tests {
             let content = content.replace("\r\n", "\n");
             let source = Source::from(content);
 
-            let _fs_file = path.to_str().unwrap();
+            let fs_file = path.to_str().unwrap();
 
-            let _output_ast = Parser::new(source.clone(), Lexer::new(&source)).parse();
-            // let ast_file = fs_file.to_string().replace(".fs", ".ast.json");
-            // let ast = std::fs::File::open(ast_file).unwrap();
-            // println!("{}", serde_json::to_string(&output_ast.root).unwrap());
-            // let expected_ast = serde_json::from_reader(ast).unwrap();
-            // assert_eq!(output_ast.root, expected_ast);
+            let output_ast = Parser::new(source.clone(), Lexer::new(&source)).parse();
+            let ast_file = fs_file.to_string().replace(".fs", ".ast.json");
+            let ast = std::fs::File::open(ast_file).unwrap();
+            println!("{}", serde_json::to_string(&output_ast.root).unwrap());
+            let expected_ast = serde_json::from_reader(ast).unwrap();
+            assert_eq!(output_ast.root, expected_ast);
         }
     }
 }
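The net effect of the parser changes: an unknown token no longer panics the parser. It records an `UnexpectedToken`, resyncs at the next newline via `consume_until_new_statement`, and continues with the following statement. A rough usage sketch against the new two-line fixture (constructor signatures taken from the test above; treat this as illustrative, not a test shipped in the PR):

```rust
use crate::{lexer::Lexer, parser::{ast, Parser}, source::Source};

fn demo_recovery() {
    // Line 1 contains the unknown token `~`; line 2 is well-formed.
    let source = Source::from("x: int ~ = 0\ny: int = 100\n".to_string());
    let ast = Parser::new(source.clone(), Lexer::new(&source)).parse();

    // The first Assign keeps its lhs/type but records the error as its rhs.
    assert!(matches!(
        &ast.root.stmts[0],
        ast::Stmt::Assign { rhs: Err(ast::AstError::UnexpectedToken { .. }), .. }
    ));
    // After resyncing at the newline, the second Assign parses normally.
    assert!(matches!(
        &ast.root.stmts[1],
        ast::Stmt::Assign { rhs: Ok(_), .. }
    ));
}
```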
67 changes: 67 additions & 0 deletions testdata/errors/id_int_unexpected_two_lines.ast.json
@@ -0,0 +1,67 @@
+{
+  "stmts": [
+    {
+      "Assign": {
+        "lhs": {
+          "Identifier": {
+            "name": "x",
+            "location": {
+              "file_path": "",
+              "line": 0,
+              "column_start": 0,
+              "column_end": 1
+            }
+          }
+        },
+        "type_": "Int",
+        "rhs": {
+          "Err": {
+            "UnexpectedToken": {
+              "token": {
+                "kind": "TokenUnknown",
+                "lexeme": "~",
+                "location": {
+                  "file_path": "",
+                  "line": 0,
+                  "column_start": 7,
+                  "column_end": 8
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    {
+      "Assign": {
+        "lhs": {
+          "Identifier": {
+            "name": "y",
+            "location": {
+              "file_path": "",
+              "line": 1,
+              "column_start": 0,
+              "column_end": 1
+            }
+          }
+        },
+        "type_": "Int",
+        "rhs": {
+          "Ok": {
+            "Literal": {
+              "literal": {
+                "Int": 100
+              },
+              "location": {
+                "file_path": "",
+                "line": 1,
+                "column_start": 9,
+                "column_end": 12
+              }
+            }
+          }
+        }
+      }
+    }
+  ]
+}
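The `Ok`/`Err` keys in this golden file are not hand-written conventions: serde serializes `Result` and enum variants in externally tagged form, so `rhs: Result<Expr, AstError>` round-trips exactly as shown. A reduced, self-contained illustration (types simplified to a `String` token for the example; `serde_json` assumed available, as the tests already use it):

```rust
use serde::{Deserialize, Serialize};

#[derive(Debug, Deserialize, Serialize, PartialEq)]
enum AstError {
    // Token reduced to a String for the sake of the example.
    UnexpectedToken { token: String },
}

fn main() {
    let rhs: Result<i32, AstError> = Err(AstError::UnexpectedToken {
        token: "~".into(),
    });
    // Externally tagged output: {"Err":{"UnexpectedToken":{"token":"~"}}}
    println!("{}", serde_json::to_string(&rhs).unwrap());
}
```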
2 changes: 2 additions & 0 deletions testdata/errors/id_int_unexpected_two_lines.fs
@@ -0,0 +1,2 @@
+x: int ~ = 0
+y: int = 100