Skip to content

Commit

Permalink
lex: ignore \r in a separate case
Browse files Browse the repository at this point in the history
Signed-off-by: FedericoBruzzone <[email protected]>
  • Loading branch information
FedericoBruzzone committed Aug 12, 2024
1 parent 3bdff3e commit d12e29c
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 35 deletions.
10 changes: 5 additions & 5 deletions dev_doc/syntax_definition.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ x_char: char = '👾' # unicode
x_list: [int] = [1, 2, 3]
x_tuple: (int, str) = (1, "hello")
x_option: option<int> = Just(1) # Nil
x_f1: () -> unit = () -> print "hello" ;
x_f2: () -> int = () -> 1 ;
x_f3: (T) -> T = (x) -> x ; # Generic
x_f1: () -> unit = () -> print "hello"
x_f2: () -> int = () -> 1
x_f3: (T) -> T = (x) -> x # Generic

# Without type annotation (type inference)
x_int = 1
Expand Down Expand Up @@ -69,7 +69,7 @@ match_variant: MyVariant -> str = (v) ->
match v
| First => "first"
| Second => "second"
| Third(n) => "third"
| Third(_) => "third"
;
;

Expand Down Expand Up @@ -109,7 +109,7 @@ f3 = (a, b) ->
;

f4 = (a, b) ->
f_inner = (..) -> # `..` thakes all arguments of the parent function
f_inner = (..) -> # `..` inherits all arguments of the parent function
if gt a b then
a_square \ # `\` is the line continuation character
= a * a
Expand Down
54 changes: 41 additions & 13 deletions src/lexer/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,16 @@ impl Cursor {
/// Before consume:
///
/// test
/// ^_____ index
/// ^_____ offset
/// ^_____ column_start
/// ^_____ column_end
///
/// After two `consume` calls:
///
/// test
/// ^_____ index
/// ^_____ offset
/// ^_____ column_start
/// ^_____ column_end
/// ```
Expand All @@ -65,19 +69,23 @@ impl Cursor {
/// Advances the cursor without consuming the current character
///
/// ```text
/// Before advance:
/// Before advance offset:
///
/// test
/// ^_____ column_start
/// ^_____ column_end
/// ^_____ index = 0
/// ^_____ offset = 0
/// ^_____ column_start = 0
/// ^_____ column_end = 0
///
/// After two advance:
/// After advance offset:
///
/// test
/// ^_______ column_start
/// ^_____ column_end
/// ^_____ index = 0
/// ^_____ offset = 1
/// ^_______ column_start = 0
/// ^_____ column_end = 1
/// ```
pub fn advance(&mut self) {
pub fn advance_offset(&mut self) {
if self.is_eof() {
return;
}
Expand All @@ -92,20 +100,40 @@ impl Cursor {
/// Before align:
///
/// test
/// ^_________ column_start
/// ^_____ column_end
/// ^_________ column_start = 0
/// ^_____ column_end = 3
///
/// After align:
///
/// test
/// ^_____ column_start
/// ^_____ column_end
/// ^_____ column_start = 3
/// ^_____ column_end = 3
/// ```
pub fn align(&mut self) {
    // The start of the pending lexeme catches up with the scan position.
    self.index = self.offset;
    // Collapse the column span the same way: start := end.
    let column_end = self.location.column_end();
    self.location.set_column_start(column_end);
}

/// Removes one character from the underlying source text: the character
/// that starts at byte index `offset - 1`.
///
/// Only the source content is mutated; `index`, `offset` and the token
/// location are left untouched by this method.
///
/// NOTE(review): `offset - 1` assumes `offset >= 1`, and `String::remove`
/// panics if the index is out of bounds or not on a `char` boundary.
/// Confirm that callers uphold both, and that the character at that
/// position really is the `\r` the method name refers to.
pub fn remove_carriage_return(&mut self) {
    let target = self.offset - 1;
    self.source.content_mut().remove(target);
}

/// Advances the cursor to the next line
/// ```text
/// Before new line:
/// test\ntest2
/// ^_____ index = 4
/// ^_____ offset = 4
/// ^_____ column_start = 4
/// ^_____ column_end = 4
///
/// After new line:
/// test\ntest2
/// ^_____ index = 5
/// ^_____ offset = 5
/// ^_____ column_start = 0
/// ^_____ column_end = 0
/// ```
pub fn new_line(&mut self) {
if self.is_eof() {
return;
Expand Down Expand Up @@ -152,7 +180,7 @@ mod tests {
fn test_lexer_cursor_advance() {
let source = Source::from("test_id".to_string());
let mut cursor = Cursor::from(&source);
cursor.advance();
cursor.advance_offset();
assert_eq!(cursor.location().column_start(), 0);
assert_eq!(cursor.location().column_end(), 1);
}
Expand All @@ -161,7 +189,7 @@ mod tests {
fn test_lexer_cursor_align() {
let source = Source::from("test_id".to_string());
let mut cursor = Cursor::from(&source);
cursor.advance();
cursor.advance_offset();
cursor.align();
assert_eq!(cursor.location().column_start(), 1);
assert_eq!(cursor.location().column_end(), 1);
Expand Down
4 changes: 2 additions & 2 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ mod tests {

#[test]
fn identifier() {
let fs_files = collect_fs_files("./testdata/identifier", false);
let fs_files = collect_fs_files("./testdata/identifier", true);
assert_eq!(fs_files.len(), 12);

for path in fs_files {
Expand Down Expand Up @@ -166,7 +166,7 @@ mod tests {
fn test_lexer_transition_apply_advance() {
let source = Source::from("test_id".to_string());
let mut cursor = Cursor::from(&source);
let transition_kind = TransitionKind::Advance;
let transition_kind = TransitionKind::AdvanceOffset;
transition_kind.apply(&mut cursor);
assert_eq!(cursor.location().column_start(), 0);
assert_eq!(cursor.location().column_end(), 1);
Expand Down
32 changes: 19 additions & 13 deletions src/lexer/states.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ pub trait State: Debug {
#[derive(Debug)]
pub enum TransitionKind {
Consume,
Advance,
AdvanceOffset,
Empty, // Keep cursors in the same position
EmitToken(Token),
End,
Expand All @@ -25,8 +25,8 @@ impl TransitionKind {
TransitionKind::Consume => {
cursor.consume();
}
TransitionKind::Advance => {
cursor.advance();
TransitionKind::AdvanceOffset => {
cursor.advance_offset();
}
TransitionKind::Empty => {}
TransitionKind::EmitToken(_) => cursor.align(),
Expand Down Expand Up @@ -60,21 +60,26 @@ pub struct StateStart;
impl State for StateStart {
fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError> {
match cursor.peek() {
Some(c) if c.eq(&' ') || c.eq(&'\t') || c.eq(&'\r') => Ok(Lexer::proceed(
Some(c) if c.eq(&' ') || c.eq(&'\t') => Ok(Lexer::proceed(
Box::new(StateStart),
TransitionKind::Consume,
)),
Some(c) if c.eq(&'\r') => {
cursor.remove_carriage_return();
Ok(Lexer::proceed(Box::new(StateStart), TransitionKind::Empty))
}
Some('#') => Ok(Lexer::proceed(
Box::new(StateComment),
TransitionKind::Consume,
)),
Some(c) if c.is_ascii_digit() => Ok(Lexer::proceed(
Box::new(StateNumber),
TransitionKind::Advance,
TransitionKind::AdvanceOffset,
)),
Some(c) if c.is_alphabetic() || c.eq(&'_') => Ok(Lexer::proceed(
Box::new(StateWord),
TransitionKind::AdvanceOffset,
)),
Some(c) if c.is_alphabetic() || c.eq(&'_') => {
Ok(Lexer::proceed(Box::new(StateWord), TransitionKind::Advance))
}
Some(c) if StateSymbol::is_symbol(c) => {
Ok(Lexer::proceed(Box::new(StateSymbol), TransitionKind::Empty))
}
Expand Down Expand Up @@ -110,7 +115,7 @@ impl State for StateNumber {
match cursor.peek() {
Some(c) if c.is_ascii_digit() => Ok(Lexer::proceed(
Box::new(StateNumber),
TransitionKind::Advance,
TransitionKind::AdvanceOffset,
)),
_ => {
let lexeme = cursor.source().content()[cursor.index()..cursor.offset()].to_string();
Expand All @@ -134,9 +139,10 @@ pub struct StateWord;
impl State for StateWord {
fn visit(&self, cursor: &mut Cursor) -> Result<Transition, LexerError> {
match cursor.peek() {
Some(c) if c.is_alphanumeric() || c.eq(&'_') => {
Ok(Lexer::proceed(Box::new(StateWord), TransitionKind::Advance))
}
Some(c) if c.is_alphanumeric() || c.eq(&'_') => Ok(Lexer::proceed(
Box::new(StateWord),
TransitionKind::AdvanceOffset,
)),
_ => {
// Emit the token once the next character is neither alphanumeric nor `_` (or there is no next character)
let lexeme = cursor.source().content()[cursor.index()..cursor.offset()].to_string();
Expand Down Expand Up @@ -179,7 +185,7 @@ impl State for StateSymbol {
}
Some(c) if StateSymbol::is_symbol(c) => Ok(Lexer::proceed(
Box::new(StateSymbol),
TransitionKind::Advance,
TransitionKind::AdvanceOffset,
)),
_ => {
let lexeme = cursor.source().content()[cursor.index()..cursor.offset()].to_string();
Expand Down
4 changes: 2 additions & 2 deletions src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,8 @@ impl TokenLocation {
self.column_end += 1;
}

pub fn set_column_start(&mut self, column_start: usize) {
self.column_start = column_start;
pub fn set_column_start(&mut self, new_column_start: usize) {
self.column_start = new_column_start;
}

pub fn with_file_path(&self, file_path: &Path) -> TokenLocation {
Expand Down
4 changes: 4 additions & 0 deletions src/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ impl Source {
/// Returns the source text as a borrowed string slice.
pub fn content(&self) -> &str {
    self.content.as_str()
}

/// Returns a mutable reference to the owned source text, allowing callers
/// to edit the content in place.
pub fn content_mut(&mut self) -> &mut String {
    let text = &mut self.content;
    text
}
}

impl From<String> for Source {
Expand Down

0 comments on commit d12e29c

Please sign in to comment.