Skip to content

Commit

Permalink
refactor: do not nest alternations
Browse files Browse the repository at this point in the history
When compiling [(a|b|c)], we used to parse it as:

Re.alt [ x ; Re.alt [ y ; z ] ]

Now, we just flatten the alternation:

Re.alt [ x ; y ; z ]
  • Loading branch information
rgrinberg committed Oct 27, 2024
1 parent 0278d81 commit e99cfa1
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 30 deletions.
4 changes: 2 additions & 2 deletions lib/emacs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ let parse s =
let eos () = Parse_buffer.eos buf in
let test2 = Parse_buffer.test2 buf in
let get () = Parse_buffer.get buf in
let rec regexp () = regexp' (branch ())
let rec regexp () = regexp' [ branch () ]
and regexp' left =
if accept2 '\\' '|' then regexp' (Re.alt [ left; branch () ]) else left
if accept2 '\\' '|' then regexp' (branch () :: left) else Re.alt (List.rev left)
and branch () = branch' []
and branch' left =
if eos () || test2 '\\' '|' || test2 '\\' ')'
Expand Down
5 changes: 3 additions & 2 deletions lib/perl.ml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,9 @@ let parse ~multiline ~dollar_endonly ~dotall ~ungreedy s =
let gr = if ungreedy then not gr else gr in
if gr then Re.non_greedy r else Re.greedy r
in
let rec regexp () = regexp' (branch ())
and regexp' left = if accept '|' then regexp' (Re.alt [ left; branch () ]) else left
let rec regexp () = regexp' [ branch () ]
and regexp' left =
if accept '|' then regexp' (branch () :: left) else Re.alt (List.rev left)
and branch () = branch' []
and branch' left =
if eos () || test '|' || test ')'
Expand Down
5 changes: 3 additions & 2 deletions lib/posix.ml
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ let parse newline s =
let test c = Parse_buffer.test buf c in
let unget () = Parse_buffer.unget buf in
let get () = Parse_buffer.get buf in
let rec regexp () = regexp' (branch ())
and regexp' left = if accept '|' then regexp' (Re.alt [ left; branch () ]) else left
let rec regexp () = regexp' [ branch () ]
and regexp' left =
if accept '|' then regexp' (branch () :: left) else Re.alt (List.rev left)
and branch () = branch' []
and branch' left =
if eos () || test '|' || test ')'
Expand Down
28 changes: 4 additions & 24 deletions lib_test/expect/test_alternation.ml
Original file line number Diff line number Diff line change
Expand Up @@ -9,40 +9,20 @@ let test f string =

let%expect_test "pcre" =
test Re.Pcre.re_result "(a|b|c)";
[%expect
{|
(Group
(Set
(Cast (Alternative (Cast (Alternative (Cset 97) (Cset 98))) (Cset 99)))))
|}]
[%expect {| (Group (Set (Cast (Alternative (Cset 97) (Cset 98) (Cset 99))))) |}]
;;

let%expect_test "emacs" =
test Re.Emacs.re_result {|\(a\|b\|c\)|};
[%expect
{|
(Group
(Set
(Cast (Alternative (Cast (Alternative (Cset 97) (Cset 98))) (Cset 99)))))
|}]
[%expect {| (Group (Set (Cast (Alternative (Cset 97) (Cset 98) (Cset 99))))) |}]
;;

let%expect_test "perl" =
test Re.Perl.re_result "(a|b|c)";
[%expect
{|
(Group
(Set
(Cast (Alternative (Cast (Alternative (Cset 97) (Cset 98))) (Cset 99)))))
|}]
[%expect {| (Group (Set (Cast (Alternative (Cset 97) (Cset 98) (Cset 99))))) |}]
;;

let%expect_test "posix" =
test Re.Posix.re_result "(a|b|c)";
[%expect
{|
(Group
(Set
(Cast (Alternative (Cast (Alternative (Cset 97) (Cset 98))) (Cset 99)))))
|}]
[%expect {| (Group (Set (Cast (Alternative (Cset 97) (Cset 98) (Cset 99))))) |}]
;;

0 comments on commit e99cfa1

Please sign in to comment.