diff --git a/examples/regressions.ml b/examples/regressions.ml index fa86516d..40f9301c 100644 --- a/examples/regressions.ml +++ b/examples/regressions.ml @@ -1,51 +1,57 @@ -(* This test that unicode_old.ml is a strict sub-set of - * new unicode.ml. *) +(* This test that unicode_old.ml is a strict sub-set of new unicode.ml. *) -let test_versions = ("14.0.0", "15.0.0") -let regressions = [] -let interval s e = Array.to_list (Array.init (e - s) (fun pos -> s + pos)) +module CSet = Sedlex_ppx.Sedlex_cset +module Unicode = Sedlex_ppx.Unicode -exception Found +let test_versions = ("14.0.0", "15.0.0") -let test_exception name x = - try - let l = List.assoc name regressions in - List.iter (fun (s, e) -> if s <= x && x <= e then raise Found) l - with Not_found -> () +let regressions = + [ (* Example *) + (* ("lt", CSet.union (CSet.singleton 0x1c5) (CSet.singleton (0x0001))) *) ] -let compare name (old_l : (int * int) list) (new_l : Sedlex_ppx.Sedlex_cset.t) = - let new_l = (new_l :> (int * int) list) in - let code_points = - List.fold_left (fun res (s, e) -> res @ interval s e) [] old_l +let compare name (old_ : CSet.t) (new_ : CSet.t) = + let diff = CSet.difference old_ new_ in + let regressions = + match List.assoc name regressions with + | exception Not_found -> CSet.empty + | x -> x in - let test x = - try - test_exception name x; - List.iter (fun (s, e) -> if s <= x && x <= e then raise Found) new_l; - false - with Found -> true - in - List.iter + let regressions_intersect = CSet.intersection regressions old_ in + let regressions = CSet.difference regressions regressions_intersect in + let regressions_useless = CSet.difference regressions new_ in + let diff = CSet.difference diff regressions in + Seq.iter + (fun x -> + Printf.printf + "Invalid regression for 0x%x in %s: already present in old set.\n" x + name) + (CSet.to_seq regressions_intersect); + Seq.iter (fun x -> - if not (test x) then - Printf.printf "Code point 0x%x missing in %s!\n" x name) - code_points + Printf.printf "Invalid regression for 0x%x in %s: absent in new set.\n" x + name) + (CSet.to_seq regressions_useless); + Seq.iter + (fun x -> Printf.printf "Code point 0x%x missing in %s!\n" x name) + (CSet.to_seq diff) let test new_l (name, old_l) = (* Cn is for unassigned code points, which are allowed to be * used in future version. *) - if name <> "cn" then compare name old_l (List.assoc name new_l) + if name <> "cn" then ( + let old_l = + List.fold_left + (fun acc (a, b) -> CSet.union acc (CSet.interval a b)) + CSet.empty old_l + in + compare name old_l (List.assoc name new_l)) let () = - if (Unicode_old.version, Sedlex_ppx.Unicode.version) <> test_versions then + if (Unicode_old.version, Unicode.version) <> test_versions then failwith (Printf.sprintf "Test written for versions: %s => %s\n%!" - Unicode_old.version Sedlex_ppx.Unicode.version); + Unicode_old.version Unicode.version); Printf.printf "Testing Unicode regression: %s => %s\n%!" Unicode_old.version - Sedlex_ppx.Unicode.version; - List.iter - (test Sedlex_ppx.Unicode.Categories.list) - Unicode_old.Categories.list; - List.iter - (test Sedlex_ppx.Unicode.Properties.list) - Unicode_old.Properties.list + Unicode.version; + List.iter (test Unicode.Categories.list) Unicode_old.Categories.list; + List.iter (test Unicode.Properties.list) Unicode_old.Properties.list diff --git a/src/common/cset.ml b/src/common/cset.ml index 9171f100..2e84e24d 100644 --- a/src/common/cset.ml +++ b/src/common/cset.ml @@ -8,6 +8,12 @@ type t = (int * int) list +let rec range_to_seq a b next () = + if a = b then Seq.Cons (a, next) else Seq.Cons (a, range_to_seq (a + 1) b next) + +let rec to_seq x () = + match x with [] -> Seq.Nil | (a, b) :: xs -> range_to_seq a b (to_seq xs) () + let check_invariant l = let rec loop prev = function | [] -> () diff --git a/src/common/cset.mli b/src/common/cset.mli index c43a2f40..3606fc98 100644 --- a/src/common/cset.mli +++ b/src/common/cset.mli @@ -22,3 +22,4 @@ val is_empty : t -> bool val eof : t val singleton : int -> t val interval : int -> int -> t +val to_seq : t -> int Seq.t