From 0dd67c881d1d65f8028e84f879da35b8192b6b99 Mon Sep 17 00:00:00 2001 From: myfreess Date: Mon, 2 Dec 2024 14:22:42 +0800 Subject: [PATCH 1/4] add ignore --- next/.gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/next/.gitignore b/next/.gitignore index a41cafda..622412d4 100644 --- a/next/.gitignore +++ b/next/.gitignore @@ -1,3 +1,4 @@ _build *.mo -__pycache__ \ No newline at end of file +__pycache__ +.env \ No newline at end of file From 7382025f1846694a1cbfbf1fdcf24d0bca4551f8 Mon Sep 17 00:00:00 2001 From: myfreess Date: Mon, 2 Dec 2024 14:23:16 +0800 Subject: [PATCH 2/4] modify example/gmachine struture --- next/tutorial/example/gmachine/gmachine-1.md | 2 +- next/tutorial/example/gmachine/index.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/next/tutorial/example/gmachine/gmachine-1.md b/next/tutorial/example/gmachine/gmachine-1.md index 2cbef9b9..f19fa4e8 100644 --- a/next/tutorial/example/gmachine/gmachine-1.md +++ b/next/tutorial/example/gmachine/gmachine-1.md @@ -1,6 +1,6 @@ # G-Machine 1 -Lazy evaluation stands as a foundational concept in the realm of programming languages. Haskell, renowned as a purely functional programming language, boasts a robust lazy evaluation mechanism. This mechanism not only empowers developers to craft code that's both more efficient and concise but also enhances program performance and responsiveness, especially when tackling sizable datasets or intricate data streams. In this article, we'll delve into the Lazy Evaluation mechanism, thoroughly examining its principles and implementation methods, and then explore how to implement Haskell's evaluation semantics in [MoonBit](https://www.moonbitlang.com/). +This article is the first in the series on implementing lazy evaluation in MoonBit. In this article, we will exploring the purposes of lazy evaluation and a typical abstract machine for lazy evaluation, the G-Machine. ## Higher-Order Functions and Performance Challenges diff --git a/next/tutorial/example/gmachine/index.md b/next/tutorial/example/gmachine/index.md index e10b929a..46ec94c1 100644 --- a/next/tutorial/example/gmachine/index.md +++ b/next/tutorial/example/gmachine/index.md @@ -1,5 +1,9 @@ # G-Machine +Lazy evaluation stands as a foundational concept in the realm of programming languages. Haskell, renowned as a purely functional programming language, boasts a robust lazy evaluation mechanism. This mechanism not only empowers developers to craft code that's both more efficient and concise but also enhances program performance and responsiveness, especially when tackling sizable datasets or intricate data streams. + +In this article, we'll delve into the Lazy Evaluation mechanism, thoroughly examining its principles and implementation methods, and then explore how to implement Haskell's evaluation semantics in [MoonBit](https://www.moonbitlang.com/). + ```{toctree} :maxdepth: 2 :caption: Contents: From a667198cc7d1219deffdd48da9d4520a43129990 Mon Sep 17 00:00:00 2001 From: myfreess Date: Tue, 3 Dec 2024 14:09:03 +0800 Subject: [PATCH 3/4] G-Machine Part1 --- next/sources/gmachine/.gitignore | 2 + next/sources/gmachine/LICENSE | 202 ++++++++ next/sources/gmachine/README.md | 1 + next/sources/gmachine/moon.mod.json | 10 + next/sources/gmachine/src/part1/ast.mbt | 90 ++++ next/sources/gmachine/src/part1/compile.mbt | 56 +++ .../gmachine/src/part1/instruction.mbt | 11 + next/sources/gmachine/src/part1/lazy.mbt | 25 + next/sources/gmachine/src/part1/moon.pkg.json | 1 + next/sources/gmachine/src/part1/programs.mbt | 41 ++ next/sources/gmachine/src/part1/syntax.mbt | 332 ++++++++++++++ next/sources/gmachine/src/part1/top.mbt | 37 ++ next/sources/gmachine/src/part1/vm.mbt | 234 ++++++++++ next/tutorial/example/gmachine/gmachine-1.md | 434 +++--------------- 14 files changed, 1118 insertions(+), 358 deletions(-) create mode 100644 next/sources/gmachine/.gitignore create mode 100644 next/sources/gmachine/LICENSE create mode 100644 next/sources/gmachine/README.md create mode 100644 next/sources/gmachine/moon.mod.json create mode 100644 next/sources/gmachine/src/part1/ast.mbt create mode 100644 next/sources/gmachine/src/part1/compile.mbt create mode 100644 next/sources/gmachine/src/part1/instruction.mbt create mode 100644 next/sources/gmachine/src/part1/lazy.mbt create mode 100644 next/sources/gmachine/src/part1/moon.pkg.json create mode 100644 next/sources/gmachine/src/part1/programs.mbt create mode 100644 next/sources/gmachine/src/part1/syntax.mbt create mode 100644 next/sources/gmachine/src/part1/top.mbt create mode 100644 next/sources/gmachine/src/part1/vm.mbt diff --git a/next/sources/gmachine/.gitignore b/next/sources/gmachine/.gitignore new file mode 100644 index 00000000..b1283a74 --- /dev/null +++ b/next/sources/gmachine/.gitignore @@ -0,0 +1,2 @@ +target/ +.mooncakes/ diff --git a/next/sources/gmachine/LICENSE b/next/sources/gmachine/LICENSE new file mode 100644 index 00000000..d6456956 --- /dev/null +++ b/next/sources/gmachine/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/next/sources/gmachine/README.md b/next/sources/gmachine/README.md new file mode 100644 index 00000000..ddd9c89a --- /dev/null +++ b/next/sources/gmachine/README.md @@ -0,0 +1 @@ +# G-Machine \ No newline at end of file diff --git a/next/sources/gmachine/moon.mod.json b/next/sources/gmachine/moon.mod.json new file mode 100644 index 00000000..c11eb2b7 --- /dev/null +++ b/next/sources/gmachine/moon.mod.json @@ -0,0 +1,10 @@ +{ + "name": "moonbit-community/gmachine", + "version": "0.1.0", + "readme": "README.md", + "repository": "", + "license": "Apache-2.0", + "keywords": [], + "description": "", + "source": "src" +} \ No newline at end of file diff --git a/next/sources/gmachine/src/part1/ast.mbt b/next/sources/gmachine/src/part1/ast.mbt new file mode 100644 index 00000000..532e3495 --- /dev/null +++ b/next/sources/gmachine/src/part1/ast.mbt @@ -0,0 +1,90 @@ +typealias List[E] = @immut/list.T[E] + +// start expr_and_scdef definition +enum RawExpr[T] { + Var(T) + Num(Int) + Constructor(tag~:Int, arity~:Int) // tag, arity + App(RawExpr[T], RawExpr[T]) + Let(Bool, List[(T, RawExpr[T])], RawExpr[T]) // isRec, Defs, Body + Case(RawExpr[T], List[(Int, List[T], RawExpr[T])]) +} derive(Show) + +struct ScDef[T] { + name : String + args : List[T] + body : RawExpr[T] +} derive(Show) +// end expr_and_scdef definition + +fn is_atom[T](self : RawExpr[T]) -> Bool { + match self { + Var(_) => true + Num(_) => true + _ => false + } +} + +fn binders_of[L, R](l : List[(L, R)]) -> List[L] { + fn fst(pair) { + let (l, _) = pair + return l + } + + l.map(fst) +} + +fn rhss_of[L, R](l : List[(L, R)]) -> List[R] { + fn snd(pair) { + let (_, r) = pair + return r + } + + l.map(snd) +} + +fn ScDef::new[T]( + name : String, + args : List[T], + body : RawExpr[T] +) -> ScDef[T] { + { name : name, args : args, body : body } +} + +// start prelude_defs definition +let prelude_defs : List[ScDef[String]] = { + let args : (FixedArray[String]) -> List[String] = List::of + let id = ScDef::new("I", args(["x"]), Var("x")) // id x = x + let k = + ScDef::new( + "K", + args(["x", "y"]), + Var("x") + ) // K x y = x + let k1 = + ScDef::new( + "K1", + args(["x", "y"]), + Var("y") + ) // K1 x y = y + let s = + ScDef::new( + "S", + args(["f", "g", "x"]), + App(App(Var("f"), Var("x")), App(Var("g"), Var("x"))) + ) // S f g x = f x (g x) + let compose = + ScDef::new( + "compose", + args(["f", "g", "x"]), + App(Var("f"), App(Var("g"), Var("x"))) + ) // compose f g x = f (g x) + let twice = + ScDef::new( + "twice", + args(["f"]), + App(App(Var("compose"), Var("f")), Var("f")) + ) // twice f = compose f f + List::of([id, k, k1, s, compose, twice]) +} +// end prelude_defs definition \ No newline at end of file diff --git a/next/sources/gmachine/src/part1/compile.mbt b/next/sources/gmachine/src/part1/compile.mbt new file mode 100644 index 00000000..b00ab4f6 --- /dev/null +++ b/next/sources/gmachine/src/part1/compile.mbt @@ -0,0 +1,56 @@ +// start compile_sc definition +fn compileSC(self : ScDef[String]) -> (String, Int, List[Instruction]) { + let name = self.name + let body = self.body + let mut arity = 0 + fn gen_env(i : Int, args : List[String]) -> List[(String, Int)] { + match args { + Nil => { + arity = i + return Nil + } + Cons(s, ss) => Cons((s, i), gen_env(i + 1, ss)) + } + } + + let env = gen_env(0, self.args) + (name, arity, compileR(body, env, arity)) +} +// end compile_sc definition + +// start compile_r definition +fn compileR( + self : RawExpr[String], + env : List[(String, Int)], + arity : Int +) -> List[Instruction] { + if arity == 0 { + compileC(self, env) + List::of([Update(arity), Unwind]) + } else { + compileC(self, env) + List::of([Update(arity), Pop(arity), Unwind]) + } +} +// end compile_r definition + +// start compile_c definition +fn compileC( + self : RawExpr[String], + env : List[(String, Int)] +) -> List[Instruction] { + match self { + Var(s) => + match env.lookup(s) { + None => List::of([PushGlobal(s)]) + Some(n) => List::of([PushArg(n)]) + } + Num(n) => List::of([PushInt(n)]) + App(e1, e2) => + compileC(e2, env) + compileC(e1, argOffset(1, env)) + List::of([MkApp]) + _ => abort("not support yet") + } +} +// end compile_c definition + +fn argOffset(n : Int, env : List[(String, Int)]) -> List[(String, Int)] { + env.map(fn { (name, offset) => (name, offset + n) }) +} diff --git a/next/sources/gmachine/src/part1/instruction.mbt b/next/sources/gmachine/src/part1/instruction.mbt new file mode 100644 index 00000000..c7af2059 --- /dev/null +++ b/next/sources/gmachine/src/part1/instruction.mbt @@ -0,0 +1,11 @@ +// start instr definition +enum Instruction { + Unwind + PushGlobal(String) + PushInt(Int) + PushArg(Int) + MkApp + Update(Int) + Pop(Int) +} derive (Eq, Show) +// end instr definition \ No newline at end of file diff --git a/next/sources/gmachine/src/part1/lazy.mbt b/next/sources/gmachine/src/part1/lazy.mbt new file mode 100644 index 00000000..5d490062 --- /dev/null +++ b/next/sources/gmachine/src/part1/lazy.mbt @@ -0,0 +1,25 @@ +// start lazy definition +enum LazyData[T] { + Waiting(() -> T) + Done(T) +} + +struct LazyRef[T] { + mut data : LazyData[T] +} + +fn extract[T](self : LazyRef[T]) -> T { + match self.data { + Waiting(thunk) => { + let value = thunk() + self.data = Done(value) // in-place update + value + } + Done(value) => value + } +} + +fn square(x : LazyRef[Int]) -> Int { + x.extract() * x.extract() +} +// end lazy definition \ No newline at end of file diff --git a/next/sources/gmachine/src/part1/moon.pkg.json b/next/sources/gmachine/src/part1/moon.pkg.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/next/sources/gmachine/src/part1/moon.pkg.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/next/sources/gmachine/src/part1/programs.mbt b/next/sources/gmachine/src/part1/programs.mbt new file mode 100644 index 00000000..bb6b38b3 --- /dev/null +++ b/next/sources/gmachine/src/part1/programs.mbt @@ -0,0 +1,41 @@ +let programs : @hashmap.T[String, String] = { + let programs = @hashmap.new(capacity=40) + programs["square"] = + #| (defn square[x] (mul x x)) + programs["fix"] = + #| (defn fix[f] (letrec ([x (f x)]) x)) + programs["isNil"] = + #| (defn isNil[x] + #| (case x [(Nil) 1] [(Cons n m) 0])) + programs["tail"] = + #| (defn tail[l] (case l [(Cons x xs) xs])) + programs["fibs"] = + // fibs = 0 : 1 : zipWith (+) fibs (tail fibs) + #| (defn fibs[] (Cons 0 (Cons 1 (zipWith add fibs (tail fibs))))) + programs["take"] = + #| (defn take[n l] + #| (case l + #| [(Nil) Nil] + #| [(Cons x xs) + #| (if (le n 0) Nil (Cons x (take (sub n 1) xs)))])) + programs["zipWith"] = + #| (defn zipWith[op l1 l2] + #| (case l1 + #| [(Nil) Nil] + #| [(Cons x xs) + #| (case l2 + #| [(Nil) Nil] + #| [(Cons y ys) (Cons (op x y) (zipWith op xs ys))])])) + programs["factorial"] = + #| (defn factorial[n] + #| (if (eq n 0) 1 (mul n (factorial (sub n 1))))) + programs["abs"] = + #| (defn abs[n] + #| (if (lt n 0) (negate n) n)) + programs["length"] = + #| (defn length[l] + #| (case l + #| [(Nil) 0] + #| [(Cons x xs) (add 1 (length xs))])) + programs +} diff --git a/next/sources/gmachine/src/part1/syntax.mbt b/next/sources/gmachine/src/part1/syntax.mbt new file mode 100644 index 00000000..3a59bf26 --- /dev/null +++ b/next/sources/gmachine/src/part1/syntax.mbt @@ -0,0 +1,332 @@ +enum Token { + DefFn + Let + NIL + CONS + Case + Letrec + Open(Char) // { [ ( + Close(Char) // } ] ) + Id(String) + Number(Int) + EOF +} derive(Eq, Show) + +fn between(this : Char, lw : Char, up : Char) -> Bool { + this >= lw && this <= up +} + +fn isDigit(this : Char) -> Bool { + between(this, '0', '9') +} + +fn isAlpha(this : Char) -> Bool { + between(this, 'A', 'Z') || between(this, 'a', 'z') +} + +fn isIdChar(this : Char) -> Bool { + isAlpha(this) || isDigit(this) || this == '_' || this == '-' +} + +fn isWhiteSpace(this : Char) -> Bool { + this == ' ' || this == '\t' || this == '\n' +} + +fn to_number(this : Char) -> Int { + this.to_int() - 48 +} + +fn isOpen(this : Char) -> Bool { + this == '(' || this == '[' || this == '{' +} + +fn isClose(this : Char) -> Bool { + this == ')' || this == ']' || this == '}' +} + +struct Tokens { + tokens : Array[Token] + mut current : Int +} derive(Show) + +fn Tokens::new(tokens : Array[Token]) -> Tokens { + Tokens::{ tokens, current: 0 } +} + +fn peek(self : Tokens) -> Token { + if self.current < self.tokens.length() { + return self.tokens[self.current] + } else { + return EOF + } +} + +type! ParseError String + +fn next(self : Tokens, loc~ : SourceLoc = _) -> Unit { + self.current = self.current + 1 + if self.current > self.tokens.length() { + abort("Tokens::next(): \{loc}") + } +} + +fn eat(self : Tokens, tok : Token, loc~ : SourceLoc = _) -> Unit!ParseError { + let __tok = self.peek() + // assert tok_ != EOF + if __tok != tok { + raise ParseError("\{loc} - Tokens::eat(): expect \{tok} but got \{__tok}") + } else { + self.next() + } +} + +fn tokenize(source : String) -> Tokens { + let tokens : Array[Token] = Array::new(capacity=source.length() / 2) + let mut current = 0 + let source = source.to_array() + fn peek() -> Char { + source[current] + } + + fn next() -> Unit { + current = current + 1 + } + + while current < source.length() { + let ch = peek() + if isWhiteSpace(ch) { + next() + continue + } else if isDigit(ch) { + let mut num = to_number(ch) + next() + while current < source.length() && isDigit(peek()) { + num = num * 10 + to_number(peek()) + next() + } + tokens.push(Number(num)) + continue + } else if isOpen(ch) { + next() + tokens.push(Open(ch)) + continue + } else if isClose(ch) { + next() + tokens.push(Close(ch)) + continue + } else if isAlpha(ch) { + let identifier = @buffer.new(size_hint=42) + identifier.write_char(ch) + next() + while current < source.length() && isIdChar(peek()) { + identifier.write_char(peek()) + next() + } + let identifier = identifier.to_unchecked_string() + match identifier { + "let" => tokens.push(Let) + "letrec" => tokens.push(Letrec) + "Nil" => tokens.push(NIL) + "Cons" => tokens.push(CONS) + "case" => tokens.push(Case) + "defn" => tokens.push(DefFn) + _ => tokens.push(Id(identifier)) + } + } else { + abort("error : invalid Character '\{ch}' in [\{current}]") + } + } else { + return Tokens::new(tokens) + } +} + +test "tokenize" { + inspect!(tokenize("").tokens, content="[]") + inspect!(tokenize("12345678").tokens, content="[Number(12345678)]") + inspect!(tokenize("1234 5678").tokens, content="[Number(1234), Number(5678)]") + inspect!( + tokenize("a0 a_0 a-0").tokens, + content= + #|[Id("a0"), Id("a_0"), Id("a-0")] + , + ) + inspect!( + tokenize("(Cons 0 (Cons 1 Nil))").tokens, + content="[Open('('), CONS, Number(0), Open('('), CONS, Number(1), NIL, Close(')'), Close(')')]", + ) +} + +fn parse_num(self : Tokens) -> Int!ParseError { + match self.peek() { + Number(n) => { + self.next() + return n + } + other => raise ParseError("parse_num(): expect a number but got \{other}") + } +} + +fn parse_var(self : Tokens) -> String!ParseError { + match self.peek() { + Id(s) => { + self.next() + return s + } + other => raise ParseError("parse_var(): expect a variable but got \{other}") + } +} + +fn parse_cons(self : Tokens) -> RawExpr[String]!ParseError { + match self.peek() { + CONS => { + self.next() + let x = self.parse_expr!() + let xs = self.parse_expr!() + return App(App(Constructor(tag=1, arity=2), x), xs) + } + other => raise ParseError("parse_cons(): expect Cons but got \{other}") + } +} + +fn parse_let(self : Tokens) -> RawExpr[String]!ParseError { + self.eat!(Let) + self.eat!(Open('(')) + let defs = self.parse_defs!() + self.eat!(Close(')')) + let exp = self.parse_expr!() + Let(false, defs, exp) +} + +fn parse_letrec(self : Tokens) -> RawExpr[String]!ParseError { + self.eat!(Letrec) + self.eat!(Open('(')) + let defs = self.parse_defs!() + self.eat!(Close(')')) + let exp = self.parse_expr!() + Let(true, defs, exp) +} + +fn parse_case(self : Tokens) -> RawExpr[String]!ParseError { + self.eat!(Case) + let exp = self.parse_expr!() + let alts = self.parse_alts!() + Case(exp, alts) +} + +fn parse_alts( + self : Tokens +) -> List[(Int, List[String], RawExpr[String])]!ParseError { + let acc : List[(Int, List[String], RawExpr[String])] = Nil + loop self.peek(), acc { + Open('['), acc => { + self.next() + self.eat!(Open('(')) + let (tag, variables) = match self.peek() { + NIL => { + self.next() + (0, List::Nil) + } + CONS => { + self.next() + let x = self.parse_var!() + let xs = self.parse_var!() + (1, List::of([x, xs])) + } + other => + raise ParseError("parse_alts(): expect NIL or CONS but got \{other}") + } + self.eat!(Close(')')) + let exp = self.parse_expr!() + let alt = (tag, variables, exp) + self.eat!(Close(']')) + continue self.peek(), Cons(alt, acc) + } + _, acc => acc.rev() + } +} + +fn parse_defs(self : Tokens) -> List[(String, RawExpr[String])]!ParseError { + let acc : List[(String, RawExpr[String])] = Nil + loop self.peek(), acc { + Open('['), acc => { + self.next() + let var = self.parse_var!() + let value = self.parse_expr!() + self.eat!(Close(']')) + continue self.peek(), Cons((var, value), acc) + } + _, acc => acc.rev() + } +} + +fn parse_apply(self : Tokens) -> RawExpr[String]!ParseError { + let mut res = self.parse_expr!() + while self.peek() != Close(')') { + res = App(res, self.parse_expr!()) + } + return res +} + +fn parse_expr(self : Tokens) -> RawExpr[String]!ParseError { + match self.peek() { + EOF => + raise ParseError( + "parse_expr() : expect a token but got a empty token stream", + ) + Number(n) => { + self.next() + Num(n) + } + Id(s) => { + self.next() + Var(s) + } + NIL => { + self.next() + Constructor(tag=0, arity=0) + } + Open('(') => { + self.next() + let exp = match self.peek() { + Let => self.parse_let!() + Letrec => self.parse_letrec!() + Case => self.parse_case!() + CONS => self.parse_cons!() + Id(_) | Open('(') => self.parse_apply!() + other => + raise ParseError("parse_expr(): cant parse \{other} behind a '('") + } + self.eat!(Close(')')) + return exp + } + other => raise ParseError("parse_expr(): cant parse \{other}") + } +} + +fn parse_sc(self : Tokens) -> ScDef[String]!ParseError { + self.eat!(Open('(')) + self.eat!(DefFn) + let fn_name = self.parse_var!() + self.eat!(Open('[')) + let args = loop self.peek(), List::Nil { + tok, acc => + if tok != Close(']') { + let var = self.parse_var!() + continue self.peek(), Cons(var, acc) + } else { + acc.rev() + } + } + self.eat!(Close(']')) + let body = self.parse_expr!() + self.eat!(Close(')')) + ScDef::{ name: fn_name, args, body } +} + +test "parse scdef" { + let test_ = fn!(s) { ignore(tokenize(s).parse_sc!()) } + for p in programs { + let (_, p) = p + test_!(p) + } +} diff --git a/next/sources/gmachine/src/part1/top.mbt b/next/sources/gmachine/src/part1/top.mbt new file mode 100644 index 00000000..cb8242e0 --- /dev/null +++ b/next/sources/gmachine/src/part1/top.mbt @@ -0,0 +1,37 @@ +// start run definition +fn run(codes : List[String]) -> Node { + fn parse_then_compile(code : String) -> (String, Int, List[Instruction]) { + let tokens = tokenize(code) + let code = + try { + tokens.parse_sc!() + } catch { + ParseError(s) => abort(s) + } else { + expr => expr + } + let code = compileSC(code) + return code + } + let codes = codes.map(parse_then_compile) + prelude_defs.map(compileSC) + let (heap, globals) = build_initial_heap(codes) + let initialState : GState = { + heap : heap, + stack : Nil, + code : List::of([PushGlobal("main"), Unwind]), + globals : globals, + stats : 0 + } + initialState.reify() +} +// end run definition + +test "basic eval" { + // S K K x => ((K x (K x)) => x + let main = "(defn main[] (S K K 3))" + inspect!(run(List::of([main])), content = "NNum(3)") + let main = "(defn main[] (K 0 1))" + inspect!(run(List::of([main])), content = "NNum(0)") + let main = "(defn main[] (K1 0 1))" + inspect!(run(List::of([main])), content = "NNum(1)") +} \ No newline at end of file diff --git a/next/sources/gmachine/src/part1/vm.mbt b/next/sources/gmachine/src/part1/vm.mbt new file mode 100644 index 00000000..b8a87458 --- /dev/null +++ b/next/sources/gmachine/src/part1/vm.mbt @@ -0,0 +1,234 @@ +// start heap definition +// Use the 'type' keyword to encapsulate an address type. +type Addr Int derive(Eq, Show) + +// Describe graph nodes with an enumeration type. +enum Node { + NNum(Int) + // The application node + NApp(Addr, Addr) + // To store the number of parameters and + // the corresponding sequence of instructions for a super combinator + NGlobal(String, Int, List[Instruction]) + // The Indirection node,The key component of implementing lazy evaluation + NInd(Addr) +} derive(Eq, Show) + +struct GHeap { + // The heap uses an array, + // and the space with None content in the array is available as free memory. + mut object_count : Int + memory : Array[Node?] +} + +// Allocate heap space for nodes. +fn alloc(self : GHeap, node : Node) -> Addr { + let heap = self + fn next(n : Int) -> Int { + (n + 1) % heap.memory.length() + } + + fn free(i : Int) -> Bool { + match heap.memory[i] { + None => true + _ => false + } + } + + let mut i = heap.object_count + while not(free(i)) { + i = next(i) + } + heap.memory[i] = Some(node) + heap.object_count = heap.object_count + 1 + return Addr(i) +} +// end heap definition + +fn op_get(self : GHeap, key : Addr) -> Node { + let Addr(i) = key + match self.memory[i] { + Some(node) => node + None => abort("GHeap::get(): index \{i} was empty") + } +} + +fn op_set(self : GHeap, key : Addr, val : Node) -> Unit { + self.memory[key._] = Some(val) +} + +// start state definition +struct GState { + mut stack : List[Addr] + heap : GHeap + globals : @hashmap.T[String, Addr] + mut code : List[Instruction] + mut stats : GStats +} + +type GStats Int + +fn stat_incr(self : GState) -> Unit { + self.stats = self.stats._ + 1 +} + +fn put_stack(self : GState, addr : Addr) -> Unit { + self.stack = Cons(addr, self.stack) +} + +fn put_code(self : GState, instrs : List[Instruction]) -> Unit { + self.code = instrs + self.code +} + +fn pop1(self : GState) -> Addr { + match self.stack { + Cons(addr, reststack) => { + self.stack = reststack + addr + } + Nil => abort("pop1(): stack size smaller than 1") + } +} + +// e1 e2 ..... -> (e1, e2) ...... +fn pop2(self : GState) -> (Addr, Addr) { + match self.stack { + Cons(addr1, Cons(addr2, reststack)) => { + self.stack = reststack + (addr1, addr2) + } + _ => abort("pop2(): stack size smaller than 2") + } +} +// end state definition + +// start push_int definition +fn push_int(self : GState, num : Int) -> Unit { + let addr = self.heap.alloc(NNum(num)) + self.put_stack(addr) +} +// end push_int definition + +// start push_global definition +fn push_global(self : GState, name : String) -> Unit { + let sc = self.globals[name] + match sc { + None => abort("push_global(): cant find supercombinator \{name}") + Some(addr) => self.put_stack(addr) + } +} +// end push_global definition + +// start push_arg definition +fn push_arg(self : GState, offset : Int) -> Unit { + let appaddr = self.stack.unsafe_nth(offset + 1) + let arg = match self.heap[appaddr] { + NApp(_, arg) => arg + otherwise => + abort( + "pusharg: stack offset \{offset} address \{appaddr} node \{otherwise}", + ) + } + self.put_stack(arg) +} +// end push_arg definition + +// start mk_apply definition +fn mk_apply(self : GState) -> Unit { + let (a1, a2) = self.pop2() + let appaddr = self.heap.alloc(NApp(a1, a2)) + self.put_stack(appaddr) +} +// end mk_apply definition + +// start update definition +fn update(self : GState, n : Int) -> Unit { + let addr = self.pop1() + let dst = self.stack.unsafe_nth(n) + self.heap[dst] = NInd(addr) +} +// end update definition + +// start unwind definition +fn unwind(self : GState) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NNum(_) => self.put_stack(addr) + NApp(a1, _) => { + self.put_stack(addr) + self.put_stack(a1) + self.put_code(Cons(Unwind, Nil)) + } + NGlobal(_, n, c) => + if self.stack.length() < n { + abort("Unwinding with too few arguments") + } else { + self.put_stack(addr) + self.put_code(c) + } + NInd(a) => { + self.put_stack(a) + self.put_code(Cons(Unwind, Nil)) + } + otherwise => + abort("unwind() : wrong kind of node \{otherwise}, address \{addr}") + } +} +// end unwind definition + +// start build_ih definition +fn build_initial_heap( + scdefs : List[(String, Int, List[Instruction])] +) -> (GHeap, @hashmap.T[String, Addr]) { + let heap = { object_count: 0, memory: Array::make(10000, None) } + let globals = @hashmap.new(capacity=50) + loop scdefs { + Nil => () + Cons((name, arity, instrs), rest) => { + let addr = heap.alloc(NGlobal(name, arity, instrs)) + globals[name] = addr + continue rest + } + } + return (heap, globals) +} +// end build_ih definition + +// start step definition +fn step(self : GState) -> Bool { + match self.code { + Nil => return false + Cons(i, is) => { + self.code = is + self.stat_incr() + match i { + PushGlobal(f) => self.push_global(f) + PushInt(n) => self.push_int(n) + PushArg(n) => self.push_arg(n) + MkApp => self.mk_apply() + Unwind => self.unwind() + Update(n) => self.update(n) + Pop(n) => self.stack = self.stack.drop(n) + } + return true + } + } +} +// end step definition + +// start reify definition +fn reify(self : GState) -> Node { + if self.step() { + self.reify() + } else { + let stack = self.stack + match stack { + Cons(addr, Nil) => { + let res = self.heap[addr] + return res + } + _ => abort("wrong stack \{stack}") + } + } +} +// end reify definition diff --git a/next/tutorial/example/gmachine/gmachine-1.md b/next/tutorial/example/gmachine/gmachine-1.md index f19fa4e8..29497b11 100644 --- a/next/tutorial/example/gmachine/gmachine-1.md +++ b/next/tutorial/example/gmachine/gmachine-1.md @@ -103,47 +103,18 @@ coreF excludes anonymous functions because anonymous functions introduce extra f Super combinators will eventually be parsed into `ScDef[String]`, but writing a parser is a tedious task. I will provide it along with the final code. -```moonbit -enum RawExpr[T] { - Var(T) - Num(Int) - Constructor(Int, Int) // tag, arity - App(RawExpr[T], RawExpr[T]) - Let(Bool, List[(T, RawExpr[T])], RawExpr[T]) // isRec, Defs, Body - Case(RawExpr[T], List[(Int, List[T], RawExpr[T])]) -} derive(Show) - -struct ScDef[T] { - name : String - args : List[T] - body : RawExpr[T] -} derive(Show) +```{literalinclude} /sources/gmachine/src/part1/ast.mbt +:language: moonbit +:start-after: start expr_and_scdef definition +:end-before: end expr_and_scdef definition ``` Additionally, some predefined coreF programs are required. -```moonbit -let preludeDefs : List[ScDef[String]] = { - let id = ScDef::new("I", List::of(["x"]), Var("x")) // id x = x - let k = ScDef::new("K", List::of(["x", "y"]), Var("x")) // K x y = x - let k1 = ScDef::new("K1", List::of(["x", "y"]), Var("y")) // K1 x y = y - let s = ScDef::new( - "S", - List::of(["f", "g", "x"]), - App(App(Var("f"), Var("x")), App(Var("g"), Var("x"))), - ) // S f g x = f x (g x) - let compose = ScDef::new( - "compose", - List::of(["f", "g", "x"]), - App(Var("f"), App(Var("g"), Var("x"))), - ) // compose f g x = f (g x) - let twice = ScDef::new( - "twice", - List::of(["f"]), - App(App(Var("compose"), Var("f")), Var("f")), - ) // twice f = compose f f - Cons(id, Cons(k, Cons(k1, Cons(s, Cons(compose, Cons(twice, Nil)))))) -} +```{literalinclude} /sources/gmachine/src/part1/ast.mbt +:language: moonbit +:start-after: start prelude_defs definition +:end-before: end prelude_defs definition ``` ## Why Graph @@ -175,30 +146,10 @@ fn square(thunk : () -> Int) -> Int { To represent the program using a graph is to facilitate sharing of computation results and avoid redundant calculations. To achieve this purpose, it's crucial to implement an in-place update algorithm when reducing the graph. Regarding in-place update, let's simulate it using MoonBit code: -```moonbit -enum LazyData[T] { - Waiting(() -> T) - Done(T) -} - -struct LazyRef[T] { - mut data : LazyData[T] -} - -fn extract[T](self : LazyRef[T]) -> T { - match self.data { - Waiting(thunk) => { - let value = thunk() - self.data = Done(value) // in-place update - value - } - Done(value) => value - } -} - -fn square(x : LazyRef[Int]) -> Int { - x.extract() * x.extract() -} +```{literalinclude} /sources/gmachine/src/part1/lazy.mbt +:language: moonbit +:start-after: start lazy definition +:end-before: end lazy definition ``` Regardless of which side executes the `extract` method first, it will update the referenced mutable field and replace its content with the computed result. Therefore, there's no need to recompute it during the second execution of the `extract` method. @@ -311,44 +262,10 @@ In this simple version of the G-Machine, the state includes: - Heap: This is where the expression graph and the sequences of instructions corresponding to super combinators are stored. -```moonbit -// Use the 'type' keyword to encapsulate an address type. -type Addr Int derive(Eq, Show) - -// Describe graph nodes with an enumeration type. -enum Node { - NNum(Int) - // The application node - NApp(Addr, Addr) - // To store the number of parameters and - // the corresponding sequence of instructions for a super combinator. - NGlobal(String, Int, List[Instruction]) - // The Indirection node,The key component of implementing lazy evaluation - NInd(Addr) -} derive (Eq, Show) - -struct GHeap { // The heap uses an array, and the space with None content in the array is available as free memory. - mut objectCount : Int - memory : Array[Option[Node]] -} - -// Allocate heap space for nodes. -fn alloc(self : GHeap, node : Node) -> Addr { - let heap = self - // Assuming there is still available space in the heap. - fn next(n : Int) -> Int { - (n + 1) % heap.memory.length() - } - fn free(i : Int) -> Bool { - heap.memory[i].is_empty() - } - let mut i = heap.objectCount - while not(free(i)) { - i = next(i) - } - heap.memory[i] = Some(node) - return Addr(i) -} +```{literalinclude} /sources/gmachine/src/part1/vm.mbt +:language: moonbit +:start-after: start heap definition +:end-before: end heap definition ``` - Stack: The stack only holds addresses pointing to the heap. A simple implementation can use `List[Addr]`. @@ -356,66 +273,12 @@ fn alloc(self : GHeap, node : Node) -> Addr { - Current code sequence to be executed. - Execution status statistics: A simple implementation involves calculating how many instructions have been executed. -```moonbit -type GStats Int - -let statInitial : GStats = GStats(0) - -fn statInc(self : GStats) -> GStats { - let GStats(n) = self - GStats(n + 1) -} - -fn statGet(self : GStats) -> Int { - let GStats(n) = self - return n -} -``` - The entire state is represented using the type `GState`. -```moonbit -struct GState { - mut stack : List[Addr] - heap : GHeap - globals : RHTable[String, Addr] - mut code : List[Instruction] - stats : GStats -} - -fn putStack(self : GState, addr : Addr) -> Unit { - self.stack = Cons(addr, self.stack) -} - -fn putCode(self : GState, is : List[Instruction]) -> Unit { - self.code = append(is, self.code) -} - -fn pop1(self : GState) -> Addr { - match self.stack { - Cons(addr, reststack) => { - self.stack = reststack - addr - } - Nil => { - abort("pop1: stack size smaller than 1") - } - } -} - -fn pop2(self : GState) -> (Addr, Addr) { - // Pop 2 pops the top two elements from the stack. - // Returns (the first, the second). - match self.stack { - Cons(addr1, Cons(addr2, reststack)) => { - self.stack = reststack - (addr1, addr2) - } - otherwise => { - abort("pop2: stack size smaller than 2") - } - } -} +```{literalinclude} /sources/gmachine/src/part1/vm.mbt +:language: moonbit +:start-after: start state definition +:end-before: end state definition ``` Now, we can map each step of the graph reduction algorithm we deduced on paper to this abstract machine: @@ -436,77 +299,50 @@ All of these tasks have corresponding instruction implementations. The highly simplified G-Machine currently consists of 7 instructions. -```moonbit -enum Instruction { - Unwind - PushGlobal(String) - PushInt(Int) - PushArg(Int) - MkApp - Update(Int) - Pop(Int) -} derive(Eq, Show) +```{literalinclude} /sources/gmachine/src/part1/instruction.mbt +:language: moonbit +:start-after: start instr definition +:end-before: end instr definition ``` The `PushInt` instruction is the simplest. It allocates an `NNum` node on the heap and pushes its address onto the stack. -```moonbit -fn push_int(self : GState, num : Int) -> Unit { - let addr = self.heap.alloc(NNum(num)) - self.putStack(addr) -} +```{literalinclude} /sources/gmachine/src/part1/vm.mbt +:language: moonbit +:start-after: start push_int definition +:end-before: end push_int definition ``` The `PushGlobal` instruction retrieves the address of the specified super combinator from the global table and then pushes the address onto the stack. -```moonbit -fn push_global(self : GState, name : String) -> Unit { - let sc = self.globals[name] - match sc { - None => abort("push_global(): cant find super combinator \{name}") - Some(addr) => { - self.putStack(addr) - } - } -} +```{literalinclude} /sources/gmachine/src/part1/vm.mbt +:language: moonbit +:start-after: start push_global definition +:end-before: end push_global definition ``` The `PushArg` instruction is a bit more complex. It has specific requirements regarding the layout of addresses on the stack: the first address should point to the super combinator node, followed by n addresses pointing to N `NApp` nodes. `PushArg` retrieves the Nth parameter, starting from the `offset + 1`. -```moonbit -fn push_arg(self : GState, offset : Int) -> Unit { - // Skip the first super combinator node. - // Access the (offset + 1)th NApp node - let appaddr = @immut/list.unsafe_nth(self.stack, offset + 1) - let arg = match self.heap[appaddr] { - NApp(_, arg) => arg - otherwise => - abort( - "push_arg: stack offset \{offset} address \{appaddr} node \{otherwise}, not a applicative node" - ) - } - self.putStack(arg) -} +```{literalinclude} /sources/gmachine/src/part1/vm.mbt +:language: moonbit +:start-after: start push_arg definition +:end-before: end push_arg definition ``` The `MkApp` instruction takes two addresses from the top of the stack, constructs an `NApp` node, and pushes its address onto the stack. -```moonbit -fn mkapp(self : GState) -> Unit { - let (a1, a2) = self.pop2() - let appaddr = self.heap.alloc(NApp(a1, a2)) - self.putStack(appaddr) -} +```{literalinclude} /sources/gmachine/src/part1/vm.mbt +:language: moonbit +:start-after: start mk_apply definition +:end-before: end mk_apply definition ``` The `Update` instruction assumes that the first address on the stack points to the current redex's evaluation result. It skips the addresses of the immediately following super combinator nodes and replaces the Nth `NApp` node with an indirect node pointing to the evaluation result. If the current redex is a CAF, it directly replaces its corresponding `NGlobal` node on the heap. From this, we can see why in lazy functional languages, there is not much distinction between functions without parameters and ordinary variables. -```moonbit -fn update(self : GState, n : Int) -> Unit { - let addr = self.pop1() - let dst = @immut/list.unsafe_nth(self.stack, n) - self.heap[dst] = NInd(addr) -} +```{literalinclude} /sources/gmachine/src/part1/vm.mbt +:language: moonbit +:start-after: start update definition +:end-before: end update definition ``` The `Unwind` instruction in the G-Machine is akin to an evaluation loop. It has several branching conditions based on the type of node corresponding to the address at the top of the stack: @@ -516,32 +352,10 @@ The `Unwind` instruction in the G-Machine is akin to an evaluation loop. It has - For `NGlobal` nodes: If there are enough parameters on the stack, load this super combinator into the current code. - For `NInd` nodes: Push the address contained within this indirect node onto the stack and Unwind again. -```moonbit -fn unwind(self : GState) -> Unit { - let addr = self.pop1() - match self.heap[addr] { - NNum(_) => self.putStack(addr) - NApp(a1, _) => { - self.putStack(addr) - self.putStack(a1) - self.putCode(Cons(Unwind, Nil)) - } - NGlobal(_, n, c) => { - if self.stack.length() < n { - abort("Unwinding with too few arguments") - } else { - self.putStack(addr) - self.putCode(c) - } - } - NInd(a) => { - self.putStack(a) - self.putCode(Cons(Unwind, Nil)) - } - otherwise => - abort("unwind() : wrong kind of node \{otherwise}, address \{addr}") - } -} +```{literalinclude} /sources/gmachine/src/part1/vm.mbt +:language: moonbit +:start-after: start unwind definition +:end-before: end unwind definition ``` The `Pop` instruction pops N addresses, eliminating the need for a separate function implementation. @@ -559,23 +373,10 @@ When compiling a super combinator, we need to maintain an environment that allow > Here, "parameters" refer to addresses pointing to App nodes on the heap, and the actual parameter addresses can be accessed through the pusharg instruction. -```moonbit -fn compileSC(self : ScDef[String]) -> (String, Int, List[Instruction]) { - let name = self.name - let body = self.body - let mut arity = 0 - fn gen_env(i : Int, args : List[String]) -> List[(String, Int)] { - match args { - Nil => { - arity = i - return Nil - } - Cons(s, ss) => Cons((s, i), gen_env(i + 1, ss)) - } - } - let env = gen_env(0, self.args) - (name, arity, compileR(body, env, arity)) -} +```{literalinclude} /sources/gmachine/src/part1/compile.mbt +:language: moonbit +:start-after: start compile_sc definition +:end-before: end compile_sc definition ``` The `compileR` function generates code for instantiating super combinators by calling the `compileC` function, and then appends three instructions: @@ -584,135 +385,52 @@ The `compileR` function generates code for instantiating super combinators by ca - `Pop(N)`: Clears the stack of redundant addresses. - `Unwind`: Searches for the next redex to start the next reduction. -```moonbit -fn compileR( - self : RawExpr[String], - env : List[(String, Int)], - arity : Int -) -> List[Instruction] { - if arity == 0 { - // The Pop 0 instruction does nothing in practice, - // so it is not generated when the arity is 0. - compileC(self, env).concat(@immut/list.of([Update(arity), Unwind])) - } else { - compileC(self, env).concat( - @immut/list.of([Update(arity), Pop(arity), Unwind]), - ) - } -} +```{literalinclude} /sources/gmachine/src/part1/compile.mbt +:language: moonbit +:start-after: start compile_r definition +:end-before: end compile_r definition ``` When compiling the definition of super combinators, a rather crude approach is used: if a variable is not a parameter, it is treated as another super combinator (writing it incorrectly will result in a runtime error). For function application, the right-hand expression is compiled first, then all offsets corresponding to parameters in the environment are incremented (because an extra address pointing to the instantiated right-hand expression is added to the top of the stack), then the left-hand expression is compiled, and finally the `MkApp` instruction is added. -```moonbit -fn compileC( - self : RawExpr[String], - env : List[(String, Int)] -) -> List[Instruction] { - match self { - Var(s) => - match lookupENV(env, s) { - None => @immut/list.of([PushGlobal(s)]) - Some(n) => @immut/list.of([PushArg(n)]) - } - Num(n) => @immut/list.of([PushInt(n)]) - App(e1, e2) => - compileC(e2, env) - .concat(compileC(e1, argOffset(1, env))) - .concat(@immut/list.of([MkApp])) - _ => abort("not support yet") - } -} +```{literalinclude} /sources/gmachine/src/part1/compile.mbt +:language: moonbit +:start-after: start compile_c definition +:end-before: end compile_c definition ``` ## Running the G-Machine Once the super combinators are compiled, they need to be placed on the heap (along with adding their addresses to the global table). This can be done recursively. -```moonbit -fn buildInitialHeap(scdefs : List[(String, Int, List[Instruction])]) -> (GHeap, RHTable[String, Addr]) { - let heap = { objectCount : 0, memory : Array::make(10000, None) } - let globals = RHTable::new(50) - fn go(lst : List[(String, Int, List[Instruction])]) { - match lst { - Nil => () - Cons((name, arity, instrs), rest) => { - let addr = heap.alloc(NGlobal(name, arity, instrs)) - globals[name] = addr - go(rest) - } - } - } - go(scdefs) - return (heap, globals) -} +```{literalinclude} /sources/gmachine/src/part1/vm.mbt +:language: moonbit +:start-after: start build_ih definition +:end-before: end build_ih definition ``` Define a function "step" that updates the state of the G-Machine by one step, returning false if the final state has been reached. -```moonbit -fn step(self : GState) -> Bool { - match self.code { - Nil => return false - Cons(i, is) => { - self.code = is - self.statInc() - match i { - PushGlobal(f) => self.push_global(f) - PushInt(n) => self.push_int(n) - PushArg(n) => self.push_arg(n) - MkApp => self.mkapp() - Unwind => self.unwind() - Update(n) => self.update(n) - Pop(n) => self.stack = self.stack.drop(n) - } // without the need for additional functions - return true - } - } -} +```{literalinclude} /sources/gmachine/src/part1/vm.mbt +:language: moonbit +:start-after: start step definition +:end-before: end step definition ``` Additionally, define a function "reify" that continuously executes the "step" function until the final state is reached. -```moonbit -fn reify(self : GState) -> Unit { - if self.step() { - self.reify() - } else { - let stack = self.stack - match stack { - Cons(addr, Nil) => { - let res = self.heap[addr] - println("\{res}") - } - _ => abort("wrong stack \{stack}") - } - } -} +```{literalinclude} /sources/gmachine/src/part1/vm.mbt +:language: moonbit +:start-after: start reify definition +:end-before: end reify definition ``` Combine the above components. -```moonbit -fn run(codes : List[String]) -> Unit { - fn parse_then_compile(code : String) -> (String, Int, List[Instruction]) { - let code = TokenStream::new(code) - let code = parseSC(code) - let code = compileSC(code) - return code - } - - let codes = codes.map(parse_then_compile).concat(preludeDefs.map(compileSC)) - let (heap, globals) = buildInitialHeap(codes) - let initialState : GState = { - heap, - stack: Nil, - code: initialCode, - globals, - stats: initialStat, - } - initialState.reify() -} +```{literalinclude} /sources/gmachine/src/part1/top.mbt +:language: moonbit +:start-after: start run definition +:end-before: end run definition ``` ## Conclusion From 5a9545766821548039f5577746c3b18fa4e2edec Mon Sep 17 00:00:00 2001 From: myfreess Date: Thu, 5 Dec 2024 15:03:10 +0800 Subject: [PATCH 4/4] G-Machine Part2 --- next/sources/gmachine/src/part2/ast.mbt | 87 +++++ next/sources/gmachine/src/part2/compile.mbt | 183 +++++++++ .../gmachine/src/part2/instruction.mbt | 26 ++ next/sources/gmachine/src/part2/moon.pkg.json | 1 + next/sources/gmachine/src/part2/programs.mbt | 41 ++ next/sources/gmachine/src/part2/syntax.mbt | 332 ++++++++++++++++ next/sources/gmachine/src/part2/top.mbt | 37 ++ next/sources/gmachine/src/part2/vm.mbt | 358 ++++++++++++++++++ next/tutorial/example/gmachine/gmachine-2.md | 213 +++-------- 9 files changed, 1113 insertions(+), 165 deletions(-) create mode 100644 next/sources/gmachine/src/part2/ast.mbt create mode 100644 next/sources/gmachine/src/part2/compile.mbt create mode 100644 next/sources/gmachine/src/part2/instruction.mbt create mode 100644 next/sources/gmachine/src/part2/moon.pkg.json create mode 100644 next/sources/gmachine/src/part2/programs.mbt create mode 100644 next/sources/gmachine/src/part2/syntax.mbt create mode 100644 next/sources/gmachine/src/part2/top.mbt create mode 100644 next/sources/gmachine/src/part2/vm.mbt diff --git a/next/sources/gmachine/src/part2/ast.mbt b/next/sources/gmachine/src/part2/ast.mbt new file mode 100644 index 00000000..65cdbe18 --- /dev/null +++ b/next/sources/gmachine/src/part2/ast.mbt @@ -0,0 +1,87 @@ +typealias List[E] = @immut/list.T[E] + + +enum RawExpr[T] { + Var(T) + Num(Int) + Constructor(tag~:Int, arity~:Int) // tag, arity + App(RawExpr[T], RawExpr[T]) + Let(Bool, List[(T, RawExpr[T])], RawExpr[T]) // isRec, Defs, Body + Case(RawExpr[T], List[(Int, List[T], RawExpr[T])]) +} derive(Show) + +struct ScDef[T] { + name : String + args : List[T] + body : RawExpr[T] +} derive(Show) + +fn is_atom[T](self : RawExpr[T]) -> Bool { + match self { + Var(_) => true + Num(_) => true + _ => false + } +} + +fn binders_of[L, R](l : List[(L, R)]) -> List[L] { + fn fst(pair) { + let (l, _) = pair + return l + } + + l.map(fst) +} + +fn rhss_of[L, R](l : List[(L, R)]) -> List[R] { + fn snd(pair) { + let (_, r) = pair + return r + } + + l.map(snd) +} + +fn ScDef::new[T]( + name : String, + args : List[T], + body : RawExpr[T] +) -> ScDef[T] { + { name : name, args : args, body : body } +} + +let prelude_defs : List[ScDef[String]] = { + let args : (FixedArray[String]) -> List[String] = List::of + let id = ScDef::new("I", args(["x"]), Var("x")) // id x = x + let k = + ScDef::new( + "K", + args(["x", "y"]), + Var("x") + ) // K x y = x + let k1 = + ScDef::new( + "K1", + args(["x", "y"]), + Var("y") + ) // K1 x y = y + let s = + ScDef::new( + "S", + args(["f", "g", "x"]), + App(App(Var("f"), Var("x")), App(Var("g"), Var("x"))) + ) // S f g x = f x (g x) + let compose = + ScDef::new( + "compose", + args(["f", "g", "x"]), + App(Var("f"), App(Var("g"), Var("x"))) + ) // compose f g x = f (g x) + let twice = + ScDef::new( + "twice", + args(["f"]), + App(App(Var("compose"), Var("f")), Var("f")) + ) // twice f = compose f f + List::of([id, k, k1, s, compose, twice]) +} \ No newline at end of file diff --git a/next/sources/gmachine/src/part2/compile.mbt b/next/sources/gmachine/src/part2/compile.mbt new file mode 100644 index 00000000..307b11b8 --- /dev/null +++ b/next/sources/gmachine/src/part2/compile.mbt @@ -0,0 +1,183 @@ +fn compileSC(self : ScDef[String]) -> (String, Int, List[Instruction]) { + let name = self.name + let body = self.body + let mut arity = 0 + fn gen_env(i : Int, args : List[String]) -> List[(String, Int)] { + match args { + Nil => { + arity = i + return Nil + } + Cons(s, ss) => Cons((s, i), gen_env(i + 1, ss)) + } + } + + let env = gen_env(0, self.args) + (name, arity, compileR(body, env, arity)) +} + + +fn compileR( + self : RawExpr[String], + env : List[(String, Int)], + arity : Int +) -> List[Instruction] { + if arity == 0 { + compileC(self, env) + List::of([Update(arity), Unwind]) + } else { + compileC(self, env) + List::of([Update(arity), Pop(arity), Unwind]) + } +} + +fn compileC( + self : RawExpr[String], + env : List[(String, Int)] +) -> List[Instruction] { + match self { + Var(s) => + match env.lookup(s) { + None => List::of([PushGlobal(s)]) + Some(n) => List::of([Push(n)]) + } + Num(n) => List::of([PushInt(n)]) + App(e1, e2) => + compileC(e2, env) + compileC(e1, argOffset(1, env)) + List::of([MkApp]) + Let(rec, defs, e) => + if rec { + compileLetrec(compileC, defs, e, env) + } else { + compileLet(compileC, defs, e, env) + } + _ => abort("not support yet") + } +} + +fn argOffset(n : Int, env : List[(String, Int)]) -> List[(String, Int)] { + env.map(fn { (name, offset) => (name, offset + n) }) +} + +// start compile_let definition +fn compileLet( + comp : (RawExpr[String], List[(String, Int)]) -> List[Instruction], + defs : List[(String, RawExpr[String])], + expr : RawExpr[String], + env : List[(String, Int)] +) -> List[Instruction] { + let (env, codes) = loop env, List::Nil, defs { + env, acc, Nil => (env, acc) + env, acc, Cons((name, expr), rest) => { + let code = compileC(expr, env) + let env = List::Cons((name, 0), argOffset(1, env)) + continue env, acc + code, rest + } + } + codes + comp(expr, env) + List::of([Slide(defs.length())]) +} +// end compile_let definition + +// start compile_letrec definition +fn compileLetrec( + comp : (RawExpr[String], List[(String, Int)]) -> List[Instruction], + defs : List[(String, RawExpr[String])], + expr : RawExpr[String], + env : List[(String, Int)] +) -> List[Instruction] { + let mut env = env + loop defs { + Nil => () + Cons((name, _), rest) => { + env = Cons((name, 0), argOffset(1, env)) + continue rest + } + } + let n = defs.length() + fn compileDefs( + defs : List[(String, RawExpr[String])], + offset : Int + ) -> List[Instruction] { + match defs { + Nil => comp(expr, env) + List::of([Slide(n)]) + Cons((_, expr), rest) => + compileC(expr, env) + + Cons(Update(offset), compileDefs(rest, offset - 1)) + } + } + + Cons(Alloc(n), compileDefs(defs, n - 1)) +} +// end compile_letrec definition + +// start prim definition +let compiled_primitives : List[(String, Int, List[Instruction])] = List::of( + [ + // Arith + ( + "add", + 2, + List::of([Push(1), Eval, Push(1), Eval, Add, Update(2), Pop(2), Unwind]), + ), + ( + "sub", + 2, + List::of([Push(1), Eval, Push(1), Eval, Sub, Update(2), Pop(2), Unwind]), + ), + ( + "mul", + 2, + List::of([Push(1), Eval, Push(1), Eval, Mul, Update(2), Pop(2), Unwind]), + ), + ( + "div", + 2, + List::of([Push(1), Eval, Push(1), Eval, Div, Update(2), Pop(2), Unwind]), + ), + // Compare + ( + "eq", + 2, + List::of([Push(1), Eval, Push(1), Eval, Eq, Update(2), Pop(2), Unwind]), + ), + ( + "neq", + 2, + List::of([Push(1), Eval, Push(1), Eval, Ne, Update(2), Pop(2), Unwind]), + ), + ( + "ge", + 2, + List::of([Push(1), Eval, Push(1), Eval, Ge, Update(2), Pop(2), Unwind]), + ), + ( + "gt", + 2, + List::of([Push(1), Eval, Push(1), Eval, Gt, Update(2), Pop(2), Unwind]), + ), + ( + "le", + 2, + List::of([Push(1), Eval, Push(1), Eval, Le, Update(2), Pop(2), Unwind]), + ), + ( + "lt", + 2, + List::of([Push(1), Eval, Push(1), Eval, Lt, Update(2), Pop(2), Unwind]), + ), + // MISC + ("negate", 1, List::of([Push(0), Eval, Neg, Update(1), Pop(1), Unwind])), + ( + "if", + 3, + List::of( + [ + Push(0), + Eval, + Cond(List::of([Push(1)]), List::of([Push(2)])), + Update(3), + Pop(3), + Unwind, + ], + ), + ), + ], +) +// end prim definition \ No newline at end of file diff --git a/next/sources/gmachine/src/part2/instruction.mbt b/next/sources/gmachine/src/part2/instruction.mbt new file mode 100644 index 00000000..d98a6542 --- /dev/null +++ b/next/sources/gmachine/src/part2/instruction.mbt @@ -0,0 +1,26 @@ +// start instr definition +enum Instruction { + Unwind + PushGlobal(String) + PushInt(Int) + Push(Int) + MkApp + Slide(Int) + Update(Int) + Pop(Int) + Alloc(Int) + Eval + Add + Sub + Mul + Div + Neg + Eq // == + Ne // != + Lt // < + Le // <= + Gt // > + Ge // >= + Cond(List[Instruction], List[Instruction]) +} derive (Eq, Show) +// end instr definition \ No newline at end of file diff --git a/next/sources/gmachine/src/part2/moon.pkg.json b/next/sources/gmachine/src/part2/moon.pkg.json new file mode 100644 index 00000000..9e26dfee --- /dev/null +++ b/next/sources/gmachine/src/part2/moon.pkg.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/next/sources/gmachine/src/part2/programs.mbt b/next/sources/gmachine/src/part2/programs.mbt new file mode 100644 index 00000000..bb6b38b3 --- /dev/null +++ b/next/sources/gmachine/src/part2/programs.mbt @@ -0,0 +1,41 @@ +let programs : @hashmap.T[String, String] = { + let programs = @hashmap.new(capacity=40) + programs["square"] = + #| (defn square[x] (mul x x)) + programs["fix"] = + #| (defn fix[f] (letrec ([x (f x)]) x)) + programs["isNil"] = + #| (defn isNil[x] + #| (case x [(Nil) 1] [(Cons n m) 0])) + programs["tail"] = + #| (defn tail[l] (case l [(Cons x xs) xs])) + programs["fibs"] = + // fibs = 0 : 1 : zipWith (+) fibs (tail fibs) + #| (defn fibs[] (Cons 0 (Cons 1 (zipWith add fibs (tail fibs))))) + programs["take"] = + #| (defn take[n l] + #| (case l + #| [(Nil) Nil] + #| [(Cons x xs) + #| (if (le n 0) Nil (Cons x (take (sub n 1) xs)))])) + programs["zipWith"] = + #| (defn zipWith[op l1 l2] + #| (case l1 + #| [(Nil) Nil] + #| [(Cons x xs) + #| (case l2 + #| [(Nil) Nil] + #| [(Cons y ys) (Cons (op x y) (zipWith op xs ys))])])) + programs["factorial"] = + #| (defn factorial[n] + #| (if (eq n 0) 1 (mul n (factorial (sub n 1))))) + programs["abs"] = + #| (defn abs[n] + #| (if (lt n 0) (negate n) n)) + programs["length"] = + #| (defn length[l] + #| (case l + #| [(Nil) 0] + #| [(Cons x xs) (add 1 (length xs))])) + programs +} diff --git a/next/sources/gmachine/src/part2/syntax.mbt b/next/sources/gmachine/src/part2/syntax.mbt new file mode 100644 index 00000000..3a59bf26 --- /dev/null +++ b/next/sources/gmachine/src/part2/syntax.mbt @@ -0,0 +1,332 @@ +enum Token { + DefFn + Let + NIL + CONS + Case + Letrec + Open(Char) // { [ ( + Close(Char) // } ] ) + Id(String) + Number(Int) + EOF +} derive(Eq, Show) + +fn between(this : Char, lw : Char, up : Char) -> Bool { + this >= lw && this <= up +} + +fn isDigit(this : Char) -> Bool { + between(this, '0', '9') +} + +fn isAlpha(this : Char) -> Bool { + between(this, 'A', 'Z') || between(this, 'a', 'z') +} + +fn isIdChar(this : Char) -> Bool { + isAlpha(this) || isDigit(this) || this == '_' || this == '-' +} + +fn isWhiteSpace(this : Char) -> Bool { + this == ' ' || this == '\t' || this == '\n' +} + +fn to_number(this : Char) -> Int { + this.to_int() - 48 +} + +fn isOpen(this : Char) -> Bool { + this == '(' || this == '[' || this == '{' +} + +fn isClose(this : Char) -> Bool { + this == ')' || this == ']' || this == '}' +} + +struct Tokens { + tokens : Array[Token] + mut current : Int +} derive(Show) + +fn Tokens::new(tokens : Array[Token]) -> Tokens { + Tokens::{ tokens, current: 0 } +} + +fn peek(self : Tokens) -> Token { + if self.current < self.tokens.length() { + return self.tokens[self.current] + } else { + return EOF + } +} + +type! ParseError String + +fn next(self : Tokens, loc~ : SourceLoc = _) -> Unit { + self.current = self.current + 1 + if self.current > self.tokens.length() { + abort("Tokens::next(): \{loc}") + } +} + +fn eat(self : Tokens, tok : Token, loc~ : SourceLoc = _) -> Unit!ParseError { + let __tok = self.peek() + // assert tok_ != EOF + if __tok != tok { + raise ParseError("\{loc} - Tokens::eat(): expect \{tok} but got \{__tok}") + } else { + self.next() + } +} + +fn tokenize(source : String) -> Tokens { + let tokens : Array[Token] = Array::new(capacity=source.length() / 2) + let mut current = 0 + let source = source.to_array() + fn peek() -> Char { + source[current] + } + + fn next() -> Unit { + current = current + 1 + } + + while current < source.length() { + let ch = peek() + if isWhiteSpace(ch) { + next() + continue + } else if isDigit(ch) { + let mut num = to_number(ch) + next() + while current < source.length() && isDigit(peek()) { + num = num * 10 + to_number(peek()) + next() + } + tokens.push(Number(num)) + continue + } else if isOpen(ch) { + next() + tokens.push(Open(ch)) + continue + } else if isClose(ch) { + next() + tokens.push(Close(ch)) + continue + } else if isAlpha(ch) { + let identifier = @buffer.new(size_hint=42) + identifier.write_char(ch) + next() + while current < source.length() && isIdChar(peek()) { + identifier.write_char(peek()) + next() + } + let identifier = identifier.to_unchecked_string() + match identifier { + "let" => tokens.push(Let) + "letrec" => tokens.push(Letrec) + "Nil" => tokens.push(NIL) + "Cons" => tokens.push(CONS) + "case" => tokens.push(Case) + "defn" => tokens.push(DefFn) + _ => tokens.push(Id(identifier)) + } + } else { + abort("error : invalid Character '\{ch}' in [\{current}]") + } + } else { + return Tokens::new(tokens) + } +} + +test "tokenize" { + inspect!(tokenize("").tokens, content="[]") + inspect!(tokenize("12345678").tokens, content="[Number(12345678)]") + inspect!(tokenize("1234 5678").tokens, content="[Number(1234), Number(5678)]") + inspect!( + tokenize("a0 a_0 a-0").tokens, + content= + #|[Id("a0"), Id("a_0"), Id("a-0")] + , + ) + inspect!( + tokenize("(Cons 0 (Cons 1 Nil))").tokens, + content="[Open('('), CONS, Number(0), Open('('), CONS, Number(1), NIL, Close(')'), Close(')')]", + ) +} + +fn parse_num(self : Tokens) -> Int!ParseError { + match self.peek() { + Number(n) => { + self.next() + return n + } + other => raise ParseError("parse_num(): expect a number but got \{other}") + } +} + +fn parse_var(self : Tokens) -> String!ParseError { + match self.peek() { + Id(s) => { + self.next() + return s + } + other => raise ParseError("parse_var(): expect a variable but got \{other}") + } +} + +fn parse_cons(self : Tokens) -> RawExpr[String]!ParseError { + match self.peek() { + CONS => { + self.next() + let x = self.parse_expr!() + let xs = self.parse_expr!() + return App(App(Constructor(tag=1, arity=2), x), xs) + } + other => raise ParseError("parse_cons(): expect Cons but got \{other}") + } +} + +fn parse_let(self : Tokens) -> RawExpr[String]!ParseError { + self.eat!(Let) + self.eat!(Open('(')) + let defs = self.parse_defs!() + self.eat!(Close(')')) + let exp = self.parse_expr!() + Let(false, defs, exp) +} + +fn parse_letrec(self : Tokens) -> RawExpr[String]!ParseError { + self.eat!(Letrec) + self.eat!(Open('(')) + let defs = self.parse_defs!() + self.eat!(Close(')')) + let exp = self.parse_expr!() + Let(true, defs, exp) +} + +fn parse_case(self : Tokens) -> RawExpr[String]!ParseError { + self.eat!(Case) + let exp = self.parse_expr!() + let alts = self.parse_alts!() + Case(exp, alts) +} + +fn parse_alts( + self : Tokens +) -> List[(Int, List[String], RawExpr[String])]!ParseError { + let acc : List[(Int, List[String], RawExpr[String])] = Nil + loop self.peek(), acc { + Open('['), acc => { + self.next() + self.eat!(Open('(')) + let (tag, variables) = match self.peek() { + NIL => { + self.next() + (0, List::Nil) + } + CONS => { + self.next() + let x = self.parse_var!() + let xs = self.parse_var!() + (1, List::of([x, xs])) + } + other => + raise ParseError("parse_alts(): expect NIL or CONS but got \{other}") + } + self.eat!(Close(')')) + let exp = self.parse_expr!() + let alt = (tag, variables, exp) + self.eat!(Close(']')) + continue self.peek(), Cons(alt, acc) + } + _, acc => acc.rev() + } +} + +fn parse_defs(self : Tokens) -> List[(String, RawExpr[String])]!ParseError { + let acc : List[(String, RawExpr[String])] = Nil + loop self.peek(), acc { + Open('['), acc => { + self.next() + let var = self.parse_var!() + let value = self.parse_expr!() + self.eat!(Close(']')) + continue self.peek(), Cons((var, value), acc) + } + _, acc => acc.rev() + } +} + +fn parse_apply(self : Tokens) -> RawExpr[String]!ParseError { + let mut res = self.parse_expr!() + while self.peek() != Close(')') { + res = App(res, self.parse_expr!()) + } + return res +} + +fn parse_expr(self : Tokens) -> RawExpr[String]!ParseError { + match self.peek() { + EOF => + raise ParseError( + "parse_expr() : expect a token but got a empty token stream", + ) + Number(n) => { + self.next() + Num(n) + } + Id(s) => { + self.next() + Var(s) + } + NIL => { + self.next() + Constructor(tag=0, arity=0) + } + Open('(') => { + self.next() + let exp = match self.peek() { + Let => self.parse_let!() + Letrec => self.parse_letrec!() + Case => self.parse_case!() + CONS => self.parse_cons!() + Id(_) | Open('(') => self.parse_apply!() + other => + raise ParseError("parse_expr(): cant parse \{other} behind a '('") + } + self.eat!(Close(')')) + return exp + } + other => raise ParseError("parse_expr(): cant parse \{other}") + } +} + +fn parse_sc(self : Tokens) -> ScDef[String]!ParseError { + self.eat!(Open('(')) + self.eat!(DefFn) + let fn_name = self.parse_var!() + self.eat!(Open('[')) + let args = loop self.peek(), List::Nil { + tok, acc => + if tok != Close(']') { + let var = self.parse_var!() + continue self.peek(), Cons(var, acc) + } else { + acc.rev() + } + } + self.eat!(Close(']')) + let body = self.parse_expr!() + self.eat!(Close(')')) + ScDef::{ name: fn_name, args, body } +} + +test "parse scdef" { + let test_ = fn!(s) { ignore(tokenize(s).parse_sc!()) } + for p in programs { + let (_, p) = p + test_!(p) + } +} diff --git a/next/sources/gmachine/src/part2/top.mbt b/next/sources/gmachine/src/part2/top.mbt new file mode 100644 index 00000000..7eef55e9 --- /dev/null +++ b/next/sources/gmachine/src/part2/top.mbt @@ -0,0 +1,37 @@ +fn run(codes : List[String]) -> Node { + fn parse_then_compile(code : String) -> (String, Int, List[Instruction]) { + let tokens = tokenize(code) + let code = + try { + tokens.parse_sc!() + } catch { + ParseError(s) => abort(s) + } else { + expr => expr + } + let code = compileSC(code) + return code + } + let codes = codes.map(parse_then_compile) + prelude_defs.map(compileSC) + let codes = compiled_primitives + codes + let (heap, globals) = build_initial_heap(codes) + let initialState : GState = { + heap : heap, + stack : Nil, + // start init definition + code : List::of([PushGlobal("main"), Eval]), + // end init definition + globals : globals, + stats : 0, + dump : Nil + } + initialState.reify() +} + + +test "basic eval" { + let main = "(defn main[] (let ([add1 (add 1)]) (add1 1)))" + inspect!(run(List::of([main])), content = "NNum(2)") + let main = "(defn main[] (let ([x 4] [y 5]) (sub x y)))" + inspect!(run(List::of([main])), content = "NNum(-1)") +} \ No newline at end of file diff --git a/next/sources/gmachine/src/part2/vm.mbt b/next/sources/gmachine/src/part2/vm.mbt new file mode 100644 index 00000000..6f0d6a93 --- /dev/null +++ b/next/sources/gmachine/src/part2/vm.mbt @@ -0,0 +1,358 @@ + +// Use the 'type' keyword to encapsulate an address type. +type Addr Int derive(Eq, Show) + +// Describe graph nodes with an enumeration type. +enum Node { + NNum(Int) + // The application node + NApp(Addr, Addr) + // To store the number of parameters and + // the corresponding sequence of instructions for a super combinator + NGlobal(String, Int, List[Instruction]) + // The Indirection node,The key component of implementing lazy evaluation + NInd(Addr) +} derive(Eq, Show) + +struct GHeap { + // The heap uses an array, + // and the space with None content in the array is available as free memory. + mut object_count : Int + memory : Array[Node?] +} + +// Allocate heap space for nodes. +fn alloc(self : GHeap, node : Node) -> Addr { + let heap = self + fn next(n : Int) -> Int { + (n + 1) % heap.memory.length() + } + + fn free(i : Int) -> Bool { + match heap.memory[i] { + None => true + _ => false + } + } + + let mut i = heap.object_count + while not(free(i)) { + i = next(i) + } + heap.memory[i] = Some(node) + heap.object_count = heap.object_count + 1 + return Addr(i) +} + + +fn op_get(self : GHeap, key : Addr) -> Node { + let Addr(i) = key + match self.memory[i] { + Some(node) => node + None => abort("GHeap::get(): index \{i} was empty") + } +} + +fn op_set(self : GHeap, key : Addr, val : Node) -> Unit { + self.memory[key._] = Some(val) +} + + +struct GState { + mut stack : List[Addr] + heap : GHeap + globals : @hashmap.T[String, Addr] + mut dump : List[(List[Instruction], List[Addr])] + mut code : List[Instruction] + mut stats : GStats +} + +type GStats Int + +fn stat_incr(self : GState) -> Unit { + self.stats = self.stats._ + 1 +} + +fn put_stack(self : GState, addr : Addr) -> Unit { + self.stack = Cons(addr, self.stack) +} + +fn put_dump(self : GState, codes : List[Instruction], stack : List[Addr]) -> Unit { + self.dump = Cons((codes, stack), self.dump) +} + +fn put_code(self : GState, instrs : List[Instruction]) -> Unit { + self.code = instrs + self.code +} + +fn pop1(self : GState) -> Addr { + match self.stack { + Cons(addr, reststack) => { + self.stack = reststack + addr + } + Nil => abort("pop1(): stack size smaller than 1") + } +} + +// e1 e2 ..... -> (e1, e2) ...... +fn pop2(self : GState) -> (Addr, Addr) { + match self.stack { + Cons(addr1, Cons(addr2, reststack)) => { + self.stack = reststack + (addr1, addr2) + } + _ => abort("pop2(): stack size smaller than 2") + } +} + +fn push_int(self : GState, num : Int) -> Unit { + let addr = self.heap.alloc(NNum(num)) + self.put_stack(addr) +} + +fn push_global(self : GState, name : String) -> Unit { + let sc = self.globals[name] + match sc { + None => abort("push_global(): cant find supercombinator \{name}") + Some(addr) => self.put_stack(addr) + } +} + +// start push definition +fn push(self : GState, offset : Int) -> Unit { + // Push(n) a0 : . . . : an : s + // => an : a0 : . . . : an : s + let addr = self.stack.unsafe_nth(offset) + self.put_stack(addr) +} +// end push definition + +// start slide definition +fn slide(self : GState, n : Int) -> Unit { + let addr = self.pop1() + self.stack = Cons(addr, self.stack.drop(n)) +} +// end slide definition + +// start rearrange definition +fn rearrange(self : GState, n : Int) -> Unit { + let appnodes = self.stack.take(n) + let args = appnodes.map(fn (addr) { + let NApp(_, arg) = self.heap[addr] + arg + }) + self.stack = args + appnodes.drop(n - 1) +} +// end rearrange definition + +fn mk_apply(self : GState) -> Unit { + let (a1, a2) = self.pop2() + let appaddr = self.heap.alloc(NApp(a1, a2)) + self.put_stack(appaddr) +} +fn update(self : GState, n : Int) -> Unit { + let addr = self.pop1() + let dst = self.stack.unsafe_nth(n) + self.heap[dst] = NInd(addr) +} + +// start unwind definition +fn unwind(self : GState) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NNum(_) => { + match self.dump { + Nil => self.put_stack(addr) + Cons((instrs, stack), rest_dump) => { + self.stack = stack + self.put_stack(addr) + self.dump = rest_dump + self.code = instrs + } + } + } + NApp(a1, _) => { + self.put_stack(addr) + self.put_stack(a1) + self.put_code(List::of([Unwind])) + } + NGlobal(_, n, c) => { + let k = self.stack.length() + if k < n { + match self.dump { + Nil => abort("Unwinding with too few arguments") + Cons((i, s), rest) => { + // a1 : ...... : ak + // || + // ak : s + self.stack = self.stack.drop(k - 1) + s + self.dump = rest + self.code = i + } + } + } else { + if n != 0 { + self.rearrange(n) + } else { + self.put_stack(addr) + } + self.put_code(c) + } + } + NInd(a) => { + self.put_stack(a) + self.put_code(List::of([Unwind])) + } + otherwise => + abort("unwind() : wrong kind of node \{otherwise}, address \{addr}") + } +} +// end unwind definition + +// start alloc definition +fn alloc_nodes(self : GState, n : Int) -> Unit { + let dummynode : Node = NInd(Addr(-1)) + for i = 0; i < n; i = i + 1 { + let addr = self.heap.alloc(dummynode) + self.put_stack(addr) + } +} +// end alloc definition + +// start eval definition +fn eval(self : GState) -> Unit { + let addr = self.pop1() + self.put_dump(self.code, self.stack) + self.stack = List::of([addr]) + self.code = List::of([Unwind]) +} +// end eval definition + +// start cond definition +fn condition(self : GState, i1 : List[Instruction], i2 : List[Instruction]) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NNum(0) => { + // false + self.code = i2 + self.code + } + NNum(1) => { + // true + self.code = i1 + self.code + } + otherwise => abort("cond : \{addr} = \{otherwise}") + } +} +// end cond definition + +// start op definition +fn negate(self : GState) -> Unit { + let addr = self.pop1() + match self.heap[addr] { + NNum(n) => { + let addr = self.heap.alloc(NNum(-n)) + self.put_stack(addr) + } + otherwise => { + abort("negate: wrong kind of node \{otherwise}, address \{addr}") + } + } +} + +fn lift_arith2(self : GState, op : (Int, Int) -> Int) -> Unit { + let (a1, a2) = self.pop2() + match (self.heap[a1], self.heap[a2]) { + (NNum(n1), NNum(n2)) => { + let newnode = Node::NNum(op(n1, n2)) + let addr = self.heap.alloc(newnode) + self.put_stack(addr) + } + (node1, node2) => abort("liftArith2: \{a1} = \{node1} \{a2} = \{node2}") + } +} + +fn lift_cmp2(self : GState, op : (Int, Int) -> Bool) -> Unit { + let (a1, a2) = self.pop2() + match (self.heap[a1], self.heap[a2]) { + (NNum(n1), NNum(n2)) => { + let flag = op(n1, n2) + let newnode = if flag { Node::NNum(1) } else { Node::NNum(0) } + let addr = self.heap.alloc(newnode) + self.put_stack(addr) + } + (node1, node2) => abort("liftCmp2: \{a1} = \{node1} \{a2} = \{node2}") + } +} +// end op definition + +fn build_initial_heap( + scdefs : List[(String, Int, List[Instruction])] +) -> (GHeap, @hashmap.T[String, Addr]) { + let heap = { object_count: 0, memory: Array::make(10000, None) } + let globals = @hashmap.new(capacity=50) + loop scdefs { + Nil => () + Cons((name, arity, instrs), rest) => { + let addr = heap.alloc(NGlobal(name, arity, instrs)) + globals[name] = addr + continue rest + } + } + return (heap, globals) +} + + +// start step definition +fn step(self : GState) -> Bool { + match self.code { + Nil => return false + Cons(i, is) => { + self.code = is + self.stat_incr() + match i { + PushGlobal(f) => self.push_global(f) + PushInt(n) => self.push_int(n) + Push(n) => self.push(n) + MkApp => self.mk_apply() + Unwind => self.unwind() + Update(n) => self.update(n) + Pop(n) => self.stack = self.stack.drop(n) + Alloc(n) => self.alloc_nodes(n) + Eval => self.eval() + Slide(n) => self.slide(n) + Add => self.lift_arith2(fn (x, y) { x + y}) + Sub => self.lift_arith2(fn (x, y) { x - y}) + Mul => self.lift_arith2(fn (x, y) { x * y}) + Div => self.lift_arith2(fn (x, y) { x / y}) + Neg => self.negate() + Eq => self.lift_cmp2(fn (x, y) { x == y }) + Ne => self.lift_cmp2(fn (x, y) { x != y }) + Lt => self.lift_cmp2(fn (x, y) { x < y }) + Le => self.lift_cmp2(fn (x, y) { x <= y }) + Gt => self.lift_cmp2(fn (x, y) { x > y }) + Ge => self.lift_cmp2(fn (x, y) { x >= y }) + Cond(i1, i2) => self.condition(i1, i2) + } + return true + } + } +} +// end step definition + + +fn reify(self : GState) -> Node { + if self.step() { + self.reify() + } else { + let stack = self.stack + match stack { + Cons(addr, Nil) => { + let res = self.heap[addr] + return res + } + _ => abort("wrong stack \{stack}") + } + } +} + diff --git a/next/tutorial/example/gmachine/gmachine-2.md b/next/tutorial/example/gmachine/gmachine-2.md index 824dcd18..d1639b9c 100644 --- a/next/tutorial/example/gmachine/gmachine-2.md +++ b/next/tutorial/example/gmachine/gmachine-2.md @@ -36,29 +36,20 @@ Before implementing `let` (and the more complex `letrec`), we first need to modi The adjustment is done in the implementation of the `Unwind` instruction. If the supercombinator has no parameters, it is the same as the original unwind. When there are parameters, the top address of the supercombinator node is discarded, and the `rearrange` function is called. -```moonbit -fn rearrange(self : GState, n : Int) -> Unit { - let appnodes = take(self.stack, n) - let args = map(fn (addr) { - let NApp(_, arg) = self.heap[addr] - arg - }, appnodes) - self.stack = append(args, drop(appnodes, n - 1)) -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start rearrange definition +:end-before: end rearrange definition ``` The `rearrange` function assumes that the first N addresses on the stack point to a series of `NApp` nodes. It keeps the bottommost one (used as Redex update), cleans up the top N-1 addresses, and then places N addresses that directly point to the parameters. After this, both parameters and local variables can be accessed using the same command by changing the `PushArg` instruction to a more general `Push` instruction. -```rust -fn push(self : GState, offset : Int) -> Unit { - // Copy the address at offset + 1 to the top of the stack - // Push(n) a0 : . . . : an : s - // => an : a0 : . . . : an : s - let appaddr = nth(self.stack, offset) - self.putStack(appaddr) -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start push definition +:end-before: end push definition ``` The next issue is that we need something to clean up. Consider the following expression: @@ -83,42 +74,28 @@ After constructing the graph corresponding to the expression `expr`, the stack s Therefore, we need a new instruction to clean up these no longer needed addresses. It is called `Slide`. As the name suggests, the function of `Slide(n)` is to skip the first address and delete the following N addresses. -```rust -fn slide(self : GState, n : Int) -> Unit { - let addr = self.pop1() - self.stack = Cons(addr, drop(self.stack, n)) -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start slide definition +:end-before: end slide definition ``` Now we can compile `let`. We will compile the expressions corresponding to local variables using the `compileC` function. Then, traverse the list of variable definitions (`defs`), compile and update the corresponding offsets in order. Finally, use the passed `comp` function to compile the main expression and add the `Slide` instruction to clean up the unused addresses. > Compiling the main expression using the passed function makes it easy to reuse when adding subsequent features. -```rust -fn compileLet(comp : (RawExpr[String], List[(String, Int)]) -> List[Instruction], defs : List[(String, RawExpr[String])], expr : RawExpr[String], env : List[(String, Int)]) -> List[Instruction] { - let (env, codes) = loop env, List::Nil, defs { - env, acc, Nil => (env, acc) - env, acc, Cons((name, expr), rest) => { - let code = compileC(expr, env) - // Update offsets and add offsets for local variables corresponding to name - let env = List::Cons((name, 0), argOffset(1, env)) - continue env, append(acc, code), rest - } - } - append(codes, append(comp(expr, env), List::[Slide(length(defs))])) -} +```{literalinclude} /sources/gmachine/src/part2/compile.mbt +:language: moonbit +:start-after: start compile_let definition +:end-before: end compile_let definition ``` The semantics of `letrec` are more complex - it allows the N variables within the expression to reference each other, so we need to pre-allocate N addresses and place them on the stack. We need a new instruction: `Alloc(N)`, which pre-allocates N `NInd` nodes and pushes the addresses onto the stack sequentially. The addresses in these indirect nodes are negative and only serve as placeholders. -```rust -fn allocNodes(self : GState, n : Int) -> Unit { - let dummynode : Node = NInd(Addr(-1)) - for i = 0; i < n; i = i + 1 { - let addr = self.heap.alloc(dummynode) - self.putStack(addr) - } -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start alloc definition +:end-before: end alloc definition ``` The steps to compile letrec are similar to `let`: @@ -128,24 +105,10 @@ The steps to compile letrec are similar to `let`: - Compile the local variables in `defs`, using the `Update` instruction to update the results to the pre-allocated addresses after compiling each one. - Compile the main expression and use the `Slide` instruction to clean up. -```rust -fn compileLetrec(comp : (RawExpr[String], List[(String, Int)]) -> List[Instruction], defs : List[(String, RawExpr[String])], expr : RawExpr[String], env : List[(String, Int)]) -> List[Instruction] { - let env = loop env, defs { - env, Nil => env - env, Cons((name, _), rest) => { - let env = List::Cons((name, 0), argOffset(1, env)) - continue env, rest - } - } - let n = length(defs) - fn compileDefs(defs : List[(String, RawExpr[String])], offset : Int) -> List[Instruction] { - match defs { - Nil => append(comp(expr, env), List::[Slide(n)]) - Cons((_, expr), rest) => append(compileC(expr, env), Cons(Update(offset), compileDefs(rest, offset - 1))) - } - } - Cons(Alloc(n), compileDefs(defs, n - 1)) -} +```{literalinclude} /sources/gmachine/src/part2/compile.mbt +:language: moonbit +:start-after: start compile_letrec definition +:end-before: end compile_letrec definition ``` ## Adding Primitives @@ -201,130 +164,50 @@ The implementation of the `Eval` instruction is not complicated: > This is similar to how strict evaluation languages handle saving caller contexts, but practical implementations would use more efficient methods. -```rust -fn eval(self : GState) -> Unit { - let addr = self.pop1() - self.putDump(self.code, self.stack) - self.stack = List::[addr] - self.code = List::[Unwind] -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start eval definition +:end-before: end eval definition ``` This simple definition requires modifying the `Unwind` instruction to restore the context when `Unwind` in the `NNum` branch finds that there is a recoverable context (`dump` is not empty). -```rust -fn unwind(self : GState) -> Unit { - let addr = self.pop1() - match self.heap[addr] { - NNum(_) => { - match self.dump { - Nil => self.putStack(addr) - Cons((instrs, stack), restDump) => { - // Restore the stack - self.stack = stack - self.putStack(addr) - self.dump = restDump - // Return to original code execution - self.code = instrs - } - } - } - ...... - } -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start unwind definition +:end-before: end unwind definition ``` Next, we need to implement arithmetic and comparison instructions. We use two functions to simplify the form of binary operations. The result of the comparison instruction is a boolean value, and for simplicity, we use numbers to represent it: 0 for `false`, 1 for `true`. -```rust -fn liftArith2(self : GState, op : (Int, Int) -> Int) -> Unit { - // Binary arithmetic operations - let (a1, a2) = self.pop2() - match (self.heap[a1], self.heap[a2]) { - (NNum(n1), NNum(n2)) => { - let newnode = Node::NNum(op(n1, n2)) - let addr = self.heap.alloc(newnode) - self.putStack(addr) - } - (node1, node2) => abort("liftArith2: \{a1} = \{node1} \{a2} = \{node2}") - } -} - -fn liftCmp2(self : GState, op : (Int, Int) -> Bool) -> Unit { - // Binary comparison operations - let (a1, a2) = self.pop2() - match (self.heap[a1], self.heap[a2]) { - (NNum(n1), NNum(n2)) => { - let flag = op(n1, n2) - let newnode = if flag { Node::NNum(1) } else { Node::NNum(0) } - let addr = self.heap.alloc(newnode) - self.putStack(addr) - } - (node1, node2) => abort("liftCmp2: \{a1} = \{node1} \{a2} = \{node2}") - } -} - -// Implement negation separately -fn negate(self : GState) -> Unit { - let addr = self.pop1() - match self.heap[addr] { - NNum(n) => { - let addr = self.heap.alloc(NNum(-n)) - self.putStack(addr) - } - otherwise => { - // If not NNum, throw an error - abort("negate: wrong kind of node \{otherwise}, address \{addr} ") - } - } -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start op definition +:end-before: end op definition ``` Finally, implement branching: -```rust -fn condition(self : GState, i1 : List[Instruction], i2 : List[Instruction]) -> Unit { - let addr = self.pop1() - match self.heap[addr] { - NNum(0) => { - // If false, jump to i2 - self.code = append(i2, self.code) - } - NNum(1) => { - // If true, jump to i1 - self.code = append(i1, self.code) - } - otherwise => abort("cond : \{addr} = \{otherwise}") - } -} +```{literalinclude} /sources/gmachine/src/part2/vm.mbt +:language: moonbit +:start-after: start cond definition +:end-before: end cond definition ``` No major adjustments are needed in the compilation part, just add some predefined programs: -```rust -let compiledPrimitives : List[(String, Int, List[Instruction])] = List::[ - // Arithmetic - ("add", 2, List::[Push(1), Eval, Push(1), Eval, Add, Update(2), Pop(2), Unwind]), - ("sub", 2, List::[Push(1), Eval, Push(1), Eval, Sub, Update(2), Pop(2), Unwind]), - ("mul", 2, List::[Push(1), Eval, Push(1), Eval, Mul, Update(2), Pop(2), Unwind]), - ("div", 2, List::[Push(1), Eval, Push(1), Eval, Div, Update(2), Pop(2), Unwind]), - // Comparison - ("eq", 2, List::[Push(1), Eval, Push(1), Eval, Eq, Update(2), Pop(2), Unwind]), - ("neq", 2, List::[Push(1), Eval, Push(1), Eval, Ne, Update(2), Pop(2), Unwind]), - ("ge", 2, List::[Push(1), Eval, Push(1), Eval, Ge, Update(2), Pop(2), Unwind]), - ("gt", 2, List::[Push(1), Eval, Push(1), Eval, Gt, Update(2), Pop(2), Unwind]), - ("le", 2, List::[Push(1), Eval, Push(1), Eval, Le, Update(2), Pop(2), Unwind]), - ("lt", 2, List::[Push(1), Eval, Push(1), Eval, Lt, Update(2), Pop(2), Unwind]), - // Miscellaneous - ("negate", 1, List::[Push(0), Eval, Neg, Update(1), Pop(1), Unwind]), - ("if", 3, List::[Push(0), Eval, Cond(List::[Push(1)], List::[Push(2)]), Update(3), Pop(3), Unwind]) -] +```{literalinclude} /sources/gmachine/src/part2/compile.mbt +:language: moonbit +:start-after: start prim definition +:end-before: end prim definition ``` and modify the initial instruction sequence -```rust -let initialCode : List[Instruction] = List::[PushGlobal("main"), Eval] +```{literalinclude} /sources/gmachine/src/part2/top.mbt +:language: moonbit +:start-after: start init definition +:end-before: end init definition ``` ## Conclusion