From a4c63c57328c3f81f7bc74e2667ca491d168fcfc Mon Sep 17 00:00:00 2001 From: Mateusz Russak Date: Thu, 30 Nov 2023 17:06:42 +0100 Subject: [PATCH 1/7] feat: first draft on compiler and VM --- Cargo.lock | 10 ++ Cargo.toml | 3 + crates/compiler/Cargo.toml | 15 ++ crates/compiler/src/lib.rs | 260 +++++++++++++++++++++++++++++++++ crates/compiler/src/simple.as | 43 ++++++ luac.out | Bin 0 -> 94 bytes test.lua | 1 + tests/compute_compiled_test.rs | 42 ++++++ 8 files changed, 374 insertions(+) create mode 100644 crates/compiler/Cargo.toml create mode 100644 crates/compiler/src/lib.rs create mode 100644 crates/compiler/src/simple.as create mode 100644 luac.out create mode 100644 test.lua create mode 100644 tests/compute_compiled_test.rs diff --git a/Cargo.lock b/Cargo.lock index cbf7dae..c002a20 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -219,6 +219,14 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "compiler" +version = "0.1.0" +dependencies = [ + "lexer", + "parser", +] + [[package]] name = "dashmap" version = "5.5.3" @@ -438,6 +446,8 @@ checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" name = "loom" version = "0.1.0" dependencies = [ + "cli", + "compiler", "interpreter", "lexer", "parser", diff --git a/Cargo.toml b/Cargo.toml index ae35259..fff7cad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ members = [ "crates/lexer", "crates/parser", "crates/interpreter", + "crates/compiler", "bins/lsp", "bins/cli", "tree-sitter-loom", @@ -19,5 +20,7 @@ members = [ lexer = { path = "./crates/lexer" } parser = { path = "./crates/parser" } interpreter = { path = "./crates/interpreter" } +compiler = { path = "./crates/compiler" } +cli = { path = "./bins/cli" } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/crates/compiler/Cargo.toml b/crates/compiler/Cargo.toml new file mode 100644 index 0000000..464ac4d --- /dev/null +++ b/crates/compiler/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "compiler" +version = "0.1.0" +edition = "2021" + +[lib] +tests = true +path = "src/lib.rs" +crate-type = ["lib"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +lexer = { path = "../lexer" } +parser = { path = "../parser" } diff --git a/crates/compiler/src/lib.rs b/crates/compiler/src/lib.rs new file mode 100644 index 0000000..8bc2590 --- /dev/null +++ b/crates/compiler/src/lib.rs @@ -0,0 +1,260 @@ +use std::{fmt::{Display, Formatter}}; + +use lexer::PError; +use parser::{Node, Op, Value}; + +#[derive(Debug, Clone, Copy)] +enum OpCode { + Move, + Load, + Load0, + Load1, + Store, + Exit, + Add, + Sub, + Mul, + Div, +} + +impl Display for OpCode { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + OpCode::Move => write!(f, "Move"), + OpCode::Load => write!(f, "Load"), + OpCode::Load0 => write!(f, "Load0"), + OpCode::Load1 => write!(f, "Load1"), + OpCode::Store => write!(f, "Store"), + OpCode::Exit => write!(f, "Pop"), + OpCode::Add => write!(f, "Add"), + OpCode::Sub => write!(f, "Sub"), + OpCode::Mul => write!(f, "Mul"), + OpCode::Div => write!(f, "Div"), + } + } +} + +impl From for OpCode { + fn from(val: u8) -> OpCode { + match val { + 0 => OpCode::Move, + 1 => OpCode::Load, + 2 => OpCode::Load0, + 3 => OpCode::Load1, + 4 => OpCode::Store, + 5 => OpCode::Exit, + 6 => OpCode::Add, + 7 => OpCode::Sub, + 8 => OpCode::Mul, + 9 => OpCode::Div, + _ => panic!("Unknown op code: {}", val), + } + } +} + +struct Instruction { + op_code: OpCode, + arg0: Option, + arg1: Option, + arg2: Option, +} + + +impl Instruction { + pub fn new(op_code: OpCode) -> Instruction { + Instruction { + op_code, + arg0: None, + arg1: None, + arg2: None, + } + } + pub fn with_arg0(mut self, arg0: u8) -> Instruction { + self.arg0 = Some(arg0); + self + } + pub fn with_arg1(mut self, arg1: u8) -> Instruction { + self.arg1 = Some(arg1); + self + } + pub fn with_arg2(mut self, arg2: u8) -> Instruction { + self.arg2 = Some(arg2); + self + } + pub fn to_bytes(&self) -> Vec { + let mut bytes = Vec::new(); + bytes.push(self.op_code as u8); + if let Some(arg0) = self.arg0 { + bytes.push(arg0); + } + if let Some(arg1) = self.arg1 { + bytes.push(arg1); + } + if let Some(arg2) = self.arg2 { + bytes.push(arg2); + } + bytes + } +} + + +struct Code { + code: Vec, +} + +impl Code { + pub fn new() -> Code { + Code { + code: Vec::new(), + } + } + pub fn push(&mut self, inst: Instruction) { + self.code.push(inst); + } + pub fn to_bytes(&self) -> Vec { + self.code.iter().flat_map(|i| i.to_bytes()).collect::>() + } +} + +struct Mem { + data: Vec, + memptr: usize, +} + +impl Mem { + pub fn new() -> Mem { + Mem { + data: Vec::new(), + memptr: 0, + } + } + pub fn write(&mut self, val: u32) -> usize { + let location = self.memptr; + for i in 0..4 { + self.data[self.memptr + i] = (val >> (i * 8)) as u8; + + } + self.memptr += 4; + location + } + + pub fn to_bytes(&self) -> Vec { + self.data.clone() + } +} +impl Display for Mem { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "Mem: {:?}", self.data) + } +} + +pub fn compile_node(node: &Node, mut consts: &mut Mem, inst: &mut Code) -> Result<(), PError> { + match node.op { + Op::Add => { + compile_node(&node.children[0], &mut consts, inst)?; + compile_node(&node.children[1], &mut consts, inst)?; + inst.push(Instruction::new(OpCode::Add)); + }, + Op::Value => { + match &node.value { + Some(Value::Number(0)) => inst.push(Instruction::new(OpCode::Load0)), + Some(Value::Number(1)) => inst.push(Instruction::new(OpCode::Load1)), + Some(Value::Number(val)) => { + let addr = consts.write(*val as u32); + if addr < 256 { + inst.push(Instruction::new(OpCode::Load).with_arg0(addr.try_into().unwrap())); + } + }, + Some(..) => { + panic!("Unknown value: {}", node); + }, + None => panic!("No value"), + } + }, + Op::Scope => { + for child in &node.children { + compile_node(child, &mut consts, inst)?; + } + inst.push(Instruction::new(OpCode::Exit)); + }, + _ => { + panic!("Unknown op: {}", node); + } + } + Ok(()) +} + +pub fn compile(node: &Node) -> Result, PError> { + let mut inst = Code::new(); + let mut consts = Mem::new(); + compile_node(node, &mut consts, &mut inst)?; + let code = inst.to_bytes(); + let mem = consts.to_bytes(); + let mut bytes = Vec::new(); + bytes.extend_from_slice(&code); + bytes.extend_from_slice(&mem); + Ok(bytes) +} + +pub struct VM { + regs: Vec, + stack: Vec, + pc: usize, +} + + +impl VM { + pub fn new() -> VM { + VM { + regs: Vec::new(), + stack: Vec::new(), + pc: 0, + } + } + pub fn run(&mut self, bytes: Vec) -> Result { + let byte = bytes[self.pc]; + + let op_code = OpCode::from(byte); + match op_code { + OpCode::Load1 => { + self.regs[0] = 1; + }, + OpCode::Exit => { + return Ok(self.regs[0]); + }, + _ => { + panic!("Unknown op code: {}", op_code); + } + } + + Ok(0) + } +} + +#[cfg(test)] +mod tests { + use parser::Node; + use lexer::Location; + + use super::*; + + #[test] + fn compile_simple() { + let mut node = Node::new(Op::Scope, Location::Eof); + node.add(Node::new(Op::Value, Location::Eof).set_value(1.into())); + let bytes = compile(&node).unwrap(); + assert_eq!(bytes, vec![3, 5]); + } + + #[test] + fn compile_binary_op() { + let mut node = Node::new(Op::Scope, Location::Eof); + let mut add = Node::new(Op::Add, Location::Eof); + add.add(Node::new(Op::Value, Location::Eof).set_value(1.into())); + add.add(Node::new(Op::Value, Location::Eof).set_value(2.into())); + node.add(add); + let bytes = compile(&node).unwrap(); + assert_eq!(bytes, vec![3, 2, 0, 6, 5]); + } +} + diff --git a/crates/compiler/src/simple.as b/crates/compiler/src/simple.as new file mode 100644 index 0000000..f55b8c0 --- /dev/null +++ b/crates/compiler/src/simple.as @@ -0,0 +1,43 @@ +mem: + a: 123 + b: 2 + c: "Hello World" + d: "print" + +main: + load a + load b + add + load c + load d + call 3 + +```loom +fn gdc (a,b){ + while(b!=0) { + t = b; + b = a % b; + a = t; + }; + a +} +``` + +gdc: + movl -2 r1 + movl -1 r2 +while_start: + neqz r1 gdc1 + mov r1 r3 + mod r1 r2 r1 + mov r2 r1 + jmp while_start +while_end: + mov r1 r0 + ret 1 + + + + + + diff --git a/luac.out b/luac.out new file mode 100644 index 0000000000000000000000000000000000000000..ec7dea1c99d7182f6520ca9a35965c0be19c8219 GIT binary patch literal 94 zcmb34DNPJvketlRCB?K5HWMF7; iVPIl(V_;-#WNA&U$Si4O1d6o+2_V(b*c$KZ7X|=4s}n;2 literal 0 HcmV?d00001 diff --git a/test.lua b/test.lua new file mode 100644 index 0000000..63a3a69 --- /dev/null +++ b/test.lua @@ -0,0 +1 @@ +exit(1) diff --git a/tests/compute_compiled_test.rs b/tests/compute_compiled_test.rs new file mode 100644 index 0000000..feff994 --- /dev/null +++ b/tests/compute_compiled_test.rs @@ -0,0 +1,42 @@ +use lexer::{Token,PError,Tokenizer}; + +fn parse(text: &str) -> Result { + let tokens:Vec = Tokenizer::new(text).collect::, PError>>()?; + let mut iter = tokens.iter(); + parser::parse(&mut iter) +} + +#[macro_export] +macro_rules! test_return_code{ + ( $name:ident, $i:expr, $o:expr ) => { + #[test] + fn $name() { + let text = $i; + let node = parse(text).unwrap_or_else(|e| { + panic!("\nError:\n{}\n", e.format_error($i, "file.lum", false)); + }); + //println!("{}", node); + let bytes = compiler::compile(&node).unwrap_or_else(|e| { + panic!("\nError:\n{}\n", e.format_error($i, "file.lum", false)); + }); + + let mut vm = compiler::VM::new(); + let val = vm.run(bytes).unwrap_or_else(|e| { + panic!("\nError:\n{}\n", e.format_error($i, "file.lum", false)); + }); + assert_eq!(val, $o); + } + }; +} + +test_return_code!(simple, "1", 1); +/* +test_return_code!(simple, "1 + 2", 3); +test_return_code!(with_braces, "2 * (3 + 4) ", 14); +test_return_code!(with_variable, "asd = 4; 2 * (3 + asd) ", 14); +test_return_code!(with_two_variables, "qwe=3; asd = 4; 2 * (qwe + asd) ", 14); +test_return_code!(conditional_positive, "if(1){5}else{7}", 5); +test_return_code!(conditional_negative, "if(0){5}else{7}", 7); +test_return_code!(strings_multiplication, "qwe='oko'; qwe*3; 7", 7); +test_return_code!(while_loop, "a=0; while(a!=2) a = a + 1; a", 2); +*/ From aca4271ddac505ab1cc39334af9ad9cdc9974f7f Mon Sep 17 00:00:00 2001 From: Mateusz Russak Date: Sat, 2 Dec 2023 02:07:44 +0100 Subject: [PATCH 2/7] chore: fun with macros --- crates/compiler/src/lib.rs | 39 +------- crates/compiler/src/vm.rs | 194 +++++++++++++++++++++++++++++++++++++ 2 files changed, 197 insertions(+), 36 deletions(-) create mode 100644 crates/compiler/src/vm.rs diff --git a/crates/compiler/src/lib.rs b/crates/compiler/src/lib.rs index 8bc2590..6388852 100644 --- a/crates/compiler/src/lib.rs +++ b/crates/compiler/src/lib.rs @@ -1,10 +1,12 @@ +mod vm; use std::{fmt::{Display, Formatter}}; use lexer::PError; use parser::{Node, Op, Value}; +pub use vm::VM; #[derive(Debug, Clone, Copy)] -enum OpCode { +pub enum OpCode { Move, Load, Load0, @@ -196,41 +198,6 @@ pub fn compile(node: &Node) -> Result, PError> { Ok(bytes) } -pub struct VM { - regs: Vec, - stack: Vec, - pc: usize, -} - - -impl VM { - pub fn new() -> VM { - VM { - regs: Vec::new(), - stack: Vec::new(), - pc: 0, - } - } - pub fn run(&mut self, bytes: Vec) -> Result { - let byte = bytes[self.pc]; - - let op_code = OpCode::from(byte); - match op_code { - OpCode::Load1 => { - self.regs[0] = 1; - }, - OpCode::Exit => { - return Ok(self.regs[0]); - }, - _ => { - panic!("Unknown op code: {}", op_code); - } - } - - Ok(0) - } -} - #[cfg(test)] mod tests { use parser::Node; diff --git a/crates/compiler/src/vm.rs b/crates/compiler/src/vm.rs new file mode 100644 index 0000000..9c65855 --- /dev/null +++ b/crates/compiler/src/vm.rs @@ -0,0 +1,194 @@ +use crate::OpCode; +use lexer::PError; + +macro_rules! make_instr_set { + ($p:ident; $($inst:ident($($arg:ty),*; $c:expr; $($name:ident[$a:expr;$b:expr]),+; $($n:ident = $dec:expr),*)),+ ) => { + enum $p { + $( + $inst($($arg),*) + ),+ + } + impl $p{ + fn byte_description(&self) -> (u8, [u8;4]) { + match self { + $( + $p::$inst(..) => make_instr!($c; $($name[$a;$b]),+; $($n = $dec),*), + )+ + } + } + } + }; +} + +make_instr_set!( + Lum32; + Load(u8,u16; 4; op[29;24], target[23;16], val[15;0]; op = 0b11000001), + Load0(u8; 2; op[31;24], target[23;16]; op = 0b01000001), + Load1(u8; 2; op[31;24], target[23;16]; op = 0b01000010), + Exit(; 1; op[31;24]; op = 0b00000000), + Store(u8; 2; op[31;24], target[23;16]; op = 0b01000011) +); + + +enum Instr { + Load(u8,u16), + Load0(u8), + Load1(u8), + Store(u8), + Exit, +} + +macro_rules! make_instr { + ($c:expr; $($name:ident[$a:expr;$b:expr]),+; $($n:ident = $dec:expr),*) => { + { + $( + let $n = $dec; + )* + let mut result: u32 = 0; + $( + result |= { + let mask: u32 = (1 << ($a)-($b)+1) - 1; + (mask & $name as u32) << $b + }; + )+ + ($c, result.to_be_bytes()) + } + }; +} +/* +macro_rules! make_instr_parser { + ($fn_name:ident; $($name:ident[$a:expr;$b:expr]),+) => { + $( + let $name: u32 = { + let mask: u32 = (1 << ($a)-($b)+1) - 1; + let c = val >> $b; + c & mask; + }; + )+ + match op { + 0b11000001 => Instr::Load(target, val), + 0b01000001 => Instr::Load0(target), + 0b01000010 => Instr::Load1(target), + 0b00000000 => Instr::Exit, + 0b01000011 => Instr::Store(target), + _ => panic!("Unknown op code: {}", op), + } + } + }; +} +*/ +impl Instr { + fn byte_description(self) -> (u8, [u8;4]) { + match self { + Instr::Load(target, val) => make_instr!(4; op[29;24], target[23;16], val[15;0]; op = 0b11000001), + Instr::Load0(target) => make_instr!(2; op[31;24], target[23;16]; op = 0b01000001), + Instr::Load1(target) => make_instr!(2; op[31;24], target[23;16]; op = 0b01000010), + Instr::Exit => make_instr!(1; op[31;24]; op = 0b00000000), + Instr::Store(target) => make_instr!(2; op[31;24], target[23;16]; op = 0b01000011), + } + } +} + +struct Instrs (Vec<(u8,[u8;4])>); + +impl Into> for Instrs { + fn into(self) -> Vec { + let mut bytes = Vec::new(); + for (count, instr) in self.0.iter() { + for b in 0..(*count as usize) { + bytes.push(instr[b]); + } + } + bytes + } +} + + +pub struct VM { + regs: Vec, + stack: Vec, + pc: usize, +} + + +impl VM { + pub fn new() -> VM { + VM { + regs: Vec::new(), + stack: Vec::new(), + pc: 0, + } + } + + pub fn run(&mut self, bytes: Vec) -> Result { + let byte = bytes[self.pc]; + + let op_code = OpCode::from(byte); + match op_code { + OpCode::Load1 => { + self.regs[0] = 1; + }, + OpCode::Exit => { + return Ok(self.regs[0]); + }, + _ => { + panic!("Unknown op code: {}", op_code); + } + } + + Ok(0) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! code { + [$($x:ident$(($($arg:expr),*))?),+] => { + { + let c = Instrs(vec![ + $( + Lum32::$x($($arg),*).byte_description() + ),+ + ]); + c.into() + } + }; + } + + #[test] + fn test_instr() { + let i0:(u8,[u8;4]) = Instr::Load(1, 1).byte_description(); + let i1:(u8,[u8;4]) = Instr::Load0(1).byte_description(); + let i2:(u8,[u8;4]) = Instr::Load1(1).byte_description(); + let i3:(u8,[u8;4]) = Instr::Exit.byte_description(); + assert_eq!(i0, (4, [193,1,0,1])); + assert_eq!(i1, (2, [65,1,0,0])); + assert_eq!(i2, (2, [66,1,0,0])); + assert_eq!(i3, (1, [0,0,0,0])); + } + + #[test] + fn test_code() { + let bytecode: Vec = code![Load(1,1), Load0(1), Load1(1), Exit]; + assert_eq!(bytecode, vec![1,1,0,1,2,1,3,1,4]); + + } + + #[test] + fn test_vm() { + let mut vm = VM::new(); + let bytes:Vec = code![Load(1,1), Load0(1), Load1(1), Exit]; + let result = vm.run(bytes); + assert_eq!(result.unwrap(), 1); + } + + #[test] + fn test_stack_1() { + let mut vm = VM::new(); + let bytes:Vec = code![Load1(1), Store(1), Exit]; + let result = vm.run(bytes); + assert_eq!(vm.stack, vec![1]); + } +} From 1601cc7242a3dd377332647c198bf0fa32fd077d Mon Sep 17 00:00:00 2001 From: Mateusz Russak Date: Sat, 2 Dec 2023 02:32:35 +0100 Subject: [PATCH 3/7] chore: working but i would like to have Exit without param --- crates/compiler/src/vm.rs | 69 +++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/crates/compiler/src/vm.rs b/crates/compiler/src/vm.rs index 9c65855..4882654 100644 --- a/crates/compiler/src/vm.rs +++ b/crates/compiler/src/vm.rs @@ -1,18 +1,46 @@ use crate::OpCode; use lexer::PError; +macro_rules! make_instr { + ($c:expr; $($name:ident[$a:expr;$b:expr]),+; $($n:ident = $dec:expr),*) => { + { + $( + let $n = $dec; + )* + let mut result: u32 = 0; + $( + result |= { + let mask: u32 = (1 << ($a)-($b)+1) - 1; + (mask & $name as u32) << $b + }; + )+ + ($c, result.to_be_bytes()) + } + }; +} macro_rules! make_instr_set { - ($p:ident; $($inst:ident($($arg:ty),*; $c:expr; $($name:ident[$a:expr;$b:expr]),+; $($n:ident = $dec:expr),*)),+ ) => { + ($p:ident; $($inst:ident($($arg:ty),*; $c:expr; op[$a0:expr;$b0:expr] = $opv:expr; $($name:ident[$a:expr;$b:expr]),*; $($n:ident = $dec:expr),*)),+ ) => { enum $p { $( $inst($($arg),*) ),+ } impl $p{ - fn byte_description(&self) -> (u8, [u8;4]) { + fn get_op(&self) -> u8 { + match self { + $( + $p::$inst(..) => $opv, + )+ + } + } + + fn byte_description(self) -> (u8, [u8;4]) { match self { $( - $p::$inst(..) => make_instr!($c; $($name[$a;$b]),+; $($n = $dec),*), + $p::$inst($($name),*) => { + let op = $opv; + make_instr!($c; op[$a0;$b0], $($name[$a;$b]),*; $($n = $dec),*) + }, )+ } } @@ -22,11 +50,11 @@ macro_rules! make_instr_set { make_instr_set!( Lum32; - Load(u8,u16; 4; op[29;24], target[23;16], val[15;0]; op = 0b11000001), - Load0(u8; 2; op[31;24], target[23;16]; op = 0b01000001), - Load1(u8; 2; op[31;24], target[23;16]; op = 0b01000010), - Exit(; 1; op[31;24]; op = 0b00000000), - Store(u8; 2; op[31;24], target[23;16]; op = 0b01000011) + Load(u8,u16; 4; op[29;24]= 0b11000001; target[23;16], val[15;0];), + Load0(u8; 2; op[31;24] = 0b01000001; target[23;16];), + Load1(u8; 2; op[31;24] = 0b01000010; target[23;16];), + Exit(u8; 1; op[31;24] = 0b00000000; empty[1;0];), + Store(u8; 2; op[31;24] = 0b01000011; target[23;16];) ); @@ -38,23 +66,6 @@ enum Instr { Exit, } -macro_rules! make_instr { - ($c:expr; $($name:ident[$a:expr;$b:expr]),+; $($n:ident = $dec:expr),*) => { - { - $( - let $n = $dec; - )* - let mut result: u32 = 0; - $( - result |= { - let mask: u32 = (1 << ($a)-($b)+1) - 1; - (mask & $name as u32) << $b - }; - )+ - ($c, result.to_be_bytes()) - } - }; -} /* macro_rules! make_instr_parser { ($fn_name:ident; $($name:ident[$a:expr;$b:expr]),+) => { @@ -149,7 +160,7 @@ mod tests { { let c = Instrs(vec![ $( - Lum32::$x($($arg),*).byte_description() + Lum32::$x$(($($arg),*))?.byte_description() ),+ ]); c.into() @@ -171,7 +182,7 @@ mod tests { #[test] fn test_code() { - let bytecode: Vec = code![Load(1,1), Load0(1), Load1(1), Exit]; + let bytecode: Vec = code![Load(1,1), Load0(1), Load1(1), Exit(1)]; assert_eq!(bytecode, vec![1,1,0,1,2,1,3,1,4]); } @@ -179,7 +190,7 @@ mod tests { #[test] fn test_vm() { let mut vm = VM::new(); - let bytes:Vec = code![Load(1,1), Load0(1), Load1(1), Exit]; + let bytes:Vec = code![Load(1,1), Load0(1), Load1(1), Exit(1)]; let result = vm.run(bytes); assert_eq!(result.unwrap(), 1); } @@ -187,7 +198,7 @@ mod tests { #[test] fn test_stack_1() { let mut vm = VM::new(); - let bytes:Vec = code![Load1(1), Store(1), Exit]; + let bytes:Vec = code![Load1(1), Store(1), Exit(1)]; let result = vm.run(bytes); assert_eq!(vm.stack, vec![1]); } From b1e095fff31109580b3674cab5bd813118fdb321 Mon Sep 17 00:00:00 2001 From: Mateusz Russak Date: Sun, 3 Dec 2023 00:34:36 +0100 Subject: [PATCH 4/7] feat: virtual machine more instructions supported --- Cargo.lock | 126 ++++++++++++++ crates/compiler/Cargo.toml | 4 + crates/compiler/src/lib.rs | 61 ++----- crates/compiler/src/vm.rs | 205 ----------------------- crates/vm/Cargo.toml | 0 crates/vm/src/instr.rs | 278 +++++++++++++++++++++++++++++++ crates/vm/src/lib.rs | 294 +++++++++++++++++++++++++++++++++ crates/vm/src/op_code.rs | 53 ++++++ tests/compute_compiled_test.rs | 4 +- 9 files changed, 768 insertions(+), 257 deletions(-) delete mode 100644 crates/compiler/src/vm.rs create mode 100644 crates/vm/Cargo.toml create mode 100644 crates/vm/src/instr.rs create mode 100644 crates/vm/src/lib.rs create mode 100644 crates/vm/src/op_code.rs diff --git a/Cargo.lock b/Cargo.lock index c002a20..5965a54 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -223,10 +223,23 @@ dependencies = [ name = "compiler" version = "0.1.0" dependencies = [ + "enum-display", "lexer", + "num", + "num-derive", + "num-traits", "parser", ] +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "dashmap" version = "5.5.3" @@ -240,6 +253,26 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "enum-display" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d4df33d54dd1959d177a0e2c2f4e5a8637a3054aa56861ed7e173ad2043fe2" +dependencies = [ + "enum-display-macro", +] + +[[package]] +name = "enum-display-macro" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a0ce3a36047ede676eb0d2721d065beed8410cf4f113f489604d2971331cb378" +dependencies = [ + "convert_case", + "quote", + "syn 1.0.109", +] + [[package]] name = "errno" version = "0.3.7" @@ -516,6 +549,93 @@ dependencies = [ "winapi", ] +[[package]] +name = "num" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-derive" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfb77679af88f8b125209d354a202862602672222e7f2313fdd6dc349bad4712" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.39", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", +] + [[package]] name = "num_cpus" version = "1.16.0" @@ -1064,6 +1184,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + [[package]] name = "url" version = "2.4.1" diff --git a/crates/compiler/Cargo.toml b/crates/compiler/Cargo.toml index 464ac4d..cf07c6f 100644 --- a/crates/compiler/Cargo.toml +++ b/crates/compiler/Cargo.toml @@ -11,5 +11,9 @@ crate-type = ["lib"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +enum-display = "0.1.3" lexer = { path = "../lexer" } +num = "0.4.1" +num-derive = "0.4.1" +num-traits = "0.2.17" parser = { path = "../parser" } diff --git a/crates/compiler/src/lib.rs b/crates/compiler/src/lib.rs index 6388852..ef7fd96 100644 --- a/crates/compiler/src/lib.rs +++ b/crates/compiler/src/lib.rs @@ -1,58 +1,21 @@ mod vm; +mod op_code; +mod instr; + +extern crate num; +#[macro_use] +extern crate num_derive; +#[macro_use] +extern crate enum_display; + use std::{fmt::{Display, Formatter}}; use lexer::PError; use parser::{Node, Op, Value}; pub use vm::VM; +pub use instr::{Instr, Instrs}; +pub use op_code::OpCode; -#[derive(Debug, Clone, Copy)] -pub enum OpCode { - Move, - Load, - Load0, - Load1, - Store, - Exit, - Add, - Sub, - Mul, - Div, -} - -impl Display for OpCode { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - OpCode::Move => write!(f, "Move"), - OpCode::Load => write!(f, "Load"), - OpCode::Load0 => write!(f, "Load0"), - OpCode::Load1 => write!(f, "Load1"), - OpCode::Store => write!(f, "Store"), - OpCode::Exit => write!(f, "Pop"), - OpCode::Add => write!(f, "Add"), - OpCode::Sub => write!(f, "Sub"), - OpCode::Mul => write!(f, "Mul"), - OpCode::Div => write!(f, "Div"), - } - } -} - -impl From for OpCode { - fn from(val: u8) -> OpCode { - match val { - 0 => OpCode::Move, - 1 => OpCode::Load, - 2 => OpCode::Load0, - 3 => OpCode::Load1, - 4 => OpCode::Store, - 5 => OpCode::Exit, - 6 => OpCode::Add, - 7 => OpCode::Sub, - 8 => OpCode::Mul, - 9 => OpCode::Div, - _ => panic!("Unknown op code: {}", val), - } - } -} struct Instruction { op_code: OpCode, @@ -205,7 +168,6 @@ mod tests { use super::*; - #[test] fn compile_simple() { let mut node = Node::new(Op::Scope, Location::Eof); node.add(Node::new(Op::Value, Location::Eof).set_value(1.into())); @@ -213,7 +175,6 @@ mod tests { assert_eq!(bytes, vec![3, 5]); } - #[test] fn compile_binary_op() { let mut node = Node::new(Op::Scope, Location::Eof); let mut add = Node::new(Op::Add, Location::Eof); diff --git a/crates/compiler/src/vm.rs b/crates/compiler/src/vm.rs deleted file mode 100644 index 4882654..0000000 --- a/crates/compiler/src/vm.rs +++ /dev/null @@ -1,205 +0,0 @@ -use crate::OpCode; -use lexer::PError; -macro_rules! make_instr { - ($c:expr; $($name:ident[$a:expr;$b:expr]),+; $($n:ident = $dec:expr),*) => { - { - $( - let $n = $dec; - )* - let mut result: u32 = 0; - $( - result |= { - let mask: u32 = (1 << ($a)-($b)+1) - 1; - (mask & $name as u32) << $b - }; - )+ - ($c, result.to_be_bytes()) - } - }; -} - -macro_rules! make_instr_set { - ($p:ident; $($inst:ident($($arg:ty),*; $c:expr; op[$a0:expr;$b0:expr] = $opv:expr; $($name:ident[$a:expr;$b:expr]),*; $($n:ident = $dec:expr),*)),+ ) => { - enum $p { - $( - $inst($($arg),*) - ),+ - } - impl $p{ - fn get_op(&self) -> u8 { - match self { - $( - $p::$inst(..) => $opv, - )+ - } - } - - fn byte_description(self) -> (u8, [u8;4]) { - match self { - $( - $p::$inst($($name),*) => { - let op = $opv; - make_instr!($c; op[$a0;$b0], $($name[$a;$b]),*; $($n = $dec),*) - }, - )+ - } - } - } - }; -} - -make_instr_set!( - Lum32; - Load(u8,u16; 4; op[29;24]= 0b11000001; target[23;16], val[15;0];), - Load0(u8; 2; op[31;24] = 0b01000001; target[23;16];), - Load1(u8; 2; op[31;24] = 0b01000010; target[23;16];), - Exit(u8; 1; op[31;24] = 0b00000000; empty[1;0];), - Store(u8; 2; op[31;24] = 0b01000011; target[23;16];) -); - - -enum Instr { - Load(u8,u16), - Load0(u8), - Load1(u8), - Store(u8), - Exit, -} - -/* -macro_rules! make_instr_parser { - ($fn_name:ident; $($name:ident[$a:expr;$b:expr]),+) => { - $( - let $name: u32 = { - let mask: u32 = (1 << ($a)-($b)+1) - 1; - let c = val >> $b; - c & mask; - }; - )+ - match op { - 0b11000001 => Instr::Load(target, val), - 0b01000001 => Instr::Load0(target), - 0b01000010 => Instr::Load1(target), - 0b00000000 => Instr::Exit, - 0b01000011 => Instr::Store(target), - _ => panic!("Unknown op code: {}", op), - } - } - }; -} -*/ -impl Instr { - fn byte_description(self) -> (u8, [u8;4]) { - match self { - Instr::Load(target, val) => make_instr!(4; op[29;24], target[23;16], val[15;0]; op = 0b11000001), - Instr::Load0(target) => make_instr!(2; op[31;24], target[23;16]; op = 0b01000001), - Instr::Load1(target) => make_instr!(2; op[31;24], target[23;16]; op = 0b01000010), - Instr::Exit => make_instr!(1; op[31;24]; op = 0b00000000), - Instr::Store(target) => make_instr!(2; op[31;24], target[23;16]; op = 0b01000011), - } - } -} - -struct Instrs (Vec<(u8,[u8;4])>); - -impl Into> for Instrs { - fn into(self) -> Vec { - let mut bytes = Vec::new(); - for (count, instr) in self.0.iter() { - for b in 0..(*count as usize) { - bytes.push(instr[b]); - } - } - bytes - } -} - - -pub struct VM { - regs: Vec, - stack: Vec, - pc: usize, -} - - -impl VM { - pub fn new() -> VM { - VM { - regs: Vec::new(), - stack: Vec::new(), - pc: 0, - } - } - - pub fn run(&mut self, bytes: Vec) -> Result { - let byte = bytes[self.pc]; - - let op_code = OpCode::from(byte); - match op_code { - OpCode::Load1 => { - self.regs[0] = 1; - }, - OpCode::Exit => { - return Ok(self.regs[0]); - }, - _ => { - panic!("Unknown op code: {}", op_code); - } - } - - Ok(0) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - macro_rules! code { - [$($x:ident$(($($arg:expr),*))?),+] => { - { - let c = Instrs(vec![ - $( - Lum32::$x$(($($arg),*))?.byte_description() - ),+ - ]); - c.into() - } - }; - } - - #[test] - fn test_instr() { - let i0:(u8,[u8;4]) = Instr::Load(1, 1).byte_description(); - let i1:(u8,[u8;4]) = Instr::Load0(1).byte_description(); - let i2:(u8,[u8;4]) = Instr::Load1(1).byte_description(); - let i3:(u8,[u8;4]) = Instr::Exit.byte_description(); - assert_eq!(i0, (4, [193,1,0,1])); - assert_eq!(i1, (2, [65,1,0,0])); - assert_eq!(i2, (2, [66,1,0,0])); - assert_eq!(i3, (1, [0,0,0,0])); - } - - #[test] - fn test_code() { - let bytecode: Vec = code![Load(1,1), Load0(1), Load1(1), Exit(1)]; - assert_eq!(bytecode, vec![1,1,0,1,2,1,3,1,4]); - - } - - #[test] - fn test_vm() { - let mut vm = VM::new(); - let bytes:Vec = code![Load(1,1), Load0(1), Load1(1), Exit(1)]; - let result = vm.run(bytes); - assert_eq!(result.unwrap(), 1); - } - - #[test] - fn test_stack_1() { - let mut vm = VM::new(); - let bytes:Vec = code![Load1(1), Store(1), Exit(1)]; - let result = vm.run(bytes); - assert_eq!(vm.stack, vec![1]); - } -} diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml new file mode 100644 index 0000000..e69de29 diff --git a/crates/vm/src/instr.rs b/crates/vm/src/instr.rs new file mode 100644 index 0000000..5f67c21 --- /dev/null +++ b/crates/vm/src/instr.rs @@ -0,0 +1,278 @@ +use std::fmt::Display; + +use crate::op_code::OpCode; + +macro_rules! make_instr { + ($c:expr; $($name:ident[$a:expr;$b:expr]),+ $(,)?; $($n:ident = $dec:expr),*) => { + { + $( + let $n = $dec; + )* + let mut result: u32 = 0; + $( + result |= { + let mask: u32 = (1 << ($a)-($b)+1) - 1; + (mask & $name as u32) << $b + }; + )+ + ($c, result.to_be_bytes()) + } + }; +} + +macro_rules! parse_instr { + ($val:ident; tr value -> $name:ident) => { + { + let mask1: u32 = (1 << 8) - 1; + let mask2: u32 = (1 << 16) - 1; + let tr = (($val >> 16) & mask1) as u8; + let value = ($val & mask2) as u16; + Some(Instr::$name(tr, value)) + } + }; + ($val:ident; tr -> $name:ident) => { + { + let mask1: u32 = (1 << 8) - 1; + let tr = (($val >> 16) & mask1) as u8; + Some(Instr::$name(tr)) + } + + }; + ($val:ident; tr r1 r2 -> $name:ident) => { + { + let mask1: u32 = (1 << 8) - 1; + let tr = (($val >> 16) & mask1) as u8; + let r1= (($val >> 8) & mask1) as u8; + let r2 = ($val & mask1) as u8; + Some(Instr::$name(tr,r1, r2)) + } + }; + ($val:ident; tr r1 -> $name:ident) => { + { + let mask1: u32 = (1 << 8) - 1; + let tr = (($val >> 16) & mask1) as u8; + let v1= (($val >> 8) & mask1) as u8; + Some(Instr::$name(tr,v1)) + } + }; + ($val:ident; r1 r2 addr -> $name:ident) => { + { + let mask_r: u32 = (1 << 4) - 1; + let mask16: u32 = (1 << 16) - 1; + let r1 = (($val >> 20) & mask_r) as u8; + let r2 = (($val >> 16) & mask_r) as u8; + let addr = ($val & mask16) as i16; + Some(Instr::$name(r1, r2, addr)) + } + }; + ($val:ident; adr -> $name:ident) => { + { + let neg_mask: u32 = 1 << 23; + let neg: u32 = (-1i32 << 23) as u32; + let mask1: u32 = (1 << 24) - 1; + + + //FIXME: this is terrible but I don't know how to handle it better + let adr= ($val & mask1) as u32; + let adr = if (adr & neg_mask) != 0 { + (neg | adr) as i32 + } else { + adr as i32 + }; + Some(Instr::$name(adr)) + } + }; +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u8)] +pub enum Instr { + Load(u8,u16), + Add(u8,u8,u8), + Load0(u8), + Load1(u8), + Store(u8), + Exit, + Mov(u8,u8), + Sub(u8,u8,u8), + Mul(u8,u8,u8), + Div(u8,u8,u8), + Mod(u8,u8,u8), + And(u8,u8,u8), + Or(u8,u8,u8), + Xor(u8,u8,u8), + Not(u8,u8), + Beq(u8,u8,i16), + Bne(u8,u8,i16), + Jmp(i32), +} + +impl From<&Instr> for OpCode { + fn from(val: &Instr) -> Self { + match val { + Instr::Load(..) => OpCode::Load, + Instr::Load0(..) => OpCode::Load0, + Instr::Load1(..) => OpCode::Load1, + Instr::Exit => OpCode::Exit, + Instr::Store(..) => OpCode::Store, + Instr::Add(..) => OpCode::Add, + Instr::Mov(..) => OpCode::Mov, + Instr::Sub(..) => OpCode::Sub, + Instr::Mul(..) => OpCode::Mul, + Instr::Div(..) => OpCode::Div, + Instr::Mod(..) => OpCode::Mod, + Instr::And(..) => OpCode::And, + Instr::Or(..) => OpCode::Or, + Instr::Xor(..) => OpCode::Xor, + Instr::Not(..) => OpCode::Not, + Instr::Beq(..) => OpCode::Beq, + Instr::Bne(..) => OpCode::Bne, + Instr::Jmp(..) => OpCode::Jmp, + } + } +} + +impl Instr { + pub fn byte_description(self) -> (u8, [u8;4]) { + match self { + Instr::Load(tr, val) => make_instr!(4; op[31;24], tr[23;16], val[15;0]; op = OpCode::Load), + Instr::Load0(tr) => make_instr!(2; op[31;24], tr[23;16]; op = OpCode::Load0), + Instr::Load1(tr) => make_instr!(2; op[31;24], tr[23;16]; op = OpCode::Load1), + Instr::Exit => make_instr!(1; op[31;24]; op = OpCode::Exit), + Instr::Store(tr) => make_instr!(2; op[31;24], tr[23;16]; op = OpCode::Store), + Instr::Add(tr, r1, r2) => make_instr!(4; op[31;24], tr[23;16], r1[15;8], r2[7;0]; op = OpCode::Add), + Instr::Mov(tr, r1) => make_instr!(3; op[31;24], tr[23;16], r1[15;8]; op = OpCode::Mov), + Instr::Sub(tr, r1, r2) => make_instr!(4; op[31;24], tr[23;16], r1[15;8], r2[7;0]; op = OpCode::Sub), + Instr::Mul(tr, r1, r2) => make_instr!(4; op[31;24], tr[23;16], r1[15;8], r2[7;0]; op = OpCode::Mul), + Instr::Div(tr, r1, r2) => make_instr!(4; op[31;24], tr[23;16], r1[15;8], r2[7;0]; op = OpCode::Div), + Instr::Mod(tr, r1, r2) => make_instr!(4; op[31;24], tr[23;16], r1[15;8], r2[7;0]; op = OpCode::Mod), + Instr::And(tr, r1, r2) => make_instr!(4; op[31;24], tr[23;16], r1[15;8], r2[7;0]; op = OpCode::And), + Instr::Or (tr, r1, r2) => make_instr!(4; op[31;24], tr[23;16], r1[15;8], r2[7;0]; op = OpCode::Or), + Instr::Xor(tr, r1, r2) => make_instr!(4; op[31;24], tr[23;16], r1[15;8], r2[7;0]; op = OpCode::Xor), + Instr::Not(tr, r1) => make_instr!(3; op[31;24], tr[23;16], r1[15;8]; op = OpCode::Not), + Instr::Beq(r1,r2,addr) => make_instr!(4; op[31;24], r1[23;20], r2[19;16], addr[15;0]; op = OpCode::Beq), + Instr::Bne(r1,r2,addr) => make_instr!(4; op[31;24], r1[23;20], r2[19;16], addr[15;0]; op = OpCode::Bne), + Instr::Jmp(addr) => make_instr!(4; op[31;24], addr[23;0]; op = OpCode::Jmp), + } + } + + pub fn op_code(&self) -> OpCode { + self.into() + } + + pub fn from_bytes(data: &[u8], pc: usize) -> Option { + let op = data[pc]; + let code: OpCode = op.into(); + let mut bytes = vec![0;4]; + bytes[0] = op; + for i in 1..code.size() { + bytes[i] = data[pc+i]; + } + + println!("val: {:?}", bytes); + let val = u32::from_be_bytes(bytes.try_into().unwrap()); + match code { + OpCode::Load => parse_instr!(val; tr value -> Load), + OpCode::Load0 => parse_instr!(val; tr -> Load0), + OpCode::Load1 => parse_instr!(val; tr -> Load1), + OpCode::Exit=> Some(Instr::Exit), + OpCode::Store => parse_instr!(val; tr -> Store), + OpCode::Add=> parse_instr!(val; tr r1 r2 -> Add), + OpCode::Mov=> parse_instr!(val; tr r1 -> Mov), + OpCode::Sub=> parse_instr!(val; tr r1 r2 -> Sub), + OpCode::Mul=> parse_instr!(val; tr r1 r2 -> Mul), + OpCode::Div=> parse_instr!(val; tr r1 r2 -> Div), + OpCode::Mod=> parse_instr!(val; tr r1 r2 -> Mod), + OpCode::And=> parse_instr!(val; tr r1 r2 -> And), + OpCode::Or=> parse_instr!(val; tr r1 r2 -> Or), + OpCode::Xor=> parse_instr!(val; tr r1 r2 -> Xor), + OpCode::Not=> parse_instr!(val; tr r1 -> Not), + OpCode::Beq=> parse_instr!(val; r1 r2 addr -> Beq), + OpCode::Bne=> parse_instr!(val; r1 r2 addr -> Bne), + OpCode::Jmp=> parse_instr!(val; adr -> Jmp), + } + } + + pub fn size(&self) -> usize { + self.op_code().size() + } +} + +impl Display for Instr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Instr::Load(tr, val) => write!(f, "Load r{} {}", tr, val), + Instr::Load0(tr) => write!(f, "Load0 r{}", tr), + Instr::Load1(tr) => write!(f, "Load1 r{}", tr), + Instr::Exit => write!(f, "Exit"), + Instr::Store(tr) => write!(f, "Store r{}", tr), + Instr::Add(tr, v1, v2) => write!(f, "Add r{} r{} r{}", tr, v1, v2), + Instr::Mov(tr, v1) => write!(f, "Mov r{} r{}", tr, v1), + Instr::Sub(tr, v1, v2) => write!(f, "Sub r{} r{} r{}", tr, v1, v2), + Instr::Mul(tr, v1, v2) => write!(f, "Mul r{} r{} r{}", tr, v1, v2), + Instr::Div(tr, v1, v2) => write!(f, "Div r{} r{} r{}", tr, v1, v2), + Instr::Mod(tr, v1, v2) => write!(f, "Mod r{} r{} r{}", tr, v1, v2), + Instr::And(tr, v1, v2) => write!(f, "And r{} r{} r{}", tr, v1, v2), + Instr::Or(tr, v1, v2) => write!(f, "Or r{} r{} r{}", tr, v1, v2), + Instr::Xor(tr, v1, v2) => write!(f, "Xor r{} r{} r{}", tr, v1, v2), + Instr::Not(tr, v1) => write!(f, "Not r{} r{}", tr, v1), + Instr::Beq(r1,r2,addr) => write!(f, "Beq r{} r{} {}", r1, r2, addr), + Instr::Bne(r1,r2,addr) => write!(f, "Bne r{} r{} {}", r1, r2, addr), + Instr::Jmp(val) => write!(f, "Jmp {}", val), + } + } +} + +pub struct Instrs (pub Vec<(u8,[u8;4])>); + +impl From for Vec { + fn from(val: Instrs) -> Self { + let mut bytes = Vec::new(); + for (count, instr) in val.0.iter() { + for b in 0..(*count as usize) { + bytes.push(instr[b]); + } + } + bytes + } +} + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! check_instr{ + ($test:ident; $name:ident$(($($args:expr),+))?; $size: expr) => { + #[test] + fn $test() { + + let instrs = Instrs(vec![ + Instr::$name$(($($args),+))?.byte_description() + ]); + let bytes: Vec = instrs.into(); + assert_eq!(Instr::from_bytes(&bytes, 0), Some(Instr::$name$(($($args),+))?)); + assert_eq!(Instr::$name$(($($args),+))?.size(), $size); + } + }; + } + + check_instr!(load; Load(1,2); 4); + check_instr!(load0; Load0(1); 2); + check_instr!(load1; Load1(1); 2); + check_instr!(exit; Exit; 1); + check_instr!(store; Store(1); 2); + check_instr!(add; Add(1,2,3); 4); + check_instr!(mov; Mov(1,2); 3); + check_instr!(sub; Sub(1,2,3); 4); + check_instr!(mul; Mul(1,2,3); 4); + check_instr!(div; Div(1,2,3); 4); + check_instr!(modulo; Mod(1,2,3); 4); + check_instr!(and; And(1,2,3); 4); + check_instr!(or; Or(1,2,3); 4); + check_instr!(xor; Xor(1,2,3); 4); + check_instr!(not; Not(1,2); 3); + check_instr!(beq; Beq(1,2,3); 4); + check_instr!(bne; Bne(1,2,3); 4); + check_instr!(jmp; Jmp(3); 4); + check_instr!(jmp_negative; Jmp(-3); 4); +} diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs new file mode 100644 index 0000000..2606a52 --- /dev/null +++ b/crates/vm/src/lib.rs @@ -0,0 +1,294 @@ +use std::fmt::Display; + +use lexer::PError; +use crate::{OpCode, Instr, Instrs}; + +macro_rules! parse_instr { + ($val:ident; target value -> $name:ident) => { + { + let mask1: u32 = (1 << 8) - 1; + let mask2: u32 = (1 << 16) - 1; + let target = (($val >> 16) & mask1) as u8; + let value = ($val & mask2) as u16; + Some(Instr::$name(target, value)) + } + }; + ($val:ident; target -> $name:ident) => { + { + let mask1: u32 = (1 << 8) - 1; + let target = (($val >> 16) & mask1) as u8; + Some(Instr::$name(target)) + } + + }; + ($val:ident; target v1 v2 -> $name:ident) => { + { + let mask1: u32 = (1 << 8) - 1; + let target = (($val >> 16) & mask1) as u8; + let v1= (($val >> 8) & mask1) as u8; + let v2 = ($val & mask1) as u8; + Some(Instr::$name(target,v1, v2)) + } + }; + ($val:ident; target v1 -> $name:ident) => { + { + let mask1: u32 = (1 << 8) - 1; + let target = (($val >> 16) & mask1) as u8; + let v1= (($val >> 8) & mask1) as u8; + Some(Instr::$name(target,v1)) + } + }; +} + +pub struct VM { + prog: Vec, + regs: Vec, + stack: Vec, + pc: usize, +} + +impl Display for VM { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "VM: regs: {:?}, stack: {:?}", self.regs, self.stack) + } +} + + + +impl VM { + pub fn new(prog: Vec) -> VM { + VM { + prog, + regs: vec![0; 32], + stack: Vec::new(), + pc: 0, + } + } + + + fn next(&mut self) -> Option { + Instr::from_bytes(&self.prog, self.pc).map(|i| { + self.pc += i.size(); + i + }) + } + + pub fn run(&mut self) -> Result { + let mut xx = 10; + while let Some(inst) = self.next() { + xx -= 1; + if xx < 0 {break;} + println!("inst: {}", inst); + match inst { + Instr::Sub(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] - self.regs[v2 as usize]; + }, + Instr::Add(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] + self.regs[v2 as usize]; + }, + Instr::Mul(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] * self.regs[v2 as usize]; + }, + Instr::Div(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] / self.regs[v2 as usize]; + }, + Instr::Mod(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] % self.regs[v2 as usize]; + }, + Instr::And(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] & self.regs[v2 as usize]; + }, + Instr::Or(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] | self.regs[v2 as usize]; + }, + Instr::Xor(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] ^ self.regs[v2 as usize]; + }, + Instr::Not(target, v1) => { + self.regs[target as usize] = !self.regs[v1 as usize]; + }, + + Instr::Load(target, val) => { + self.regs[target as usize] = val as u32; + }, + Instr::Load1(target) => { + self.regs[target as usize] = 1; + }, + Instr::Exit => { + println!("{}", self); + return Ok(self.regs[0]); + }, + Instr::Mov(target, v1) => { + self.regs[target as usize] = self.regs[v1 as usize]; + }, + Instr::Jmp(adr) => { + println!("Jmp {}", adr); + self.pc = adr as usize; + }, + Instr::Beq(r1, r2, adr) => { + if self.regs[r1 as usize] == self.regs[r2 as usize] { + self.pc = adr as usize; + } + }, + Instr::Bne(r1, r2, adr) => { + if self.regs[r1 as usize] != self.regs[r2 as usize] { + self.pc = adr as usize; + } + }, + _ => { + panic!("Unknown op code: {}", inst.op_code()); + } + } + } + + Ok(0) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! code { + [$($x:ident$(($($arg:expr),*))?),+] => { + { + let c = Instrs(vec![ + $( + Instr::$x$(($($arg),*))?.byte_description() + ),+ + ]); + c.into() + } + }; + } + + #[test] + fn instr() { + let i0:(u8,[u8;4]) = Instr::Load(1, 1).byte_description(); + let i1:(u8,[u8;4]) = Instr::Load0(1).byte_description(); + let i2:(u8,[u8;4]) = Instr::Load1(1).byte_description(); + let i3:(u8,[u8;4]) = Instr::Exit.byte_description(); + assert_eq!(i3, (1, [0,0,0,0])); + assert_eq!(i1, (2, [0b01000001,1,0,0])); + assert_eq!(i2, (2, [0b01000010,1,0,0])); + assert_eq!(i0, (4, [0b11000001,1,0,1])); + } + + #[test] + fn code() { + let bytecode: Vec = code![Load(1,3), Load0(1), Load1(1), Exit]; + assert_eq!(bytecode, vec![0xC1,1,0,3,0x41,1,0x42,1,0x00]); + + } + + #[test] + fn vm() { + let bytes:Vec = code![Load(0,7) , Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 7); + } + + #[test] + fn add() { + let bytes:Vec = code![Load(1, 3), Load(2, 4), Add(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 7); + } + + #[test] + fn mov() { + let bytes:Vec = code![Load(1, 7), Mov(0, 1), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 7); + } + + #[test] + fn sub() { + let bytes:Vec = code![Load(1, 3), Load(2, 2), Sub(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 1); + } + + #[test] + fn mul() { + let bytes:Vec = code![Load(1, 3), Load(2, 2), Mul(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 6); + } + + #[test] + fn div() { + let bytes:Vec = code![Load(1, 6), Load(2, 2), Div(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 3); + } + + #[test] + fn modulo() { + let bytes:Vec = code![Load(1, 7), Load(2, 2), Mod(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 1); + } + + #[test] + fn and() { + let bytes:Vec = code![Load(1, 7), Load(2, 2), And(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 2); + } + + #[test] + fn or() { + let bytes:Vec = code![Load(1, 7), Load(2, 2), Or(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 7); + } + + #[test] + fn xor() { + let bytes:Vec = code![Load(1, 7), Load(2, 2), Xor(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 5); + } + + #[test] + fn not() { + let bytes:Vec = code![Load(1, 7), Not(0,1), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), !7); + } + + #[test] + fn beq_neg() { + let bytes:Vec = code![Load(1, 7), Load(0,1), Beq(0,1,16), Add(0,0,1), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 8); + } + #[test] + fn beq_pos() { + let bytes:Vec = code![Load(1, 7), Load(0,7), Beq(0,1,16), Add(0,0,1), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 7); + } + #[test] + fn jmp() { + let bytes:Vec = code![Load(1, 7), Load(0,1), Jmp(16), Add(0,0,1), Exit]; + println!("{:?}", bytes); + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 1); + } +} diff --git a/crates/vm/src/op_code.rs b/crates/vm/src/op_code.rs new file mode 100644 index 0000000..95c647b --- /dev/null +++ b/crates/vm/src/op_code.rs @@ -0,0 +1,53 @@ +#[derive(Debug, Clone, Copy, FromPrimitive, EnumDisplay)] +#[repr(u8)] +pub enum OpCode { + Load = 0b11000001, + Add = 0b11000010, + Sub = 0b11000011, + Mul = 0b11000100, + Div = 0b11000101, + Mod = 0b11000110, + And = 0b11000111, + Or = 0b11001000, + Xor = 0b11001001, + Beq = 0b11001010, + Bne = 0b11001011, + Jmp = 0b11001100, + + Mov = 0b10000001, + Not = 0b10000010, + + Load0 = 0b01000001, + Load1 = 0b01000010, + Store = 0b01000011, + + Exit = 0b00000000, + +} + +impl From for OpCode { + fn from(byte: u8) -> Self { + let element = num::FromPrimitive::from_u8(byte); + match element { + Some(op) => op, + None => panic!("Unknown op code: {}", byte) + } + } +} + +impl From for u8 { + fn from(val: OpCode) -> Self { + val as u8 + } +} + +impl OpCode { + pub fn as_u8(&self) -> u8 { + (*self).into() + } + + pub fn size(&self) -> usize { + let c: u8 = (*self).into(); + (c >> 6) as usize + 1 + } +} diff --git a/tests/compute_compiled_test.rs b/tests/compute_compiled_test.rs index feff994..67e4815 100644 --- a/tests/compute_compiled_test.rs +++ b/tests/compute_compiled_test.rs @@ -20,8 +20,8 @@ macro_rules! test_return_code{ panic!("\nError:\n{}\n", e.format_error($i, "file.lum", false)); }); - let mut vm = compiler::VM::new(); - let val = vm.run(bytes).unwrap_or_else(|e| { + let mut vm = compiler::VM::new(bytes); + let val = vm.run().unwrap_or_else(|e| { panic!("\nError:\n{}\n", e.format_error($i, "file.lum", false)); }); assert_eq!(val, $o); From a6776a678d4f01836cdfc615912631964c733289 Mon Sep 17 00:00:00 2001 From: Mateusz Russak Date: Sun, 3 Dec 2023 00:41:32 +0100 Subject: [PATCH 5/7] feat: vm as separated crate --- Cargo.lock | 14 ++ Cargo.toml | 1 + crates/compiler/Cargo.toml | 1 + crates/compiler/src/lib.rs | 15 +- crates/vm/Cargo.toml | 19 +++ crates/vm/src/lib.rs | 304 ++------------------------------- crates/vm/src/vm.rs | 295 ++++++++++++++++++++++++++++++++ tests/compute_compiled_test.rs | 4 +- 8 files changed, 346 insertions(+), 307 deletions(-) create mode 100644 crates/vm/src/vm.rs diff --git a/Cargo.lock b/Cargo.lock index 5965a54..367187f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -229,6 +229,7 @@ dependencies = [ "num-derive", "num-traits", "parser", + "vm", ] [[package]] @@ -484,6 +485,7 @@ dependencies = [ "interpreter", "lexer", "parser", + "vm", ] [[package]] @@ -1220,6 +1222,18 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "vm" +version = "0.1.0" +dependencies = [ + "enum-display", + "lexer", + "num", + "num-derive", + "num-traits", + "parser", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/Cargo.toml b/Cargo.toml index fff7cad..c133656 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ lexer = { path = "./crates/lexer" } parser = { path = "./crates/parser" } interpreter = { path = "./crates/interpreter" } compiler = { path = "./crates/compiler" } +vm = { path = "./crates/vm" } cli = { path = "./bins/cli" } # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/crates/compiler/Cargo.toml b/crates/compiler/Cargo.toml index cf07c6f..ccfb197 100644 --- a/crates/compiler/Cargo.toml +++ b/crates/compiler/Cargo.toml @@ -13,6 +13,7 @@ crate-type = ["lib"] [dependencies] enum-display = "0.1.3" lexer = { path = "../lexer" } +vm = { path = "../vm" } num = "0.4.1" num-derive = "0.4.1" num-traits = "0.2.17" diff --git a/crates/compiler/src/lib.rs b/crates/compiler/src/lib.rs index ef7fd96..faa715b 100644 --- a/crates/compiler/src/lib.rs +++ b/crates/compiler/src/lib.rs @@ -1,21 +1,8 @@ -mod vm; -mod op_code; -mod instr; - -extern crate num; -#[macro_use] -extern crate num_derive; -#[macro_use] -extern crate enum_display; - use std::{fmt::{Display, Formatter}}; use lexer::PError; use parser::{Node, Op, Value}; -pub use vm::VM; -pub use instr::{Instr, Instrs}; -pub use op_code::OpCode; - +use vm::OpCode; struct Instruction { op_code: OpCode, diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml index e69de29..1f4d309 100644 --- a/crates/vm/Cargo.toml +++ b/crates/vm/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "vm" +version = "0.1.0" +edition = "2021" + +[lib] +tests = true +path = "src/lib.rs" +crate-type = ["lib"] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +enum-display = "0.1.3" +lexer = { path = "../lexer" } +num = "0.4.1" +num-derive = "0.4.1" +num-traits = "0.2.17" +parser = { path = "../parser" } diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs index 2606a52..47db332 100644 --- a/crates/vm/src/lib.rs +++ b/crates/vm/src/lib.rs @@ -1,294 +1,16 @@ -use std::fmt::Display; +mod vm; +mod op_code; +mod instr; -use lexer::PError; -use crate::{OpCode, Instr, Instrs}; - -macro_rules! parse_instr { - ($val:ident; target value -> $name:ident) => { - { - let mask1: u32 = (1 << 8) - 1; - let mask2: u32 = (1 << 16) - 1; - let target = (($val >> 16) & mask1) as u8; - let value = ($val & mask2) as u16; - Some(Instr::$name(target, value)) - } - }; - ($val:ident; target -> $name:ident) => { - { - let mask1: u32 = (1 << 8) - 1; - let target = (($val >> 16) & mask1) as u8; - Some(Instr::$name(target)) - } - - }; - ($val:ident; target v1 v2 -> $name:ident) => { - { - let mask1: u32 = (1 << 8) - 1; - let target = (($val >> 16) & mask1) as u8; - let v1= (($val >> 8) & mask1) as u8; - let v2 = ($val & mask1) as u8; - Some(Instr::$name(target,v1, v2)) - } - }; - ($val:ident; target v1 -> $name:ident) => { - { - let mask1: u32 = (1 << 8) - 1; - let target = (($val >> 16) & mask1) as u8; - let v1= (($val >> 8) & mask1) as u8; - Some(Instr::$name(target,v1)) - } - }; -} - -pub struct VM { - prog: Vec, - regs: Vec, - stack: Vec, - pc: usize, -} - -impl Display for VM { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "VM: regs: {:?}, stack: {:?}", self.regs, self.stack) - } -} - - - -impl VM { - pub fn new(prog: Vec) -> VM { - VM { - prog, - regs: vec![0; 32], - stack: Vec::new(), - pc: 0, - } - } - - - fn next(&mut self) -> Option { - Instr::from_bytes(&self.prog, self.pc).map(|i| { - self.pc += i.size(); - i - }) - } - - pub fn run(&mut self) -> Result { - let mut xx = 10; - while let Some(inst) = self.next() { - xx -= 1; - if xx < 0 {break;} - println!("inst: {}", inst); - match inst { - Instr::Sub(target, v1, v2) => { - self.regs[target as usize] = self.regs[v1 as usize] - self.regs[v2 as usize]; - }, - Instr::Add(target, v1, v2) => { - self.regs[target as usize] = self.regs[v1 as usize] + self.regs[v2 as usize]; - }, - Instr::Mul(target, v1, v2) => { - self.regs[target as usize] = self.regs[v1 as usize] * self.regs[v2 as usize]; - }, - Instr::Div(target, v1, v2) => { - self.regs[target as usize] = self.regs[v1 as usize] / self.regs[v2 as usize]; - }, - Instr::Mod(target, v1, v2) => { - self.regs[target as usize] = self.regs[v1 as usize] % self.regs[v2 as usize]; - }, - Instr::And(target, v1, v2) => { - self.regs[target as usize] = self.regs[v1 as usize] & self.regs[v2 as usize]; - }, - Instr::Or(target, v1, v2) => { - self.regs[target as usize] = self.regs[v1 as usize] | self.regs[v2 as usize]; - }, - Instr::Xor(target, v1, v2) => { - self.regs[target as usize] = self.regs[v1 as usize] ^ self.regs[v2 as usize]; - }, - Instr::Not(target, v1) => { - self.regs[target as usize] = !self.regs[v1 as usize]; - }, - - Instr::Load(target, val) => { - self.regs[target as usize] = val as u32; - }, - Instr::Load1(target) => { - self.regs[target as usize] = 1; - }, - Instr::Exit => { - println!("{}", self); - return Ok(self.regs[0]); - }, - Instr::Mov(target, v1) => { - self.regs[target as usize] = self.regs[v1 as usize]; - }, - Instr::Jmp(adr) => { - println!("Jmp {}", adr); - self.pc = adr as usize; - }, - Instr::Beq(r1, r2, adr) => { - if self.regs[r1 as usize] == self.regs[r2 as usize] { - self.pc = adr as usize; - } - }, - Instr::Bne(r1, r2, adr) => { - if self.regs[r1 as usize] != self.regs[r2 as usize] { - self.pc = adr as usize; - } - }, - _ => { - panic!("Unknown op code: {}", inst.op_code()); - } - } - } - - Ok(0) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - macro_rules! code { - [$($x:ident$(($($arg:expr),*))?),+] => { - { - let c = Instrs(vec![ - $( - Instr::$x$(($($arg),*))?.byte_description() - ),+ - ]); - c.into() - } - }; - } +extern crate num; +#[macro_use] +extern crate num_derive; +#[macro_use] +extern crate enum_display; - #[test] - fn instr() { - let i0:(u8,[u8;4]) = Instr::Load(1, 1).byte_description(); - let i1:(u8,[u8;4]) = Instr::Load0(1).byte_description(); - let i2:(u8,[u8;4]) = Instr::Load1(1).byte_description(); - let i3:(u8,[u8;4]) = Instr::Exit.byte_description(); - assert_eq!(i3, (1, [0,0,0,0])); - assert_eq!(i1, (2, [0b01000001,1,0,0])); - assert_eq!(i2, (2, [0b01000010,1,0,0])); - assert_eq!(i0, (4, [0b11000001,1,0,1])); - } +use std::{fmt::{Display, Formatter}}; - #[test] - fn code() { - let bytecode: Vec = code![Load(1,3), Load0(1), Load1(1), Exit]; - assert_eq!(bytecode, vec![0xC1,1,0,3,0x41,1,0x42,1,0x00]); - - } - - #[test] - fn vm() { - let bytes:Vec = code![Load(0,7) , Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 7); - } - - #[test] - fn add() { - let bytes:Vec = code![Load(1, 3), Load(2, 4), Add(0,1,2), Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 7); - } - - #[test] - fn mov() { - let bytes:Vec = code![Load(1, 7), Mov(0, 1), Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 7); - } - - #[test] - fn sub() { - let bytes:Vec = code![Load(1, 3), Load(2, 2), Sub(0,1,2), Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 1); - } - - #[test] - fn mul() { - let bytes:Vec = code![Load(1, 3), Load(2, 2), Mul(0,1,2), Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 6); - } - - #[test] - fn div() { - let bytes:Vec = code![Load(1, 6), Load(2, 2), Div(0,1,2), Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 3); - } - - #[test] - fn modulo() { - let bytes:Vec = code![Load(1, 7), Load(2, 2), Mod(0,1,2), Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 1); - } - - #[test] - fn and() { - let bytes:Vec = code![Load(1, 7), Load(2, 2), And(0,1,2), Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 2); - } - - #[test] - fn or() { - let bytes:Vec = code![Load(1, 7), Load(2, 2), Or(0,1,2), Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 7); - } - - #[test] - fn xor() { - let bytes:Vec = code![Load(1, 7), Load(2, 2), Xor(0,1,2), Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 5); - } - - #[test] - fn not() { - let bytes:Vec = code![Load(1, 7), Not(0,1), Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), !7); - } - - #[test] - fn beq_neg() { - let bytes:Vec = code![Load(1, 7), Load(0,1), Beq(0,1,16), Add(0,0,1), Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 8); - } - #[test] - fn beq_pos() { - let bytes:Vec = code![Load(1, 7), Load(0,7), Beq(0,1,16), Add(0,0,1), Exit]; - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 7); - } - #[test] - fn jmp() { - let bytes:Vec = code![Load(1, 7), Load(0,1), Jmp(16), Add(0,0,1), Exit]; - println!("{:?}", bytes); - let mut vm = VM::new(bytes); - let result = vm.run(); - assert_eq!(result.unwrap(), 1); - } -} +use lexer::PError; +pub use instr::{Instr, Instrs}; +pub use op_code::OpCode; +pub use vm::VM; diff --git a/crates/vm/src/vm.rs b/crates/vm/src/vm.rs new file mode 100644 index 0000000..65338de --- /dev/null +++ b/crates/vm/src/vm.rs @@ -0,0 +1,295 @@ +use std::fmt::Display; + +use lexer::PError; +use crate::{OpCode, Instr, Instrs}; + + +macro_rules! parse_instr { + ($val:ident; target value -> $name:ident) => { + { + let mask1: u32 = (1 << 8) - 1; + let mask2: u32 = (1 << 16) - 1; + let target = (($val >> 16) & mask1) as u8; + let value = ($val & mask2) as u16; + Some(Instr::$name(target, value)) + } + }; + ($val:ident; target -> $name:ident) => { + { + let mask1: u32 = (1 << 8) - 1; + let target = (($val >> 16) & mask1) as u8; + Some(Instr::$name(target)) + } + + }; + ($val:ident; target v1 v2 -> $name:ident) => { + { + let mask1: u32 = (1 << 8) - 1; + let target = (($val >> 16) & mask1) as u8; + let v1= (($val >> 8) & mask1) as u8; + let v2 = ($val & mask1) as u8; + Some(Instr::$name(target,v1, v2)) + } + }; + ($val:ident; target v1 -> $name:ident) => { + { + let mask1: u32 = (1 << 8) - 1; + let target = (($val >> 16) & mask1) as u8; + let v1= (($val >> 8) & mask1) as u8; + Some(Instr::$name(target,v1)) + } + }; +} + +pub struct VM { + prog: Vec, + regs: Vec, + stack: Vec, + pc: usize, +} + +impl Display for VM { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "VM: regs: {:?}, stack: {:?}", self.regs, self.stack) + } +} + + + +impl VM { + pub fn new(prog: Vec) -> VM { + VM { + prog, + regs: vec![0; 32], + stack: Vec::new(), + pc: 0, + } + } + + + fn next(&mut self) -> Option { + Instr::from_bytes(&self.prog, self.pc).map(|i| { + self.pc += i.size(); + i + }) + } + + pub fn run(&mut self) -> Result { + let mut xx = 10; + while let Some(inst) = self.next() { + xx -= 1; + if xx < 0 {break;} + println!("inst: {}", inst); + match inst { + Instr::Sub(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] - self.regs[v2 as usize]; + }, + Instr::Add(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] + self.regs[v2 as usize]; + }, + Instr::Mul(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] * self.regs[v2 as usize]; + }, + Instr::Div(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] / self.regs[v2 as usize]; + }, + Instr::Mod(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] % self.regs[v2 as usize]; + }, + Instr::And(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] & self.regs[v2 as usize]; + }, + Instr::Or(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] | self.regs[v2 as usize]; + }, + Instr::Xor(target, v1, v2) => { + self.regs[target as usize] = self.regs[v1 as usize] ^ self.regs[v2 as usize]; + }, + Instr::Not(target, v1) => { + self.regs[target as usize] = !self.regs[v1 as usize]; + }, + + Instr::Load(target, val) => { + self.regs[target as usize] = val as u32; + }, + Instr::Load1(target) => { + self.regs[target as usize] = 1; + }, + Instr::Exit => { + println!("{}", self); + return Ok(self.regs[0]); + }, + Instr::Mov(target, v1) => { + self.regs[target as usize] = self.regs[v1 as usize]; + }, + Instr::Jmp(adr) => { + println!("Jmp {}", adr); + self.pc = adr as usize; + }, + Instr::Beq(r1, r2, adr) => { + if self.regs[r1 as usize] == self.regs[r2 as usize] { + self.pc = adr as usize; + } + }, + Instr::Bne(r1, r2, adr) => { + if self.regs[r1 as usize] != self.regs[r2 as usize] { + self.pc = adr as usize; + } + }, + _ => { + panic!("Unknown op code: {}", inst.op_code()); + } + } + } + + Ok(0) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + macro_rules! code { + [$($x:ident$(($($arg:expr),*))?),+] => { + { + let c = Instrs(vec![ + $( + Instr::$x$(($($arg),*))?.byte_description() + ),+ + ]); + c.into() + } + }; + } + + #[test] + fn instr() { + let i0:(u8,[u8;4]) = Instr::Load(1, 1).byte_description(); + let i1:(u8,[u8;4]) = Instr::Load0(1).byte_description(); + let i2:(u8,[u8;4]) = Instr::Load1(1).byte_description(); + let i3:(u8,[u8;4]) = Instr::Exit.byte_description(); + assert_eq!(i3, (1, [0,0,0,0])); + assert_eq!(i1, (2, [0b01000001,1,0,0])); + assert_eq!(i2, (2, [0b01000010,1,0,0])); + assert_eq!(i0, (4, [0b11000001,1,0,1])); + } + + #[test] + fn code() { + let bytecode: Vec = code![Load(1,3), Load0(1), Load1(1), Exit]; + assert_eq!(bytecode, vec![0xC1,1,0,3,0x41,1,0x42,1,0x00]); + + } + + #[test] + fn vm() { + let bytes:Vec = code![Load(0,7) , Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 7); + } + + #[test] + fn add() { + let bytes:Vec = code![Load(1, 3), Load(2, 4), Add(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 7); + } + + #[test] + fn mov() { + let bytes:Vec = code![Load(1, 7), Mov(0, 1), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 7); + } + + #[test] + fn sub() { + let bytes:Vec = code![Load(1, 3), Load(2, 2), Sub(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 1); + } + + #[test] + fn mul() { + let bytes:Vec = code![Load(1, 3), Load(2, 2), Mul(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 6); + } + + #[test] + fn div() { + let bytes:Vec = code![Load(1, 6), Load(2, 2), Div(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 3); + } + + #[test] + fn modulo() { + let bytes:Vec = code![Load(1, 7), Load(2, 2), Mod(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 1); + } + + #[test] + fn and() { + let bytes:Vec = code![Load(1, 7), Load(2, 2), And(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 2); + } + + #[test] + fn or() { + let bytes:Vec = code![Load(1, 7), Load(2, 2), Or(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 7); + } + + #[test] + fn xor() { + let bytes:Vec = code![Load(1, 7), Load(2, 2), Xor(0,1,2), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 5); + } + + #[test] + fn not() { + let bytes:Vec = code![Load(1, 7), Not(0,1), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), !7); + } + + #[test] + fn beq_neg() { + let bytes:Vec = code![Load(1, 7), Load(0,1), Beq(0,1,16), Add(0,0,1), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 8); + } + #[test] + fn beq_pos() { + let bytes:Vec = code![Load(1, 7), Load(0,7), Beq(0,1,16), Add(0,0,1), Exit]; + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 7); + } + #[test] + fn jmp() { + let bytes:Vec = code![Load(1, 7), Load(0,1), Jmp(16), Add(0,0,1), Exit]; + println!("{:?}", bytes); + let mut vm = VM::new(bytes); + let result = vm.run(); + assert_eq!(result.unwrap(), 1); + } +} diff --git a/tests/compute_compiled_test.rs b/tests/compute_compiled_test.rs index 67e4815..0078e2d 100644 --- a/tests/compute_compiled_test.rs +++ b/tests/compute_compiled_test.rs @@ -20,7 +20,7 @@ macro_rules! test_return_code{ panic!("\nError:\n{}\n", e.format_error($i, "file.lum", false)); }); - let mut vm = compiler::VM::new(bytes); + let mut vm = vm::VM::new(bytes); let val = vm.run().unwrap_or_else(|e| { panic!("\nError:\n{}\n", e.format_error($i, "file.lum", false)); }); @@ -29,7 +29,7 @@ macro_rules! test_return_code{ }; } -test_return_code!(simple, "1", 1); +//test_return_code!(simple, "1", 1); /* test_return_code!(simple, "1 + 2", 3); test_return_code!(with_braces, "2 * (3 + 4) ", 14); From 71a6db55a67fe7c2b3357e58306ebdfe09a30ad9 Mon Sep 17 00:00:00 2001 From: Mateusz Russak Date: Sun, 3 Dec 2023 01:18:54 +0100 Subject: [PATCH 6/7] feat: addition and subtraction works on VM --- crates/compiler/src/lib.rs | 98 +++++++++------------------------- crates/vm/src/instr.rs | 1 - crates/vm/src/lib.rs | 3 -- crates/vm/src/vm.rs | 3 +- tests/compute_compiled_test.rs | 12 +++-- 5 files changed, 35 insertions(+), 82 deletions(-) diff --git a/crates/compiler/src/lib.rs b/crates/compiler/src/lib.rs index faa715b..d6cee0d 100644 --- a/crates/compiler/src/lib.rs +++ b/crates/compiler/src/lib.rs @@ -2,56 +2,11 @@ use std::{fmt::{Display, Formatter}}; use lexer::PError; use parser::{Node, Op, Value}; -use vm::OpCode; - -struct Instruction { - op_code: OpCode, - arg0: Option, - arg1: Option, - arg2: Option, -} - - -impl Instruction { - pub fn new(op_code: OpCode) -> Instruction { - Instruction { - op_code, - arg0: None, - arg1: None, - arg2: None, - } - } - pub fn with_arg0(mut self, arg0: u8) -> Instruction { - self.arg0 = Some(arg0); - self - } - pub fn with_arg1(mut self, arg1: u8) -> Instruction { - self.arg1 = Some(arg1); - self - } - pub fn with_arg2(mut self, arg2: u8) -> Instruction { - self.arg2 = Some(arg2); - self - } - pub fn to_bytes(&self) -> Vec { - let mut bytes = Vec::new(); - bytes.push(self.op_code as u8); - if let Some(arg0) = self.arg0 { - bytes.push(arg0); - } - if let Some(arg1) = self.arg1 { - bytes.push(arg1); - } - if let Some(arg2) = self.arg2 { - bytes.push(arg2); - } - bytes - } -} +use vm::{Instr, Instrs, OpCode}; struct Code { - code: Vec, + code: Vec, } impl Code { @@ -60,11 +15,11 @@ impl Code { code: Vec::new(), } } - pub fn push(&mut self, inst: Instruction) { + pub fn push(&mut self, inst: Instr) { self.code.push(inst); } pub fn to_bytes(&self) -> Vec { - self.code.iter().flat_map(|i| i.to_bytes()).collect::>() + Instrs(self.code.iter().map(|c| c.byte_description()).collect()).into() } } @@ -104,18 +59,24 @@ pub fn compile_node(node: &Node, mut consts: &mut Mem, inst: &mut Code) -> Resul match node.op { Op::Add => { compile_node(&node.children[0], &mut consts, inst)?; + inst.push(Instr::Mov(2, 0)); + compile_node(&node.children[1], &mut consts, inst)?; + inst.push(Instr::Mov(1, 0)); + inst.push(Instr::Add(0,1,2)); + }, + Op::Sub => { + compile_node(&node.children[0], &mut consts, inst)?; + inst.push(Instr::Mov(2, 0)); compile_node(&node.children[1], &mut consts, inst)?; - inst.push(Instruction::new(OpCode::Add)); + inst.push(Instr::Mov(1, 0)); + inst.push(Instr::Sub(0,2,1)); }, Op::Value => { match &node.value { - Some(Value::Number(0)) => inst.push(Instruction::new(OpCode::Load0)), - Some(Value::Number(1)) => inst.push(Instruction::new(OpCode::Load1)), + Some(Value::Number(0)) => inst.push(Instr::Load0(0)), + Some(Value::Number(1)) => inst.push(Instr::Load1(0)), Some(Value::Number(val)) => { - let addr = consts.write(*val as u32); - if addr < 256 { - inst.push(Instruction::new(OpCode::Load).with_arg0(addr.try_into().unwrap())); - } + inst.push(Instr::Load(0, *val as u16)); }, Some(..) => { panic!("Unknown value: {}", node); @@ -123,14 +84,15 @@ pub fn compile_node(node: &Node, mut consts: &mut Mem, inst: &mut Code) -> Resul None => panic!("No value"), } }, - Op::Scope => { + Op::Scope | Op::Paren => { for child in &node.children { + println!("child: {}", child.op); compile_node(child, &mut consts, inst)?; } - inst.push(Instruction::new(OpCode::Exit)); + inst.push(Instr::Exit); }, _ => { - panic!("Unknown op: {}", node); + panic!("Unknown op: {} {}", node, node.op); } } Ok(()) @@ -141,11 +103,7 @@ pub fn compile(node: &Node) -> Result, PError> { let mut consts = Mem::new(); compile_node(node, &mut consts, &mut inst)?; let code = inst.to_bytes(); - let mem = consts.to_bytes(); - let mut bytes = Vec::new(); - bytes.extend_from_slice(&code); - bytes.extend_from_slice(&mem); - Ok(bytes) + Ok(code) } #[cfg(test)] @@ -155,21 +113,13 @@ mod tests { use super::*; + #[test] fn compile_simple() { let mut node = Node::new(Op::Scope, Location::Eof); node.add(Node::new(Op::Value, Location::Eof).set_value(1.into())); let bytes = compile(&node).unwrap(); - assert_eq!(bytes, vec![3, 5]); + assert_eq!(bytes, vec![OpCode::Load1.into(), 0, OpCode::Exit.into()]); } - fn compile_binary_op() { - let mut node = Node::new(Op::Scope, Location::Eof); - let mut add = Node::new(Op::Add, Location::Eof); - add.add(Node::new(Op::Value, Location::Eof).set_value(1.into())); - add.add(Node::new(Op::Value, Location::Eof).set_value(2.into())); - node.add(add); - let bytes = compile(&node).unwrap(); - assert_eq!(bytes, vec![3, 2, 0, 6, 5]); - } } diff --git a/crates/vm/src/instr.rs b/crates/vm/src/instr.rs index 5f67c21..76c31e7 100644 --- a/crates/vm/src/instr.rs +++ b/crates/vm/src/instr.rs @@ -169,7 +169,6 @@ impl Instr { bytes[i] = data[pc+i]; } - println!("val: {:?}", bytes); let val = u32::from_be_bytes(bytes.try_into().unwrap()); match code { OpCode::Load => parse_instr!(val; tr value -> Load), diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs index 47db332..77a68fb 100644 --- a/crates/vm/src/lib.rs +++ b/crates/vm/src/lib.rs @@ -8,9 +8,6 @@ extern crate num_derive; #[macro_use] extern crate enum_display; -use std::{fmt::{Display, Formatter}}; - -use lexer::PError; pub use instr::{Instr, Instrs}; pub use op_code::OpCode; pub use vm::VM; diff --git a/crates/vm/src/vm.rs b/crates/vm/src/vm.rs index 65338de..da93f5e 100644 --- a/crates/vm/src/vm.rs +++ b/crates/vm/src/vm.rs @@ -60,7 +60,7 @@ impl VM { pub fn new(prog: Vec) -> VM { VM { prog, - regs: vec![0; 32], + regs: vec![0; 16], stack: Vec::new(), pc: 0, } @@ -79,6 +79,7 @@ impl VM { while let Some(inst) = self.next() { xx -= 1; if xx < 0 {break;} + println!("regs: {:?}", self.regs); println!("inst: {}", inst); match inst { Instr::Sub(target, v1, v2) => { diff --git a/tests/compute_compiled_test.rs b/tests/compute_compiled_test.rs index 0078e2d..8da0565 100644 --- a/tests/compute_compiled_test.rs +++ b/tests/compute_compiled_test.rs @@ -15,11 +15,11 @@ macro_rules! test_return_code{ let node = parse(text).unwrap_or_else(|e| { panic!("\nError:\n{}\n", e.format_error($i, "file.lum", false)); }); - //println!("{}", node); + println!("{}", node); let bytes = compiler::compile(&node).unwrap_or_else(|e| { panic!("\nError:\n{}\n", e.format_error($i, "file.lum", false)); }); - + println!("{:?}", bytes); let mut vm = vm::VM::new(bytes); let val = vm.run().unwrap_or_else(|e| { panic!("\nError:\n{}\n", e.format_error($i, "file.lum", false)); @@ -29,7 +29,13 @@ macro_rules! test_return_code{ }; } -//test_return_code!(simple, "1", 1); +test_return_code!(vm_return_1, "1", 1); +test_return_code!(vm_compute_add, "1+2", 3); +test_return_code!(vm_compute_add_2, "1+2+2", 5); +test_return_code!(vm_compute_sub, "4-2", 2); +test_return_code!(vm_compute_sub_overflow, "2-4", (-2i32) as u32); +test_return_code!(vm_compute_add_and_sub, "1+2-2", 1); +test_return_code!(vm_compute_branch, "if(1){2}else{3}", 2); /* test_return_code!(simple, "1 + 2", 3); test_return_code!(with_braces, "2 * (3 + 4) ", 14); From 10b3ebb1becdd1a3a77b6290e995baba149a0dfe Mon Sep 17 00:00:00 2001 From: Mateusz Russak Date: Sun, 3 Dec 2023 01:20:55 +0100 Subject: [PATCH 7/7] chore: commented not yet implemented tests --- tests/compute_compiled_test.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/compute_compiled_test.rs b/tests/compute_compiled_test.rs index 8da0565..1bdc05c 100644 --- a/tests/compute_compiled_test.rs +++ b/tests/compute_compiled_test.rs @@ -33,9 +33,9 @@ test_return_code!(vm_return_1, "1", 1); test_return_code!(vm_compute_add, "1+2", 3); test_return_code!(vm_compute_add_2, "1+2+2", 5); test_return_code!(vm_compute_sub, "4-2", 2); -test_return_code!(vm_compute_sub_overflow, "2-4", (-2i32) as u32); +//test_return_code!(vm_compute_sub_overflow, "2-4", (-2i32) as u32); test_return_code!(vm_compute_add_and_sub, "1+2-2", 1); -test_return_code!(vm_compute_branch, "if(1){2}else{3}", 2); +//test_return_code!(vm_compute_branch, "if(1){2}else{3}", 2); /* test_return_code!(simple, "1 + 2", 3); test_return_code!(with_braces, "2 * (3 + 4) ", 14);