diff --git a/Cargo.lock b/Cargo.lock index 8217d007..20e2a248 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -169,7 +169,7 @@ dependencies = [ [[package]] name = "benchtool" -version = "0.8.0" +version = "0.9.0" dependencies = [ "ckb-vm", "clap 4.4.6", @@ -1563,7 +1563,7 @@ checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" [[package]] name = "polkatool" -version = "0.8.0" +version = "0.9.0" dependencies = [ "clap 4.4.6", "env_logger 0.10.0", @@ -1575,7 +1575,7 @@ dependencies = [ [[package]] name = "polkavm" -version = "0.8.0" +version = "0.9.0" dependencies = [ "env_logger 0.10.0", "image", @@ -1590,7 +1590,7 @@ dependencies = [ [[package]] name = "polkavm-assembler" -version = "0.8.0" +version = "0.9.0" dependencies = [ "iced-x86", "log", @@ -1598,7 +1598,7 @@ dependencies = [ [[package]] name = "polkavm-common" -version = "0.8.0" +version = "0.9.0" dependencies = [ "log", "proptest", @@ -1606,14 +1606,14 @@ dependencies = [ [[package]] name = "polkavm-derive" -version = "0.8.0" +version = "0.9.0" dependencies = [ "polkavm-derive-impl-macro", ] [[package]] name = "polkavm-derive-impl" -version = "0.8.0" +version = "0.9.0" dependencies = [ "polkavm-common", "proc-macro2", @@ -1623,7 +1623,7 @@ dependencies = [ [[package]] name = "polkavm-derive-impl-macro" -version = "0.8.0" +version = "0.9.0" dependencies = [ "polkavm-derive-impl", "syn 2.0.38", @@ -1631,7 +1631,7 @@ dependencies = [ [[package]] name = "polkavm-linker" -version = "0.8.2" +version = "0.9.0" dependencies = [ "gimli 0.28.0", "hashbrown 0.14.1", @@ -1644,7 +1644,7 @@ dependencies = [ [[package]] name = "polkavm-linux-raw" -version = "0.8.0" +version = "0.9.0" [[package]] name = "polkavm-linux-raw-generate" @@ -2233,7 +2233,7 @@ checksum = "f27f6278552951f1f2b8cf9da965d10969b2efdea95a6ec47987ab46edfe263a" [[package]] name = "simplealloc" -version = "0.8.0" +version = "0.9.0" [[package]] name = "slice-group-by" diff --git a/Cargo.toml b/Cargo.toml index fdb0be2a..d77b8a32 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ members = [ ] [workspace.package] -version = "0.8.0" +version = "0.9.0" authors = ["Jan Bujak ", "Parity Technologies "] license = "MIT/Apache-2.0" edition = "2021" @@ -29,14 +29,14 @@ rust-version = "1.70.0" repository = "https://github.com/koute/polkavm" [workspace.dependencies] -polkavm = { version = "0.8.0", path = "crates/polkavm" } -polkavm-assembler = { version = "0.8.0", path = "crates/polkavm-assembler" } -polkavm-common = { version = "0.8.0", path = "crates/polkavm-common" } -polkavm-derive = { version = "0.8.0", path = "crates/polkavm-derive" } -polkavm-derive-impl = { version = "0.8.0", path = "crates/polkavm-derive-impl" } -polkavm-derive-impl-macro = { version = "0.8.0", path = "crates/polkavm-derive-impl-macro" } -polkavm-linker = { version = "0.8.0", path = "crates/polkavm-linker" } -polkavm-linux-raw = { version = "0.8.0", path = "crates/polkavm-linux-raw" } +polkavm = { version = "0.9.0", path = "crates/polkavm" } +polkavm-assembler = { version = "0.9.0", path = "crates/polkavm-assembler" } +polkavm-common = { version = "0.9.0", path = "crates/polkavm-common" } +polkavm-derive = { version = "0.9.0", path = "crates/polkavm-derive" } +polkavm-derive-impl = { version = "0.9.0", path = "crates/polkavm-derive-impl" } +polkavm-derive-impl-macro = { version = "0.9.0", path = "crates/polkavm-derive-impl-macro" } +polkavm-linker = { version = "0.9.0", path = "crates/polkavm-linker" } +polkavm-linux-raw = { version = "0.9.0", path = "crates/polkavm-linux-raw" } clap = "4.4.6" env_logger = { version = "0.10.0", default-features = false } @@ -58,6 +58,8 @@ syn = "2.0.25" # This also triggers on cases like `.or_insert_with(Vec::new)`. unwrap_or_default = "allow" get_first = "allow" +manual_range_contains = "allow" +let_unit_value = "allow" # These are are 'allow' by default. 
alloc_instead_of_core = "warn" diff --git a/crates/polkavm-common/src/abi.rs b/crates/polkavm-common/src/abi.rs index 8c8a849a..16b319f9 100644 --- a/crates/polkavm-common/src/abi.rs +++ b/crates/polkavm-common/src/abi.rs @@ -6,21 +6,13 @@ use core::ops::Range; const ADDRESS_SPACE_SIZE: u64 = 0x100000000_u64; -/// The page size of the VM. -/// -/// This is the minimum granularity with which the VM can allocate memory. -pub const VM_PAGE_SIZE: u32 = 0x4000; +/// The minimum page size of the VM. +pub const VM_MIN_PAGE_SIZE: u32 = 0x1000; /// The maximum page size of the VM. pub const VM_MAX_PAGE_SIZE: u32 = 0x10000; -static_assert!(VM_PAGE_SIZE <= VM_MAX_PAGE_SIZE); -static_assert!(VM_MAX_PAGE_SIZE % VM_PAGE_SIZE == 0); - -/// The address at which the program's memory starts inside of the VM. -/// -/// This is directly accessible by the program running inside of the VM. -pub const VM_ADDR_USER_MEMORY: u32 = VM_MAX_PAGE_SIZE; +static_assert!(VM_MIN_PAGE_SIZE <= VM_MAX_PAGE_SIZE); /// The address at which the program's stack starts inside of the VM. /// @@ -33,15 +25,6 @@ pub const VM_ADDR_USER_STACK_HIGH: u32 = (ADDRESS_SPACE_SIZE - VM_MAX_PAGE_SIZE pub const VM_ADDR_RETURN_TO_HOST: u32 = 0xffff0000; static_assert!(VM_ADDR_RETURN_TO_HOST & 0b11 == 0); -/// The total maximum amount of memory a program can use. -/// -/// This is the whole 32-bit address space, except: -/// * the guard page at the start, -/// * the guard page between read-only data and read-write data -/// * the guard page between the heap and the stack, -/// * and the guard page at the end. -pub const VM_MAXIMUM_MEMORY_SIZE: u32 = (ADDRESS_SPACE_SIZE - VM_MAX_PAGE_SIZE as u64 * 4) as u32; - /// The maximum number of VM instructions a program can be composed of. pub const VM_MAXIMUM_INSTRUCTION_COUNT: u32 = 2 * 1024 * 1024; @@ -55,262 +38,250 @@ pub const VM_MAXIMUM_EXPORT_COUNT: u32 = 1024; // TODO: Support the C extension in the linker and lower this to 2. 
pub const VM_CODE_ADDRESS_ALIGNMENT: u32 = 4; -/// The memory configuration used by a given guest program. -#[derive(Copy, Clone, PartialEq, Eq)] +/// The memory map of a given guest program. +#[derive(Clone)] #[repr(C)] // NOTE: Used on the host <-> zygote boundary. -pub struct GuestMemoryConfig { +pub struct MemoryMap { + page_size: u32, ro_data_size: u32, rw_data_size: u32, - bss_size: u32, stack_size: u32, + heap_base: u32, + max_heap_size: u32, } -impl GuestMemoryConfig { +impl MemoryMap { + /// Creates an empty memory map. #[inline] pub const fn empty() -> Self { Self { + page_size: 0, ro_data_size: 0, rw_data_size: 0, - bss_size: 0, stack_size: 0, + heap_base: 0, + max_heap_size: 0, } } - #[inline] - pub const fn new(ro_data_size: u64, rw_data_size: u64, bss_size: u64, stack_size: u64) -> Result { - if ro_data_size > VM_MAXIMUM_MEMORY_SIZE as u64 { - return Err("size of the read-only data exceeded the maximum memory size"); - } - - if rw_data_size > VM_MAXIMUM_MEMORY_SIZE as u64 { - return Err("size of the read-write data exceeded the maximum memory size"); + /// Calculates the memory map from the given parameters. + pub fn new(page_size: u32, ro_data_size: u32, rw_data_size: u32, stack_size: u32) -> Result { + if page_size < VM_MIN_PAGE_SIZE { + return Err("invalid page size: page size is too small"); } - if bss_size > VM_MAXIMUM_MEMORY_SIZE as u64 { - return Err("size of the bss section exceeded the maximum memory size"); + if page_size > VM_MAX_PAGE_SIZE { + return Err("invalid page size: page size is too big"); } - if stack_size > VM_MAXIMUM_MEMORY_SIZE as u64 { - return Err("size of the stack exceeded the maximum memory size"); + if !page_size.is_power_of_two() { + return Err("invalid page size: page size is not a power of two"); } - // We already checked that these are less than the maximum memory size, so these cannot fail - // because the maximum memory size is going to be vastly smaller than what an u64 can hold. 
- const _: () = { - assert!(VM_MAXIMUM_MEMORY_SIZE as u64 + VM_MAX_PAGE_SIZE as u64 <= u32::MAX as u64); + let Some(ro_data_address_space) = align_to_next_page_u64(u64::from(VM_MAX_PAGE_SIZE), u64::from(ro_data_size)) else { + return Err("the size of read-only data is too big"); }; - let Some(ro_data_size) = align_to_next_page_u64(VM_PAGE_SIZE as u64, ro_data_size) else { - unreachable!() + let Some(ro_data_size) = align_to_next_page_u32(page_size, ro_data_size) else { + return Err("the size of read-only data is too big"); }; - let Some(rw_data_size) = align_to_next_page_u64(VM_PAGE_SIZE as u64, rw_data_size) else { - unreachable!() + + let Some(rw_data_address_space) = align_to_next_page_u64(u64::from(VM_MAX_PAGE_SIZE), u64::from(rw_data_size)) else { + return Err("the size of read-write data is too big"); }; - let Some(bss_size) = align_to_next_page_u64(VM_PAGE_SIZE as u64, bss_size) else { - unreachable!() + + let original_rw_data_size = rw_data_size; + let Some(rw_data_size) = align_to_next_page_u32(page_size, rw_data_size) else { + return Err("the size of read-write data is too big"); }; - let Some(stack_size) = align_to_next_page_u64(VM_PAGE_SIZE as u64, stack_size) else { - unreachable!() + + let Some(stack_address_space) = align_to_next_page_u64(u64::from(VM_MAX_PAGE_SIZE), u64::from(stack_size)) else { + return Err("the size of the stack is too big"); }; - let config = Self { - ro_data_size: ro_data_size as u32, - rw_data_size: rw_data_size as u32, - bss_size: bss_size as u32, - stack_size: stack_size as u32, + let Some(stack_size) = align_to_next_page_u32(page_size, stack_size) else { + return Err("the size of the stack is too big"); }; - if let Err(error) = config.check_total_memory_size() { - Err(error) - } else { - Ok(config) - } - } + let mut address_low: u64 = 0; - #[inline] - const fn check_total_memory_size(self) -> Result<(), &'static str> { - if self.ro_data_size as u64 + self.rw_data_size as u64 + self.bss_size as u64 + self.stack_size as u64 - 
> VM_MAXIMUM_MEMORY_SIZE as u64 - { - Err("maximum memory size exceeded") - } else { - Ok(()) + address_low += u64::from(VM_MAX_PAGE_SIZE); + address_low += ro_data_address_space; + address_low += u64::from(VM_MAX_PAGE_SIZE); + + let heap_base = address_low + u64::from(original_rw_data_size); + address_low += rw_data_address_space; + let heap_slack = address_low - heap_base; + address_low += u64::from(VM_MAX_PAGE_SIZE); + + let mut address_high: u64 = u64::from(VM_ADDR_USER_STACK_HIGH); + address_high -= stack_address_space; + + if address_low > address_high { + return Err("maximum memory size exceeded"); } + + let max_heap_size = address_high - address_low + heap_slack; + + Ok(Self { + page_size, + ro_data_size, + rw_data_size, + stack_size, + heap_base: heap_base as u32, + max_heap_size: max_heap_size as u32, + }) } - /// The address at where the program memory starts inside of the VM. + /// The page size of the program. #[inline] - pub const fn user_memory_region_address(self) -> u32 { - VM_ADDR_USER_MEMORY + pub fn page_size(&self) -> u32 { + self.page_size } - /// The size of the region in which the program memory resides inside of the VM, excluding the stack. - /// - /// This also includes the guard page between the read-only data and read-write data. + /// The address at which the program's heap starts. #[inline] - pub const fn user_memory_region_size(self) -> u32 { - (self.bss_address() + self.bss_size()) - self.user_memory_region_address() + pub fn heap_base(&self) -> u32 { + self.heap_base } - /// Resets the size of the program memory to zero, excluding the stack. + /// The maximum size of the program's heap. #[inline] - pub fn clear_user_memory_sizes(&mut self) { - self.ro_data_size = 0; - self.rw_data_size = 0; - self.bss_size = 0; + pub fn max_heap_size(&self) -> u32 { + self.max_heap_size } /// The address at where the program's read-only data starts inside of the VM. 
#[inline] - pub const fn ro_data_address(self) -> u32 { - self.user_memory_region_address() + pub fn ro_data_address(&self) -> u32 { + VM_MAX_PAGE_SIZE } /// The size of the program's read-only data. #[inline] - pub const fn ro_data_size(self) -> u32 { + pub fn ro_data_size(&self) -> u32 { self.ro_data_size } /// The range of addresses where the program's read-only data is inside of the VM. #[inline] - pub const fn ro_data_range(self) -> Range { + pub fn ro_data_range(&self) -> Range { self.ro_data_address()..self.ro_data_address() + self.ro_data_size() } - /// Sets the program's read-only data size. - pub fn set_ro_data_size(&mut self, ro_data_size: u32) -> Result<(), &'static str> { - if ro_data_size > VM_MAXIMUM_MEMORY_SIZE { - return Err("size of the read-only data exceeded the maximum memory size"); - } - - let ro_data_size = match align_to_next_page_u64(u64::from(VM_PAGE_SIZE), u64::from(ro_data_size)) { - Some(value) => value, - None => unreachable!(), - } as u32; - - Self { ro_data_size, ..*self }.check_total_memory_size()?; - self.ro_data_size = ro_data_size; - Ok(()) - } - /// The address at where the program's read-write data starts inside of the VM. #[inline] - pub const fn rw_data_address(self) -> u32 { - if self.ro_data_size == 0 { - self.user_memory_region_address() - } else { - match align_to_next_page_u32(VM_MAX_PAGE_SIZE, self.ro_data_address() + self.ro_data_size) { - Some(offset) => offset + VM_MAX_PAGE_SIZE, - None => unreachable!(), - } - } - } - - pub const fn rw_data_size(self) -> u32 { - self.rw_data_size - } - - /// Sets the program's read-write data size. 
- pub fn set_rw_data_size(&mut self, rw_data_size: u32) -> Result<(), &'static str> { - if rw_data_size > VM_MAXIMUM_MEMORY_SIZE { - return Err("size of the read-write data exceeded the maximum memory size"); - } - - let rw_data_size = match align_to_next_page_u64(u64::from(VM_PAGE_SIZE), u64::from(rw_data_size)) { - Some(value) => value, + pub fn rw_data_address(&self) -> u32 { + match align_to_next_page_u32(VM_MAX_PAGE_SIZE, self.ro_data_address() + self.ro_data_size) { + Some(offset) => offset + VM_MAX_PAGE_SIZE, None => unreachable!(), - } as u32; - - Self { rw_data_size, ..*self }.check_total_memory_size()?; - self.rw_data_size = rw_data_size; - Ok(()) + } } - /// The address at where the program's BSS section starts inside of the VM. + /// The size of the program's read-write data. #[inline] - pub const fn bss_address(self) -> u32 { - self.rw_data_address() + self.rw_data_size + pub fn rw_data_size(&self) -> u32 { + self.rw_data_size } + /// The range of addresses where the program's read-write data is inside of the VM. #[inline] - pub const fn bss_size(self) -> u32 { - self.bss_size - } - - /// Sets the program's BSS section size. - pub fn set_bss_size(&mut self, bss_size: u32) -> Result<(), &'static str> { - if bss_size > VM_MAXIMUM_MEMORY_SIZE { - return Err("size of the bss section exceeded the maximum memory size"); - } - - let bss_size = match align_to_next_page_u64(u64::from(VM_PAGE_SIZE), u64::from(bss_size)) { - Some(value) => value, - None => unreachable!(), - } as u32; - - Self { bss_size, ..*self }.check_total_memory_size()?; - self.bss_size = bss_size; - Ok(()) + pub fn rw_data_range(&self) -> Range { + self.rw_data_address()..self.rw_data_address() + self.rw_data_size() } /// The address at where the program's stack starts inside of the VM. 
#[inline] - pub const fn stack_address_low(self) -> u32 { + pub fn stack_address_low(&self) -> u32 { self.stack_address_high() - self.stack_size } /// The address at where the program's stack ends inside of the VM. #[inline] - pub const fn stack_address_high(self) -> u32 { + pub fn stack_address_high(&self) -> u32 { VM_ADDR_USER_STACK_HIGH } + /// The size of the program's stack. #[inline] - pub const fn stack_size(self) -> u32 { + pub fn stack_size(&self) -> u32 { self.stack_size } + /// The range of addresses where the program's stack is inside of the VM. #[inline] - pub const fn stack_range(self) -> Range { + pub fn stack_range(&self) -> Range { self.stack_address_low()..self.stack_address_high() } +} - /// Sets the program's stack size. - pub fn set_stack_size(&mut self, stack_size: u32) -> Result<(), &'static str> { - if stack_size > VM_MAXIMUM_MEMORY_SIZE { - return Err("size of the stack exceeded the maximum memory size"); - } - - let stack_size = match align_to_next_page_u64(u64::from(VM_PAGE_SIZE), u64::from(stack_size)) { - Some(value) => value, - None => unreachable!(), - } as u32; - - Self { stack_size, ..*self }.check_total_memory_size()?; - self.stack_size = stack_size; - Ok(()) - } - - #[inline] - pub fn clear_stack_size(&mut self) { - self.stack_size = 0; +#[test] +fn test_memory_map() { + { + let map = MemoryMap::new(0x4000, 1, 1, 1).unwrap(); + assert_eq!(map.ro_data_address(), 0x10000); + assert_eq!(map.ro_data_size(), 0x4000); + assert_eq!(map.rw_data_address(), 0x30000); + assert_eq!(map.rw_data_size(), 0x4000); + assert_eq!(map.stack_size(), 0x4000); + assert_eq!(map.stack_address_high(), 0xffff0000); + assert_eq!(map.stack_address_low(), 0xfffec000); + + assert_eq!(map.heap_base(), 0x30001); + assert_eq!( + u64::from(map.max_heap_size()), + ADDRESS_SPACE_SIZE - u64::from(VM_MAX_PAGE_SIZE) * 3 - u64::from(map.heap_base()) + ); } - /// The address at where the program's read-write memory starts inside of the VM. 
- #[inline] - pub const fn heap_address(self) -> u32 { - self.rw_data_address() + let max_size = (ADDRESS_SPACE_SIZE - u64::from(VM_MAX_PAGE_SIZE) * 4) as u32; + + { + // Read-only data takes the whole address space. + let map = MemoryMap::new(0x4000, max_size, 0, 0).unwrap(); + assert_eq!(map.ro_data_address(), 0x10000); + assert_eq!(map.ro_data_size(), max_size); + assert_eq!(map.rw_data_address(), map.ro_data_address() + VM_MAX_PAGE_SIZE + max_size); + assert_eq!(map.rw_data_size(), 0); + assert_eq!(map.stack_address_high(), VM_ADDR_USER_STACK_HIGH); + assert_eq!(map.stack_address_low(), VM_ADDR_USER_STACK_HIGH); + assert_eq!(map.stack_size(), 0); + + assert_eq!(map.heap_base(), map.rw_data_address()); + assert_eq!(map.max_heap_size(), 0); } - /// The total size of the program's read-write memory, excluding the stack. - #[inline] - pub const fn heap_size(self) -> u32 { - self.rw_data_size + self.bss_size + assert!(MemoryMap::new(0x4000, max_size + 1, 0, 0).is_err()); + assert!(MemoryMap::new(0x4000, max_size, 1, 0).is_err()); + assert!(MemoryMap::new(0x4000, max_size, 0, 1).is_err()); + + { + // Read-write data takes the whole address space. + let map = MemoryMap::new(0x4000, 0, max_size, 0).unwrap(); + assert_eq!(map.ro_data_address(), VM_MAX_PAGE_SIZE); + assert_eq!(map.ro_data_size(), 0); + assert_eq!(map.rw_data_address(), VM_MAX_PAGE_SIZE * 2); + assert_eq!(map.rw_data_size(), max_size); + assert_eq!(map.stack_address_high(), VM_ADDR_USER_STACK_HIGH); + assert_eq!(map.stack_address_low(), VM_ADDR_USER_STACK_HIGH); + assert_eq!(map.stack_size(), 0); + + assert_eq!(map.heap_base(), map.rw_data_address() + map.rw_data_size()); + assert_eq!(map.max_heap_size(), 0); } - #[inline] - pub const fn heap_range(self) -> Range { - self.heap_address()..self.heap_address() + self.heap_size() + { + // Stack takes the whole address space. 
+ let map = MemoryMap::new(0x4000, 0, 0, max_size).unwrap(); + assert_eq!(map.ro_data_address(), VM_MAX_PAGE_SIZE); + assert_eq!(map.ro_data_size(), 0); + assert_eq!(map.rw_data_address(), VM_MAX_PAGE_SIZE * 2); + assert_eq!(map.rw_data_size(), 0); + assert_eq!(map.stack_address_high(), VM_ADDR_USER_STACK_HIGH); + assert_eq!(map.stack_address_low(), VM_ADDR_USER_STACK_HIGH - max_size); + assert_eq!(map.stack_size(), max_size); + + assert_eq!(map.heap_base(), map.rw_data_address()); + assert_eq!(map.max_heap_size(), 0); } } diff --git a/crates/polkavm-common/src/elf.rs b/crates/polkavm-common/src/elf.rs index f3f464c0..d7d3b168 100644 --- a/crates/polkavm-common/src/elf.rs +++ b/crates/polkavm-common/src/elf.rs @@ -1,8 +1,3 @@ -/// Custom instruction used to make an external function call. -/// -/// These are processed when relinking the ELf file and will *not* end up in the final payload. -pub const INSTRUCTION_ECALLI: u32 = 0x0000000b; - pub struct Reader<'a> { pub buffer: &'a [u8], pub bytes_consumed: usize, diff --git a/crates/polkavm-common/src/init.rs b/crates/polkavm-common/src/init.rs deleted file mode 100644 index e1d82436..00000000 --- a/crates/polkavm-common/src/init.rs +++ /dev/null @@ -1,71 +0,0 @@ -use crate::abi::GuestMemoryConfig; - -#[derive(Copy, Clone)] -pub struct GuestProgramInit<'a> { - ro_data: &'a [u8], - rw_data: &'a [u8], - bss_size: u32, - stack_size: u32, -} - -impl<'a> Default for GuestProgramInit<'a> { - fn default() -> Self { - Self::new() - } -} - -impl<'a> GuestProgramInit<'a> { - pub fn new() -> Self { - Self { - ro_data: &[], - rw_data: &[], - bss_size: 0, - stack_size: 0, - } - } - - pub fn ro_data(self) -> &'a [u8] { - self.ro_data - } - - pub fn with_ro_data(mut self, ro_data: &'a [u8]) -> Self { - self.ro_data = ro_data; - self - } - - pub fn rw_data(self) -> &'a [u8] { - self.rw_data - } - - pub fn with_rw_data(mut self, rw_data: &'a [u8]) -> Self { - self.rw_data = rw_data; - self - } - - pub fn bss_size(self) -> u32 { - 
self.bss_size - } - - pub fn with_bss(mut self, size: u32) -> Self { - self.bss_size = size; - self - } - - pub fn stack_size(self) -> u32 { - self.stack_size - } - - pub fn with_stack(mut self, size: u32) -> Self { - self.stack_size = size; - self - } - - pub fn memory_config(&self) -> Result { - GuestMemoryConfig::new( - self.ro_data.len() as u64, - self.rw_data.len() as u64, - u64::from(self.bss_size), - u64::from(self.stack_size), - ) - } -} diff --git a/crates/polkavm-common/src/lib.rs b/crates/polkavm-common/src/lib.rs index 14487af0..78fe9610 100644 --- a/crates/polkavm-common/src/lib.rs +++ b/crates/polkavm-common/src/lib.rs @@ -21,7 +21,6 @@ pub mod abi; #[cfg(feature = "alloc")] pub mod elf; pub mod error; -pub mod init; pub mod operation; pub mod program; pub mod utils; @@ -35,3 +34,12 @@ pub mod zygote; /// A special hostcall number set by the *guest* to trigger a trace. pub const HOSTCALL_TRACE: u32 = 0x80000000; + +/// A flag which will trigger the sandbox to reset its memory after execution. +pub const VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION: u32 = 1 << 1; + +/// A flag which will trigger the sandbox to unload its program after execution. +pub const VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION: u32 = 1 << 2; + +/// A flag which will trigger the sandbox to reset its memory before execution. +pub const VM_RPC_FLAG_RESET_MEMORY_BEFORE_EXECUTION: u32 = 1 << 3; diff --git a/crates/polkavm-common/src/program.rs b/crates/polkavm-common/src/program.rs index 9d1b1379..2450bd92 100644 --- a/crates/polkavm-common/src/program.rs +++ b/crates/polkavm-common/src/program.rs @@ -673,6 +673,9 @@ define_opcodes! { branch_less_signed = 48, branch_greater_or_equal_unsigned = 41, branch_greater_or_equal_signed = 43, + + cmov_if_zero_imm = 85, + cmov_if_not_zero_imm = 86, ] // Instructions with args: reg, reg, reg @@ -716,6 +719,7 @@ define_opcodes! 
{ // Instructions with args: reg, reg [ move_reg = 82, + sbrk = 87, ] } @@ -824,6 +828,10 @@ impl<'a> InstructionVisitor for core::fmt::Formatter<'a> { write!(self, "@:") } + fn sbrk(&mut self, d: Reg, s: Reg) -> Self::ReturnTy { + write!(self, "{d} = sbrk {s}") + } + fn ecalli(&mut self, nth_import: u32) -> Self::ReturnTy { write!(self, "ecalli {nth_import}") } @@ -973,11 +981,19 @@ impl<'a> InstructionVisitor for core::fmt::Formatter<'a> { } fn cmov_if_zero(&mut self, d: Reg, s: Reg, c: Reg) -> Self::ReturnTy { - write!(self, "{d} = ({c} == 0) ? {s} : 0") + write!(self, "{d} = {s} if {c} == 0") } fn cmov_if_not_zero(&mut self, d: Reg, s: Reg, c: Reg) -> Self::ReturnTy { - write!(self, "{d} = ({c} != 0) ? {s} : 0") + write!(self, "{d} = {s} if {c} != 0") + } + + fn cmov_if_zero_imm(&mut self, d: Reg, c: Reg, s: u32) -> Self::ReturnTy { + write!(self, "{d} = {s} if {c} == 0") + } + + fn cmov_if_not_zero_imm(&mut self, d: Reg, c: Reg, s: u32) -> Self::ReturnTy { + write!(self, "{d} = {s} if {c} != 0") } fn add_imm(&mut self, d: Reg, s1: Reg, s2: u32) -> Self::ReturnTy { @@ -1448,7 +1464,8 @@ impl<'a> core::fmt::Display for ProgramSymbol<'a> { pub struct ProgramBlob<'a> { blob: CowBytes<'a>, - bss_size: u32, + ro_data_size: u32, + rw_data_size: u32, stack_size: u32, ro_data: Range, @@ -1636,7 +1653,8 @@ impl<'a> ProgramBlob<'a> { if section == SECTION_MEMORY_CONFIG { let section_length = reader.read_varint()?; let position = reader.position; - program.bss_size = reader.read_varint()?; + program.ro_data_size = reader.read_varint()?; + program.rw_data_size = reader.read_varint()?; program.stack_size = reader.read_varint()?; if position + section_length as usize != reader.position { return Err(ProgramParseError(ProgramParseErrorKind::Other( @@ -1652,6 +1670,18 @@ impl<'a> ProgramBlob<'a> { reader.read_section_range_into(&mut section, &mut program.exports, SECTION_EXPORTS)?; reader.read_section_range_into(&mut section, &mut program.jump_table, SECTION_JUMP_TABLE)?; + if 
program.ro_data.len() > program.ro_data_size as usize { + return Err(ProgramParseError(ProgramParseErrorKind::Other( + "size of the read-only data payload exceeds the declared size of the section", + ))); + } + + if program.rw_data.len() > program.rw_data_size as usize { + return Err(ProgramParseError(ProgramParseErrorKind::Other( + "size of the read-write data payload exceeds the declared size of the section", + ))); + } + if section == SECTION_CODE { let section_length = reader.read_varint()?; let initial_position = reader.position; @@ -1705,18 +1735,31 @@ impl<'a> ProgramBlob<'a> { } /// Returns the contents of the read-only data section. + /// + /// This only covers the initial non-zero portion of the section; use `ro_data_size` to get the full size. pub fn ro_data(&self) -> &[u8] { &self.blob[self.ro_data.clone()] } + /// Returns the size of the read-only data section. + /// + /// This can be larger than the length of `ro_data`, in which case the rest of the space is assumed to be filled with zeros. + pub fn ro_data_size(&self) -> u32 { + self.ro_data_size + } + /// Returns the contents of the read-write data section. + /// + /// This only covers the initial non-zero portion of the section; use `rw_data_size` to get the full size. pub fn rw_data(&self) -> &[u8] { &self.blob[self.rw_data.clone()] } - /// Returns the initial size of the BSS section. - pub fn bss_size(&self) -> u32 { - self.bss_size + /// Returns the size of the read-write data section. + /// + /// This can be larger than the length of `rw_data`, in which case the rest of the space is assumed to be filled with zeros. + pub fn rw_data_size(&self) -> u32 { + self.rw_data_size } /// Returns the initial size of the stack. 
@@ -2011,7 +2054,8 @@ impl<'a> ProgramBlob<'a> { ProgramBlob { blob: self.blob.into_owned(), - bss_size: self.bss_size, + ro_data_size: self.ro_data_size, + rw_data_size: self.rw_data_size, stack_size: self.stack_size, ro_data: self.ro_data, diff --git a/crates/polkavm-common/src/utils.rs b/crates/polkavm-common/src/utils.rs index 982ab8df..c2a90757 100644 --- a/crates/polkavm-common/src/utils.rs +++ b/crates/polkavm-common/src/utils.rs @@ -215,6 +215,12 @@ pub trait Access<'a> { where T: ?Sized + AsUninitSliceMut; fn write_memory(&mut self, address: u32, data: &[u8]) -> Result<(), Self::Error>; + + fn sbrk(&mut self, size: u32) -> Option; + + /// Returns the current size of the program's heap. + fn heap_size(&self) -> u32; + fn program_counter(&self) -> Option; fn native_program_counter(&self) -> Option; diff --git a/crates/polkavm-common/src/writer.rs b/crates/polkavm-common/src/writer.rs index bf6595e8..04f72eb9 100644 --- a/crates/polkavm-common/src/writer.rs +++ b/crates/polkavm-common/src/writer.rs @@ -4,7 +4,8 @@ use core::ops::Range; #[derive(Default)] pub struct ProgramBlobBuilder { - bss_size: u32, + ro_data_size: u32, + rw_data_size: u32, stack_size: u32, ro_data: Vec, rw_data: Vec, @@ -22,8 +23,12 @@ impl ProgramBlobBuilder { Self::default() } - pub fn set_bss_size(&mut self, size: u32) { - self.bss_size = size; + pub fn set_ro_data_size(&mut self, size: u32) { + self.ro_data_size = size; + } + + pub fn set_rw_data_size(&mut self, size: u32) { + self.rw_data_size = size; } pub fn set_stack_size(&mut self, size: u32) { @@ -80,9 +85,10 @@ impl ProgramBlobBuilder { writer.push_raw_bytes(&program::BLOB_MAGIC); writer.push_byte(program::BLOB_VERSION_V1); - if self.bss_size > 0 || self.stack_size > 0 { + if self.ro_data_size > 0 || self.rw_data_size > 0 || self.stack_size > 0 { writer.push_section_inplace(program::SECTION_MEMORY_CONFIG, |writer| { - writer.push_varint(self.bss_size); + writer.push_varint(self.ro_data_size); + 
writer.push_varint(self.rw_data_size); writer.push_varint(self.stack_size); }); } diff --git a/crates/polkavm-common/src/zygote.rs b/crates/polkavm-common/src/zygote.rs index f899c94a..b61fb185 100644 --- a/crates/polkavm-common/src/zygote.rs +++ b/crates/polkavm-common/src/zygote.rs @@ -3,10 +3,9 @@ //! In general everything here can be modified at will, provided the zygote //! is recompiled. -use crate::abi::GuestMemoryConfig; -use crate::utils::align_to_next_page_usize; +use crate::abi::MemoryMap; use core::cell::UnsafeCell; -use core::sync::atomic::{AtomicU32, AtomicU64}; +use core::sync::atomic::{AtomicBool, AtomicU32, AtomicU64}; // Due to the limitations of Rust's compile time constant evaluation machinery // we need to define this struct multiple times. @@ -58,6 +57,7 @@ define_address_table! { syscall_trap: unsafe extern "C" fn() -> !, syscall_return: unsafe extern "C" fn() -> !, syscall_trace: unsafe extern "C" fn(u32, u64), + syscall_sbrk: unsafe extern "C" fn(u64) -> u32, } /// The address where the native code starts inside of the VM. @@ -76,6 +76,9 @@ pub const VM_ADDR_JUMP_TABLE_RETURN_TO_HOST: u64 = VM_ADDR_JUMP_TABLE + ((crate: /// A special hostcall number set by the *host* to signal that the guest should stop executing the program. pub const HOSTCALL_ABORT_EXECUTION: u32 = !0; +/// A special hostcall number set by the *host* to signal that the guest should execute `sbrk`. +pub const HOSTCALL_SBRK: u32 = !0 - 1; + /// A sentinel value to indicate that the instruction counter is not available. pub const SANDBOX_EMPTY_NTH_INSTRUCTION: u32 = !0; @@ -110,7 +113,7 @@ pub const VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH: u32 = 53; pub const VM_COMPILER_MAXIMUM_EPILOGUE_LENGTH: u32 = 1024 * 1024; /// The maximum number of bytes the jump table can be. 
-const VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE: u64 = (crate::abi::VM_MAXIMUM_INSTRUCTION_COUNT as u64 + 1) +pub const VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE: u64 = (crate::abi::VM_MAXIMUM_INSTRUCTION_COUNT as u64 + 1) * core::mem::size_of::() as u64 * crate::abi::VM_CODE_ADDRESS_ALIGNMENT as u64; @@ -121,101 +124,20 @@ pub const VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE: u64 = 0x100000000 * core:: pub const VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE: u32 = 512 * 1024 * 1024 - 1; /// The memory configuration used by a given program and/or sandbox instance. -#[derive(Copy, Clone, PartialEq, Eq)] +#[derive(Clone)] #[repr(C)] pub struct SandboxMemoryConfig { - guest_config: GuestMemoryConfig, - code_size: u32, - jump_table_size: u32, -} - -impl core::ops::Deref for SandboxMemoryConfig { - type Target = GuestMemoryConfig; - - #[inline] - fn deref(&self) -> &Self::Target { - &self.guest_config - } -} - -impl core::ops::DerefMut for SandboxMemoryConfig { - #[inline] - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.guest_config - } -} - -impl SandboxMemoryConfig { - #[inline] - pub const fn empty() -> Self { - Self { - guest_config: GuestMemoryConfig::empty(), - code_size: 0, - jump_table_size: 0, - } - } - - #[inline] - pub fn set_guest_config(&mut self, guest_config: GuestMemoryConfig) { - self.guest_config = guest_config; - } - - #[inline] - pub const fn code_size(&self) -> usize { - self.code_size as usize - } - - #[inline] - pub fn clear_code_size(&mut self) { - self.code_size = 0; - } - - pub fn set_code_size(&mut self, native_page_size: usize, code_size: usize) -> Result<(), &'static str> { - if code_size > VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE as usize { - return Err("size of the native code exceeded the maximum code size"); - } - - let Some(code_size) = align_to_next_page_usize(native_page_size, code_size) else { - unreachable!() - }; - self.code_size = code_size as u32; - - Ok(()) - } - - #[inline] - pub const fn jump_table_size(&self) -> usize { - 
self.jump_table_size as usize - } - - #[inline] - pub fn clear_jump_table_size(&mut self) { - self.jump_table_size = 0; - } - - pub fn set_jump_table_size(&mut self, native_page_size: usize, jump_table_size: usize) -> Result<(), &'static str> { - if jump_table_size > VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE as usize { - return Err("size of the jump table exceeded te maximum size"); - } - - let Some(jump_table_size) = align_to_next_page_usize(native_page_size, jump_table_size) else { - unreachable!() - }; - self.jump_table_size = jump_table_size as u32; - - Ok(()) - } + pub memory_map: MemoryMap, + pub ro_data_fd_size: u32, + pub rw_data_fd_size: u32, + pub code_size: u32, + pub jump_table_size: u32, + pub sysreturn_address: u64, } /// A flag which will trigger the sandbox to reload its program before execution. pub const VM_RPC_FLAG_RECONFIGURE: u32 = 1 << 0; -/// A flag which will trigger the sandbox to reset its memory after execution. -pub const VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION: u32 = 1 << 1; - -/// A flag which will trigger the sandbox to unload its program after execution. -pub const VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION: u32 = 1 << 2; - #[repr(C)] pub struct VmInit { pub stack_address: AtomicU64, @@ -246,6 +168,12 @@ impl core::ops::DerefMut for CacheAligned { } } +#[repr(C)] +pub struct VmCtxHeapInfo { + pub heap_top: UnsafeCell, + pub heap_threshold: UnsafeCell, +} + const REG_COUNT: usize = crate::program::Reg::ALL.len(); #[repr(C)] @@ -284,6 +212,9 @@ pub struct VmCtx { /// Fields used when making syscalls from the VM into the host. syscall_ffi: CacheAligned, + /// The state of the program's heap. + pub heap_info: VmCtxHeapInfo, + /// The futex used to synchronize the sandbox with the host process. pub futex: CacheAligned, @@ -291,12 +222,12 @@ pub struct VmCtx { pub rpc_address: UnsafeCell, /// Flags specifying what exactly the sandbox should do. pub rpc_flags: UnsafeCell, - /// The current memory configuration of the sandbox. 
+ /// The amount of memory to allocate. + pub rpc_sbrk: UnsafeCell, + /// The memory configuration of the sandbox. pub memory_config: UnsafeCell, - /// The new memory configuration of the sandbox. Will be applied if the appropriate flag is set. - pub new_memory_config: UnsafeCell, - /// The new sysreturn trampoline address. Will be applied if the appropriate flag is set. - pub new_sysreturn_address: UnsafeCell, + /// Whether the memory of the sandbox is dirty. + pub is_memory_dirty: AtomicBool, /// Performance counters. Only for debugging. pub counters: CacheAligned, @@ -336,9 +267,16 @@ impl VmCtx { rpc_address: UnsafeCell::new(0), rpc_flags: UnsafeCell::new(0), - memory_config: UnsafeCell::new(SandboxMemoryConfig::empty()), - new_memory_config: UnsafeCell::new(SandboxMemoryConfig::empty()), - new_sysreturn_address: UnsafeCell::new(0), + rpc_sbrk: UnsafeCell::new(0), + memory_config: UnsafeCell::new(SandboxMemoryConfig { + memory_map: MemoryMap::empty(), + ro_data_fd_size: 0, + rw_data_fd_size: 0, + code_size: 0, + jump_table_size: 0, + sysreturn_address: 0, + }), + is_memory_dirty: AtomicBool::new(false), syscall_ffi: CacheAligned(VmCtxSyscall { gas: UnsafeCell::new(0), @@ -348,6 +286,11 @@ impl VmCtx { nth_instruction: UnsafeCell::new(0), }), + heap_info: VmCtxHeapInfo { + heap_top: UnsafeCell::new(0), + heap_threshold: UnsafeCell::new(0), + }, + counters: CacheAligned(VmCtxCounters { syscall_wait_loop_start: UnsafeCell::new(0), syscall_futex_wait: UnsafeCell::new(0), @@ -382,6 +325,11 @@ impl VmCtx { &self.syscall_ffi.0.gas } + #[inline(always)] + pub const fn heap_info(&self) -> &VmCtxHeapInfo { + &self.heap_info + } + #[inline(always)] pub const fn hostcall(&self) -> &UnsafeCell { &self.syscall_ffi.0.hostcall diff --git a/crates/polkavm-derive-impl/src/import.rs b/crates/polkavm-derive-impl/src/import.rs index 4bea713c..3c439460 100644 --- a/crates/polkavm-derive-impl/src/import.rs +++ b/crates/polkavm-derive-impl/src/import.rs @@ -1,4 +1,3 @@ -use 
polkavm_common::elf::INSTRUCTION_ECALLI; use quote::quote; use syn::spanned::Spanned; use syn::Token; @@ -257,9 +256,6 @@ pub fn polkavm_import(attributes: ImportBlockAttributes, input: syn::ItemForeign }; } - let asm_ecalli = format!(".4byte 0x{:08x}\n", INSTRUCTION_ECALLI); - let asm_ecalli = syn::LitStr::new(&asm_ecalli, ident.span()); - passthrough_tokens.push(quote! { #(#inner_doc_attributes)* #(#inner_cfg_attributes)* @@ -305,7 +301,7 @@ pub fn polkavm_import(attributes: ImportBlockAttributes, input: syn::ItemForeign extern fn trampoline(a0: u32, a1: u32, a2: u32, a3: u32, a4: u32, a5: u32) { unsafe { core::arch::asm!( - #asm_ecalli, + ".insn r 0xb, 0, 0, zero, zero, zero\n", ".4byte {metadata}\n", "ret\n", in("a0") a0, diff --git a/crates/polkavm-derive/src/lib.rs b/crates/polkavm-derive/src/lib.rs index cc43b4a3..e6f0d109 100644 --- a/crates/polkavm-derive/src/lib.rs +++ b/crates/polkavm-derive/src/lib.rs @@ -1,6 +1,5 @@ #![no_std] #![doc = include_str!("../README.md")] -#![forbid(unsafe_code)] pub use polkavm_derive_impl_macro::__PRIVATE_DO_NOT_USE_polkavm_define_abi as polkavm_define_abi; pub use polkavm_derive_impl_macro::__PRIVATE_DO_NOT_USE_polkavm_export as polkavm_export; @@ -9,3 +8,45 @@ pub use polkavm_derive_impl_macro::__PRIVATE_DO_NOT_USE_polkavm_import as polkav pub mod default_abi { polkavm_derive_impl_macro::__PRIVATE_DO_NOT_USE_polkavm_impl_abi_support!(); } + +/// Increases the size of the program's heap by a given number of bytes, allocating memory if necessary. +/// If successful returns a pointer to the *end* of the heap. If unsuccessful returns a null pointer. +/// +/// When called with a `size` of 0 this can be used to find the current end of the heap. This will always succeed. +/// +/// Memory allocated through this function can only be freed once the program finishes execution and its whole memory is cleared. 
+#[cfg(any(all(any(target_arch = "riscv32", target_arch = "riscv64"), target_feature = "e"), doc))] +#[inline] +pub fn sbrk(size: usize) -> *mut u8 { + // SAFETY: Allocating memory is always safe. + unsafe { + let address; + core::arch::asm!( + ".insn r 0xb, 1, 0, {dst}, {size}, zero", + size = in(reg) size, + dst = lateout(reg) address, + ); + address + } +} + +/// A basic memory allocator which doesn't support deallocation. +pub struct LeakingAllocator; + +#[cfg(any(all(any(target_arch = "riscv32", target_arch = "riscv64"), target_feature = "e"), doc))] +unsafe impl core::alloc::GlobalAlloc for LeakingAllocator { + #[inline] + unsafe fn alloc(&self, layout: core::alloc::Layout) -> *mut u8 { + let pointer = crate::sbrk(0); + let padding = !(pointer as usize) & (layout.align() - 1); + let size = layout.size().wrapping_add(padding); + if crate::sbrk(size).is_null() { + return core::ptr::null_mut(); + } + + pointer.add(padding) + } + + #[inline] + unsafe fn dealloc(&self, _ptr: *mut u8, _layout: core::alloc::Layout) {} +} diff --git a/crates/polkavm-linker/Cargo.toml b/crates/polkavm-linker/Cargo.toml index efacb522..ab0c1e14 100644 --- a/crates/polkavm-linker/Cargo.toml +++ b/crates/polkavm-linker/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "polkavm-linker" -version = "0.8.2" +version.workspace = true authors.workspace = true license.workspace = true edition.workspace = true diff --git a/crates/polkavm-linker/src/program_from_elf.rs b/crates/polkavm-linker/src/program_from_elf.rs index f8e18b35..a98a6df0 100644 --- a/crates/polkavm-linker/src/program_from_elf.rs +++ b/crates/polkavm-linker/src/program_from_elf.rs @@ -1,5 +1,4 @@ -use polkavm_common::abi::{GuestMemoryConfig, VM_ADDR_USER_MEMORY, VM_CODE_ADDRESS_ALIGNMENT, VM_MAX_PAGE_SIZE, VM_PAGE_SIZE}; -use polkavm_common::elf::INSTRUCTION_ECALLI; +use polkavm_common::abi::{MemoryMap, VM_CODE_ADDRESS_ALIGNMENT, VM_MAX_PAGE_SIZE, VM_MIN_PAGE_SIZE}; use polkavm_common::program::{self, FrameKind, Instruction, 
LineProgramOp, ProgramBlob, ProgramSymbol}; use polkavm_common::utils::{align_to_next_page_u32, align_to_next_page_u64}; use polkavm_common::varint; @@ -538,15 +537,26 @@ enum BasicInst { Cmov { kind: CmovKind, dst: Reg, - src: Reg, + src: RegImm, cond: Reg, }, Ecalli { nth_import: usize, }, + Sbrk { + dst: Reg, + size: Reg, + }, Nop, } +#[derive(Copy, Clone)] +enum OpKind { + Read, + Write, + ReadWrite, +} + impl BasicInst { fn is_nop(&self) -> bool { match self { @@ -579,8 +589,9 @@ impl BasicInst { BasicInst::StoreIndirect { src, base, .. } => RegMask::from(src) | RegMask::from(base), BasicInst::RegReg { src1, src2, .. } => RegMask::from(src1) | RegMask::from(src2), BasicInst::AnyAny { src1, src2, .. } => RegMask::from(src1) | RegMask::from(src2), - BasicInst::Cmov { src, cond, .. } => RegMask::from(src) | RegMask::from(cond), + BasicInst::Cmov { dst, src, cond, .. } => RegMask::from(dst) | RegMask::from(src) | RegMask::from(cond), BasicInst::Ecalli { nth_import } => imports[nth_import].src_mask(), + BasicInst::Sbrk { size, .. } => RegMask::from(size), } } @@ -596,12 +607,13 @@ impl BasicInst { | BasicInst::Cmov { dst, .. } | BasicInst::AnyAny { dst, .. } => RegMask::from(dst), BasicInst::Ecalli { nth_import } => imports[nth_import].dst_mask(), + BasicInst::Sbrk { dst, .. } => RegMask::from(dst), } } fn has_side_effects(&self, config: &Config) -> bool { match *self { - BasicInst::Ecalli { .. } | BasicInst::StoreAbsolute { .. } | BasicInst::StoreIndirect { .. } => true, + BasicInst::Sbrk { .. } | BasicInst::Ecalli { .. } | BasicInst::StoreAbsolute { .. } | BasicInst::StoreIndirect { .. } => true, BasicInst::LoadAbsolute { .. } | BasicInst::LoadIndirect { .. } => !config.elide_unnecessary_loads, BasicInst::Nop | BasicInst::LoadImmediate { .. 
} @@ -613,64 +625,71 @@ impl BasicInst { } } - fn map_register(self, mut map: impl FnMut(Reg, bool) -> Reg) -> Option { + fn map_register(self, mut map: impl FnMut(Reg, OpKind) -> Reg) -> Option { // Note: ALWAYS map the inputs first; otherwise `regalloc2` might break! match self { - BasicInst::LoadImmediate { dst, imm } => Some(BasicInst::LoadImmediate { dst: map(dst, true), imm }), + BasicInst::LoadImmediate { dst, imm } => Some(BasicInst::LoadImmediate { + dst: map(dst, OpKind::Write), + imm, + }), BasicInst::LoadAbsolute { kind, dst, target } => Some(BasicInst::LoadAbsolute { kind, - dst: map(dst, true), + dst: map(dst, OpKind::Write), target, }), BasicInst::StoreAbsolute { kind, src, target } => Some(BasicInst::StoreAbsolute { kind, - src: src.map_register(|reg| map(reg, false)), + src: src.map_register(|reg| map(reg, OpKind::Read)), target, }), BasicInst::LoadAddress { dst, target } => Some(BasicInst::LoadAddress { - dst: map(dst, true), + dst: map(dst, OpKind::Write), target, }), BasicInst::LoadAddressIndirect { dst, target } => Some(BasicInst::LoadAddressIndirect { - dst: map(dst, true), + dst: map(dst, OpKind::Write), target, }), BasicInst::LoadIndirect { kind, dst, base, offset } => Some(BasicInst::LoadIndirect { kind, - base: map(base, false), - dst: map(dst, true), + base: map(base, OpKind::Read), + dst: map(dst, OpKind::Write), offset, }), BasicInst::StoreIndirect { kind, src, base, offset } => Some(BasicInst::StoreIndirect { kind, - src: src.map_register(|reg| map(reg, false)), - base: map(base, false), + src: src.map_register(|reg| map(reg, OpKind::Read)), + base: map(base, OpKind::Read), offset, }), BasicInst::RegReg { kind, dst, src1, src2 } => Some(BasicInst::RegReg { kind, - src1: map(src1, false), - src2: map(src2, false), - dst: map(dst, true), + src1: map(src1, OpKind::Read), + src2: map(src2, OpKind::Read), + dst: map(dst, OpKind::Write), }), BasicInst::AnyAny { kind, dst, src1, src2 } => Some(BasicInst::AnyAny { kind, - src1: 
src1.map_register(|reg| map(reg, false)), - src2: src2.map_register(|reg| map(reg, false)), - dst: map(dst, true), + src1: src1.map_register(|reg| map(reg, OpKind::Read)), + src2: src2.map_register(|reg| map(reg, OpKind::Read)), + dst: map(dst, OpKind::Write), }), BasicInst::Cmov { kind, dst, src, cond } => Some(BasicInst::Cmov { kind, - src: map(src, false), - cond: map(cond, false), - dst: map(dst, true), + src: src.map_register(|reg| map(reg, OpKind::Read)), + cond: map(cond, OpKind::Read), + dst: map(dst, OpKind::ReadWrite), }), BasicInst::Ecalli { .. } => None, + BasicInst::Sbrk { dst, size } => Some(BasicInst::Sbrk { + size: map(size, OpKind::Read), + dst: map(dst, OpKind::Write), + }), BasicInst::Nop => Some(BasicInst::Nop), } } - fn operands(&self, imports: &[Import]) -> impl Iterator + fn operands(&self, imports: &[Import]) -> impl Iterator where T: Clone, { @@ -679,8 +698,8 @@ impl BasicInst { // Abuse the `map_register` to avoid matching on everything again. let is_special_instruction = self .clone() - .map_register(|reg, is_dst| { - list[length] = Some((reg, is_dst)); + .map_register(|reg, kind| { + list[length] = Some((reg, kind)); length += 1; reg }) @@ -691,29 +710,28 @@ impl BasicInst { let import = &imports[nth_import]; for reg in import.src_mask() { - list[length] = Some((reg, false)); + list[length] = Some((reg, OpKind::Read)); length += 1; } for reg in import.dst_mask() { - list[length] = Some((reg, true)); + list[length] = Some((reg, OpKind::Write)); length += 1; } }; let mut seen_dst = false; - list.into_iter() - .take_while(|reg| reg.is_some()) - .flatten() - .map(move |(reg, is_dst)| { - // Sanity check to make sure inputs always come before outputs, so that `regalloc2` doesn't break. 
- if seen_dst { - assert!(is_dst); - } - seen_dst |= is_dst; + list.into_iter().take_while(|reg| reg.is_some()).flatten().map(move |(reg, kind)| { + let is_dst = matches!(kind, OpKind::Write | OpKind::ReadWrite); - (reg, is_dst) - }) + // Sanity check to make sure inputs always come before outputs, so that `regalloc2` doesn't break. + if seen_dst { + assert!(is_dst); + } + seen_dst |= is_dst; + + (reg, kind) + }) } fn map_target(self, map: impl Fn(T) -> Result) -> Result, E> { @@ -729,6 +747,7 @@ impl BasicInst { BasicInst::AnyAny { kind, dst, src1, src2 } => BasicInst::AnyAny { kind, dst, src1, src2 }, BasicInst::Cmov { kind, dst, src, cond } => BasicInst::Cmov { kind, dst, src, cond }, BasicInst::Ecalli { nth_import } => BasicInst::Ecalli { nth_import }, + BasicInst::Sbrk { dst, size } => BasicInst::Sbrk { dst, size }, BasicInst::Nop => BasicInst::Nop, }) } @@ -747,6 +766,7 @@ impl BasicInst { | BasicInst::RegReg { .. } | BasicInst::AnyAny { .. } | BasicInst::Cmov { .. } + | BasicInst::Sbrk { .. } | BasicInst::Ecalli { .. 
} => (None, None), } } @@ -953,8 +973,9 @@ impl DataRef { struct MemoryConfig { ro_data: Vec, rw_data: Vec, - bss_size: u32, - stack_size: u32, + ro_data_size: u32, + rw_data_size: u32, + min_stack_size: u32, } fn get_padding(memory_end: u64, align: u64) -> Option { @@ -966,156 +987,92 @@ fn get_padding(memory_end: u64, align: u64) -> Option { } } -#[allow(clippy::too_many_arguments)] -fn extract_memory_config( +fn process_sections( elf: &Elf, - sections_ro_data: &[SectionIndex], - sections_rw_data: &[SectionIndex], - sections_bss: &[SectionIndex], - sections_min_stack_size: &[SectionIndex], + current_address: &mut u64, + chunks: &mut Vec, base_address_for_section: &mut HashMap, -) -> Result { - let mut memory_end = u64::from(VM_ADDR_USER_MEMORY); - let mut ro_data = Vec::new(); - let mut ro_data_size = 0; - - fn align_if_necessary(memory_end: &mut u64, output_size: &mut u64, output_chunks: &mut Vec, section: &Section) { - if let Some(padding) = get_padding(*memory_end, section.align()) { - *memory_end += padding; - *output_size += padding; - output_chunks.push(DataRef::Padding(padding as usize)); - } - } - - assert_eq!(memory_end % u64::from(VM_MAX_PAGE_SIZE), 0); - - let ro_data_address = memory_end; - for §ion_index in sections_ro_data { + sections: impl IntoIterator, +) -> u64 { + for section_index in sections { let section = elf.section_by_index(section_index); - align_if_necessary(&mut memory_end, &mut ro_data_size, &mut ro_data, section); + assert!(section.size() >= section.data().len() as u64); - let section_name = section.name(); - let base_address = memory_end; - base_address_for_section.insert(section.index(), base_address); - - memory_end += section.size(); - ro_data.push(DataRef::Section { - section_index: section.index(), - range: 0..section.data().len(), - }); - - ro_data_size += section.data().len() as u64; - let padding = section.size() - section.data().len() as u64; - if padding > 0 { - 
ro_data.push(DataRef::Padding(padding.try_into().expect("overflow"))) + if let Some(padding) = get_padding(*current_address, section.align()) { + *current_address += padding; + chunks.push(DataRef::Padding(padding as usize)); } - log::trace!( - "Found read-only section: '{}', original range = 0x{:x}..0x{:x} (relocated to: 0x{:x}..0x{:x}), size = 0x{:x}", - section_name, - section.original_address(), - section.original_address() + section.size(), - base_address, - base_address + section.size(), - section.size(), - ); - } - - { - let ro_data_size_unaligned = ro_data_size; - - assert_eq!(ro_data_address % u64::from(VM_PAGE_SIZE), 0); - ro_data_size = align_to_next_page_u64(u64::from(VM_PAGE_SIZE), ro_data_size) - .ok_or(ProgramFromElfError::other("out of range size for read-only sections"))?; - - memory_end += ro_data_size - ro_data_size_unaligned; - } - - assert_eq!(memory_end % u64::from(VM_PAGE_SIZE), 0); - memory_end = align_to_next_page_u64(u64::from(VM_MAX_PAGE_SIZE), memory_end).unwrap(); - - if ro_data_size > 0 { - // Add a guard page between read-only data and read-write data. 
- memory_end += u64::from(VM_MAX_PAGE_SIZE); - } - - let mut rw_data = Vec::new(); - let mut rw_data_size = 0; - let rw_data_address = memory_end; - for §ion_index in sections_rw_data { - let section = elf.section_by_index(section_index); - align_if_necessary(&mut memory_end, &mut rw_data_size, &mut rw_data, section); - let section_name = section.name(); - let base_address = memory_end; - base_address_for_section.insert(section.index(), memory_end); + let section_base_address = *current_address; + base_address_for_section.insert(section.index(), section_base_address); - memory_end += section.size(); - rw_data.push(DataRef::Section { + *current_address += section.size(); + chunks.push(DataRef::Section { section_index: section.index(), range: 0..section.data().len(), }); - rw_data_size += section.data().len() as u64; let padding = section.size() - section.data().len() as u64; if padding > 0 { - rw_data.push(DataRef::Padding(padding.try_into().expect("overflow"))) + chunks.push(DataRef::Padding(padding.try_into().expect("overflow"))) } log::trace!( - "Found read-write section: '{}', original range = 0x{:x}..0x{:x} (relocated to: 0x{:x}..0x{:x}), size = 0x{:x}", + "Found section: '{}', original range = 0x{:x}..0x{:x} (relocated to: 0x{:x}..0x{:x}), size = 0x{:x}/0x{:x}", section_name, section.original_address(), section.original_address() + section.size(), - base_address, - base_address + section.size(), + section_base_address, + section_base_address + section.size(), + section.data().len(), section.size(), ); } - let bss_explicit_address = { - let rw_data_size_unaligned = rw_data_size; - - assert_eq!(rw_data_address % u64::from(VM_PAGE_SIZE), 0); - rw_data_size = align_to_next_page_u64(u64::from(VM_PAGE_SIZE), rw_data_size) - .ok_or(ProgramFromElfError::other("out of range size for read-write sections"))?; - - memory_end + (rw_data_size - rw_data_size_unaligned) - }; - - for §ion_index in sections_bss { - let section = elf.section_by_index(section_index); - if let 
Some(padding) = get_padding(memory_end, section.align()) { - memory_end += padding; - } - - let section_name = section.name(); - let base_address = memory_end; - base_address_for_section.insert(section.index(), memory_end); + let size_in_memory: u64 = chunks.iter().map(|chunk| chunk.size() as u64).sum(); + while let Some(DataRef::Padding(..)) = chunks.last() { + chunks.pop(); + } - memory_end += section.size(); + *current_address = align_to_next_page_u64(u64::from(VM_MAX_PAGE_SIZE), *current_address).expect("overflow"); + // Add a guard page between this section and the next one. + *current_address += u64::from(VM_MAX_PAGE_SIZE); - log::trace!( - "Found BSS section: '{}', original range = 0x{:x}..0x{:x} (relocated to: 0x{:x}..0x{:x}), size = 0x{:x}", - section_name, - section.original_address(), - section.original_address() + section.size(), - base_address, - base_address + section.size(), - section.size(), - ); - } + size_in_memory +} - let mut bss_size = if memory_end > bss_explicit_address { - memory_end - bss_explicit_address - } else { - 0 - }; +#[allow(clippy::too_many_arguments)] +fn extract_memory_config( + elf: &Elf, + sections_ro_data: &[SectionIndex], + sections_rw_data: &[SectionIndex], + sections_bss: &[SectionIndex], + sections_min_stack_size: &[SectionIndex], + base_address_for_section: &mut HashMap, +) -> Result { + let mut current_address = u64::from(VM_MAX_PAGE_SIZE); - bss_size = align_to_next_page_u64(u64::from(VM_PAGE_SIZE), bss_size) - .ok_or(ProgramFromElfError::other("out of range size for BSS sections"))?; + let mut ro_data = Vec::new(); + let mut rw_data = Vec::new(); + let ro_data_address = current_address; + let ro_data_size = process_sections( + elf, + &mut current_address, + &mut ro_data, + base_address_for_section, + sections_ro_data.iter().copied(), + ); + let rw_data_address = current_address; + let rw_data_size = process_sections( + elf, + &mut current_address, + &mut rw_data, + base_address_for_section, + 
sections_rw_data.iter().copied().chain(sections_bss.iter().copied()), + ); - let mut stack_size = VM_PAGE_SIZE; + let mut min_stack_size = VM_MIN_PAGE_SIZE; for §ion_index in sections_min_stack_size { let section = elf.section_by_index(section_index); let data = section.data(); @@ -1125,24 +1082,25 @@ fn extract_memory_config( for xs in data.chunks_exact(4) { let value = u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]]); - stack_size = core::cmp::max(stack_size, value); + min_stack_size = core::cmp::max(min_stack_size, value); } } - let stack_size = - align_to_next_page_u32(VM_PAGE_SIZE, stack_size).ok_or(ProgramFromElfError::other("out of range size for the stack"))?; + let min_stack_size = + align_to_next_page_u32(VM_MIN_PAGE_SIZE, min_stack_size).ok_or(ProgramFromElfError::other("out of range size for the stack"))?; + + log::trace!("Configured minimum stack size: 0x{min_stack_size:x}"); - log::trace!("Configured stack size: 0x{stack_size:x}"); + let ro_data_size = u32::try_from(ro_data_size).expect("overflow"); + let rw_data_size = u32::try_from(rw_data_size).expect("overflow"); // Sanity check that the memory configuration is actually valid. 
{ - let ro_data_size_physical: u64 = ro_data.iter().map(|x| x.size() as u64).sum(); let rw_data_size_physical: u64 = rw_data.iter().map(|x| x.size() as u64).sum(); - - assert!(ro_data_size_physical <= ro_data_size); + let rw_data_size_physical = u32::try_from(rw_data_size_physical).expect("overflow"); assert!(rw_data_size_physical <= rw_data_size); - let config = match GuestMemoryConfig::new(ro_data_size, rw_data_size, bss_size, u64::from(stack_size)) { + let config = match MemoryMap::new(VM_MAX_PAGE_SIZE, ro_data_size, rw_data_size, min_stack_size) { Ok(config) => config, Err(error) => { return Err(ProgramFromElfError::other(error)); @@ -1156,8 +1114,9 @@ fn extract_memory_config( let memory_config = MemoryConfig { ro_data, rw_data, - bss_size: bss_size as u32, - stack_size, + ro_data_size, + rw_data_size, + min_stack_size, }; Ok(memory_config) @@ -1474,15 +1433,14 @@ fn emit_minmax( mut emit: impl FnMut(InstExt), ) { // This is supposed to emit something like this: - // dst = src1 ? src2 - // tmp = (dst != 0) ? src1 : 0 - // dst = (dst == 0) ? src2 : 0 - // dst = dst | tmp + // tmp = src1 ? 
src2 + // dst = src1 + // dst = src2 if tmp == 0 assert_ne!(dst, tmp); - assert_ne!(Some(dst), src1); - assert_ne!(Some(dst), src2); + assert_ne!(Some(tmp), src1); assert_ne!(Some(tmp), src2); + assert_ne!(Some(dst), src2); let (cmp_src1, cmp_src2, cmp_kind) = match kind { MinMax::MinUnsigned => (src1, src2, AnyAnyKind::SetLessThanUnsigned), @@ -1493,36 +1451,28 @@ fn emit_minmax( emit(InstExt::Basic(BasicInst::AnyAny { kind: cmp_kind, - dst, + dst: tmp, src1: cmp_src1.map_or(RegImm::Imm(0), RegImm::Reg), src2: cmp_src2.map_or(RegImm::Imm(0), RegImm::Reg), })); + if let Some(src1) = src1 { - emit(InstExt::Basic(BasicInst::Cmov { - kind: CmovKind::NotEqZero, - dst: tmp, - src: src1, - cond: dst, - })); - } else { - emit(InstExt::Basic(BasicInst::LoadImmediate { dst: tmp, imm: 0 })); - } - if let Some(src2) = src2 { - emit(InstExt::Basic(BasicInst::Cmov { - kind: CmovKind::EqZero, + emit(InstExt::Basic(BasicInst::AnyAny { + kind: AnyAnyKind::Add, dst, - src: src2, - cond: dst, + src1: RegImm::Reg(src1), + src2: RegImm::Imm(0), })); } else { - emit(InstExt::Basic(BasicInst::LoadImmediate { dst, imm: 0 })); + emit(InstExt::Basic(BasicInst::LoadImmediate { dst: tmp, imm: 0 })); } - emit(InstExt::Basic(BasicInst::AnyAny { - kind: AnyAnyKind::Or, + + emit(InstExt::Basic(BasicInst::Cmov { + kind: CmovKind::EqZero, dst, - src1: dst.into(), - src2: tmp.into(), - })) + src: src2.map_or(RegImm::Imm(0), RegImm::Reg), + cond: tmp, + })); } fn convert_instruction( @@ -1727,20 +1677,24 @@ fn convert_instruction( "found a bare ecall instruction; those are not supported", )), Inst::Cmov { kind, dst, src, cond, .. } => { - let Some(dst) = cast_reg_non_zero(dst)? else { return Ok(()) }; + let Some(dst) = cast_reg_non_zero(dst)? else { + emit(InstExt::Basic(BasicInst::Nop)); + return Ok(()); + }; + let Some(cond) = cast_reg_non_zero(cond)? 
else { return Err(ProgramFromElfError::other( "found a conditional move with a zero register as the condition", )); }; - if let Some(src) = cast_reg_non_zero(src)? { - emit(InstExt::Basic(BasicInst::Cmov { kind, dst, src, cond })); - - Ok(()) - } else { - todo!(); - } + emit(InstExt::Basic(BasicInst::Cmov { + kind, + dst, + src: cast_reg_any(src)?, + cond, + })); + Ok(()) } Inst::LoadReserved { dst, src, .. } => { let Some(dst) = cast_reg_non_zero(dst)? else { @@ -1923,7 +1877,10 @@ fn parse_code_section( text[relative_offset + 3], ]); - if raw_inst == INSTRUCTION_ECALLI { + const FUNC3_ECALLI: u32 = 0b000; + const FUNC3_SBRK: u32 = 0b001; + + if crate::riscv::R(raw_inst).unpack() == (crate::riscv::OPCODE_CUSTOM_0, FUNC3_ECALLI, 0, RReg::Zero, RReg::Zero, RReg::Zero) { let initial_offset = relative_offset as u64; if relative_offset + 12 > text.len() { return Err(ProgramFromElfError::other("truncated ecalli instruction")); @@ -1989,6 +1946,31 @@ fn parse_code_section( continue; } + if let (crate::riscv::OPCODE_CUSTOM_0, FUNC3_SBRK, 0, dst, size, RReg::Zero) = crate::riscv::R(raw_inst).unpack() { + let Some(dst) = cast_reg_non_zero(dst)? else { + return Err(ProgramFromElfError::other( + "found an 'sbrk' instruction with the zero register as the destination", + )); + }; + + let Some(size) = cast_reg_non_zero(size)? 
else { + return Err(ProgramFromElfError::other( + "found an 'sbrk' instruction with the zero register as the size", + )); + }; + + output.push(( + Source { + section_index, + offset_range: (relative_offset as u64..relative_offset as u64 + 4).into(), + }, + InstExt::Basic(BasicInst::Sbrk { dst, size }), + )); + + relative_offset += 4; + continue; + } + let source = Source { section_index, offset_range: AddressRange::from(relative_offset as u64..relative_offset as u64 + 4), @@ -3351,6 +3333,21 @@ impl BlockRegs { }); } } + BasicInst::Cmov { + kind, + dst, + src: RegImm::Reg(src), + cond, + } => { + if let RegValue::Constant(src_value) = self.get_reg(src) { + return Some(BasicInst::Cmov { + kind, + dst, + src: RegImm::Imm(src_value as u32), + cond, + }); + } + } BasicInst::LoadIndirect { kind, dst, base, offset } => { if let RegValue::DataAddress(base) = self.get_reg(base) { return Some(BasicInst::LoadAbsolute { @@ -4192,33 +4189,60 @@ fn spill_fake_registers( let (_, instruction) = &block.ops[nth_instruction]; let mut operands = Vec::new(); - for (reg, is_dst) in instruction.operands(imports) { - if is_dst { - let value_index = counter; - counter += 1; - reg_to_value_index[reg as usize] = value_index; - operands.push(regalloc2::Operand::new( - regalloc2::VReg::new(value_index, regalloc2::RegClass::Int), - if reg.fake_register_index().is_none() { - regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int)) - } else { - regalloc2::OperandConstraint::Reg - }, - regalloc2::OperandKind::Def, - regalloc2::OperandPos::Late, - )); - } else { - let value_index = reg_to_value_index[reg as usize]; - operands.push(regalloc2::Operand::new( - regalloc2::VReg::new(value_index, regalloc2::RegClass::Int), - if reg.fake_register_index().is_none() { - regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int)) - } else { - regalloc2::OperandConstraint::Reg - }, - regalloc2::OperandKind::Use, - 
regalloc2::OperandPos::Early, - )); + for (reg, kind) in instruction.operands(imports) { + match kind { + OpKind::Write => { + let value_index = counter; + counter += 1; + reg_to_value_index[reg as usize] = value_index; + operands.push(regalloc2::Operand::new( + regalloc2::VReg::new(value_index, regalloc2::RegClass::Int), + if reg.fake_register_index().is_none() { + regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int)) + } else { + regalloc2::OperandConstraint::Reg + }, + regalloc2::OperandKind::Def, + regalloc2::OperandPos::Late, + )); + } + OpKind::Read => { + let value_index = reg_to_value_index[reg as usize]; + operands.push(regalloc2::Operand::new( + regalloc2::VReg::new(value_index, regalloc2::RegClass::Int), + if reg.fake_register_index().is_none() { + regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int)) + } else { + regalloc2::OperandConstraint::Reg + }, + regalloc2::OperandKind::Use, + regalloc2::OperandPos::Early, + )); + } + OpKind::ReadWrite => { + let value_index_read = reg_to_value_index[reg as usize]; + operands.push(regalloc2::Operand::new( + regalloc2::VReg::new(value_index_read, regalloc2::RegClass::Int), + if reg.fake_register_index().is_none() { + regalloc2::OperandConstraint::FixedReg(regalloc2::PReg::new(reg as usize, regalloc2::RegClass::Int)) + } else { + regalloc2::OperandConstraint::Reg + }, + regalloc2::OperandKind::Use, + regalloc2::OperandPos::Early, + )); + + let value_index_write = counter; + counter += 1; + + reg_to_value_index[reg as usize] = value_index_write; + operands.push(regalloc2::Operand::new( + regalloc2::VReg::new(value_index_write, regalloc2::RegClass::Int), + regalloc2::OperandConstraint::Reuse(operands.len() - 1), + regalloc2::OperandKind::Def, + regalloc2::OperandPos::Late, + )); + } } } @@ -4450,6 +4474,9 @@ fn replace_immediates_with_registers( BasicInst::StoreIndirect { src, .. 
} => { replace!(src); } + BasicInst::Cmov { src, .. } => { + replace!(src); + } _ => {} } @@ -5353,21 +5380,34 @@ fn emit_code( } } } - BasicInst::Cmov { kind, dst, src, cond } => { - codegen! { - args = (conv_reg(dst), conv_reg(src), conv_reg(cond)), - kind = kind, - { - CmovKind::EqZero => cmov_if_zero, - CmovKind::NotEqZero => cmov_if_not_zero, + BasicInst::Cmov { kind, dst, src, cond } => match src { + RegImm::Reg(src) => { + codegen! { + args = (conv_reg(dst), conv_reg(src), conv_reg(cond)), + kind = kind, + { + CmovKind::EqZero => cmov_if_zero, + CmovKind::NotEqZero => cmov_if_not_zero, + } } } - } + RegImm::Imm(imm) => { + codegen! { + args = (conv_reg(dst), conv_reg(cond), imm), + kind = kind, + { + CmovKind::EqZero => cmov_if_zero_imm, + CmovKind::NotEqZero => cmov_if_not_zero_imm, + } + } + } + }, BasicInst::Ecalli { nth_import } => { assert!(used_imports.contains(&nth_import)); let import = &imports[nth_import]; Instruction::ecalli(import.metadata.index.expect("internal error: no index was assigned to an ecall")) } + BasicInst::Sbrk { dst, size } => Instruction::sbrk(conv_reg(dst), conv_reg(size)), BasicInst::Nop => { if is_optimized { unreachable!("internal error: a nop instruction was not removed") @@ -6913,8 +6953,9 @@ pub fn program_from_elf(config: Config, data: &[u8]) -> Result u32 { + self.0 & 0b1111111 + } + + pub fn func3(self) -> u32 { + (self.0 >> 12) & 0b111 + } + + pub fn func7(self) -> u32 { + (self.0 >> 25) & 0b1111111 + } + + pub fn dst(self) -> Reg { + Reg::decode(self.0 >> 7) + } + + pub fn src1(self) -> Reg { + Reg::decode(self.0 >> 15) + } + + pub fn src2(self) -> Reg { + Reg::decode(self.0 >> 20) + } + + // This matches the order of the `.insn` described here: https://sourceware.org/binutils/docs-2.31/as/RISC_002dV_002dFormats.html + pub fn unpack(self) -> (u32, u32, u32, Reg, Reg, Reg) { + (self.opcode(), self.func3(), self.func7(), self.dst(), self.src1(), self.src2()) + } +} + impl Inst { pub fn decode(op: u32) -> Option { // This 
is mostly unofficial, but it's a defacto standard used by both LLVM and GCC. @@ -524,23 +561,6 @@ impl Inst { 0b0000001_00000_00000_110_00000_0000000 => RegRegKind::Rem, 0b0000001_00000_00000_111_00000_0000000 => RegRegKind::RemUnsigned, - 0b0000111_00000_00000_101_00000_0000000 => { - return Some(Inst::Cmov { - kind: CmovKind::EqZero, - dst, - src: src1, - cond: src2, - }); - } - 0b0000111_00000_00000_111_00000_0000000 => { - return Some(Inst::Cmov { - kind: CmovKind::NotEqZero, - dst, - src: src1, - cond: src2, - }); - } - _ => return None, }; @@ -621,6 +641,34 @@ impl Inst { } } } + 0b0001011 => { + let dst = Reg::decode(op >> 7); + let src1 = Reg::decode(op >> 15); + let src2 = Reg::decode(op >> 20); + let hi = op >> 25; + let lo = (op >> 12) & 0b111; + if lo == 0b001 { + if hi == 0b0100000 { + // th.mveqz + return Some(Inst::Cmov { + kind: CmovKind::EqZero, + dst, + src: src1, + cond: src2, + }); + } else if hi == 0b0100001 { + // th.mvnez + return Some(Inst::Cmov { + kind: CmovKind::NotEqZero, + dst, + src: src1, + cond: src2, + }); + } + } + + None + } _ => None, } } @@ -779,9 +827,15 @@ impl Inst { | (u32::from(acquire) << 26) | ((kind as u32) << 27), ), - Inst::Cmov { kind, dst, src, cond } => { - Some(0b0110011 | ((kind as u32) << 12) | ((dst as u32) << 7) | ((src as u32) << 15) | ((cond as u32) << 20) | (0b111 << 25)) - } + Inst::Cmov { kind, dst, src, cond } => Some( + 0b0001011 + | (0b001 << 12) + | ((dst as u32) << 7) + | ((src as u32) << 15) + | ((cond as u32) << 20) + | ((kind as u32) << 25) + | (1 << 30), + ), } } } @@ -859,22 +913,12 @@ fn test_decode_multiply() { #[test] fn test_decode_cmov() { assert_eq!( - Inst::decode(0xec5f5b3).unwrap(), + Inst::decode(0x42a6158b).unwrap(), Inst::Cmov { kind: CmovKind::NotEqZero, dst: Reg::A1, - src: Reg::A1, - cond: Reg::A2 - } - ); - - assert_eq!( - Inst::decode(0xec55533).unwrap(), - Inst::Cmov { - kind: CmovKind::EqZero, - dst: Reg::A0, - src: Reg::A0, - cond: Reg::A2 + src: Reg::A2, + cond: Reg::A0 } ); 
} diff --git a/crates/polkavm-zygote/Cargo.lock b/crates/polkavm-zygote/Cargo.lock index 6d8ac70b..b8234347 100644 --- a/crates/polkavm-zygote/Cargo.lock +++ b/crates/polkavm-zygote/Cargo.lock @@ -4,11 +4,11 @@ version = 3 [[package]] name = "polkavm-common" -version = "0.8.0" +version = "0.9.0" [[package]] name = "polkavm-linux-raw" -version = "0.8.0" +version = "0.9.0" [[package]] name = "polkavm-zygote" diff --git a/crates/polkavm-zygote/src/main.rs b/crates/polkavm-zygote/src/main.rs index 0d345a77..da98de72 100644 --- a/crates/polkavm-zygote/src/main.rs +++ b/crates/polkavm-zygote/src/main.rs @@ -5,15 +5,32 @@ use core::ptr::addr_of_mut; use core::sync::atomic::Ordering; use core::sync::atomic::{AtomicBool, AtomicUsize}; + +#[rustfmt::skip] use polkavm_common::{ - abi::{VM_ADDR_USER_MEMORY, VM_ADDR_USER_STACK_HIGH, VM_MAXIMUM_MEMORY_SIZE}, + abi::VM_ADDR_USER_STACK_HIGH, utils::align_to_next_page_usize, zygote::{ - AddressTableRaw, VmCtx as VmCtxInner, SANDBOX_EMPTY_NATIVE_PROGRAM_COUNTER, SANDBOX_EMPTY_NTH_INSTRUCTION, VMCTX_FUTEX_BUSY, - VMCTX_FUTEX_HOSTCALL, VMCTX_FUTEX_IDLE, VMCTX_FUTEX_INIT, VMCTX_FUTEX_TRAP, VM_ADDR_JUMP_TABLE, VM_ADDR_JUMP_TABLE_RETURN_TO_HOST, - VM_ADDR_NATIVE_CODE, VM_ADDR_SIGSTACK, VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION, VM_RPC_FLAG_RECONFIGURE, - VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION, VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE, VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE, + AddressTableRaw, VmCtx as VmCtxInner, + SANDBOX_EMPTY_NATIVE_PROGRAM_COUNTER, + SANDBOX_EMPTY_NTH_INSTRUCTION, + VM_ADDR_JUMP_TABLE_RETURN_TO_HOST, + VM_ADDR_JUMP_TABLE, + VM_ADDR_NATIVE_CODE, + VM_ADDR_SIGSTACK, + VM_RPC_FLAG_RECONFIGURE, + VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE, + VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE, + VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE, + VMCTX_FUTEX_BUSY, + VMCTX_FUTEX_HOSTCALL, + VMCTX_FUTEX_IDLE, + VMCTX_FUTEX_INIT, + VMCTX_FUTEX_TRAP, }, + VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION, + VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION, + 
VM_RPC_FLAG_RESET_MEMORY_BEFORE_EXECUTION, }; use polkavm_linux_raw as linux_raw; @@ -222,6 +239,7 @@ unsafe extern "C" fn entry_point(stack: *mut usize) -> ! { static IN_SIGNAL_HANDLER: AtomicBool = AtomicBool::new(false); static NATIVE_PAGE_SIZE: AtomicUsize = AtomicUsize::new(!0); +static IS_PROGRAM_DIRTY: AtomicBool = AtomicBool::new(false); unsafe extern "C" fn signal_handler(signal: u32, _info: &linux_raw::siginfo_t, context: &linux_raw::ucontext) { if IN_SIGNAL_HANDLER.load(Ordering::Relaxed) || signal == linux_raw::SIGIO { @@ -276,7 +294,7 @@ unsafe extern "C" fn signal_handler(signal: u32, _info: &linux_raw::siginfo_t, c let user_code = VM_ADDR_NATIVE_CODE; #[allow(clippy::needless_borrow)] - if rip >= user_code && rip < user_code + (&*VMCTX.memory_config.get()).code_size() as u64 { + if rip >= user_code && rip < user_code + (&*VMCTX.memory_config.get()).code_size as u64 { signal_host(VMCTX_FUTEX_TRAP, SignalHostKind::Normal) .unwrap_or_else(|error| abort_with_error("failed to wait for the host process (trap)", error)); @@ -456,17 +474,8 @@ unsafe fn initialize(mut stack: *mut usize) -> linux_raw::Fd { // These are technically unnecessary, but let's do it anyway as a just-in-case // failsafe in case there's actually something in memory over there. 
- linux_raw::sys_munmap( - VM_ADDR_USER_MEMORY as *mut core::ffi::c_void, - align_to_next_page_usize(page_size, VM_MAXIMUM_MEMORY_SIZE as usize).unwrap_or_else(|| abort_with_message("overflow")), - ) - .unwrap_or_else(|error| abort_with_error("failed to make sure the user memory address space is unmapped", error)); - - linux_raw::sys_munmap( - VM_ADDR_NATIVE_CODE as *mut core::ffi::c_void, - align_to_next_page_usize(page_size, VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE as usize).unwrap_or_else(|| abort_with_message("overflow")), - ) - .unwrap_or_else(|error| abort_with_error("failed to make sure the native code address space is unmapped", error)); + linux_raw::sys_munmap(core::ptr::null_mut(), 0x200000000) + .unwrap_or_else(|error| abort_with_error("failed to make sure the address space is unmapped", error)); linux_raw::sys_munmap( VM_ADDR_JUMP_TABLE as *mut core::ffi::c_void, @@ -643,13 +652,25 @@ unsafe fn main_loop(socket: linux_raw::Fd) -> ! { let rpc_flags = *VMCTX.rpc_flags.get(); let rpc_address = *VMCTX.rpc_address.get().cast:: !>>(); + let rpc_sbrk = *VMCTX.rpc_sbrk.get(); if rpc_flags & VM_RPC_FLAG_RECONFIGURE != 0 { - reconfigure(socket.borrow()); + load_program(socket.borrow()); + } else if rpc_flags & VM_RPC_FLAG_RESET_MEMORY_BEFORE_EXECUTION != 0 { + reset_memory(); + } + + if rpc_sbrk > 0 { + let new_heap_top = *VMCTX.heap_info.heap_top.get() + rpc_sbrk as u64; + if syscall_sbrk(new_heap_top) == 0 { + abort_with_message("sbrk failed"); + } } if let Some(rpc_address) = rpc_address { trace!("jumping to: ", Hex(rpc_address as usize)); + + VMCTX.is_memory_dirty.store(true, Ordering::Relaxed); rpc_address(); } else { longjmp(addr_of_mut!(RESUME_IDLE_LOOP_JMPBUF), 1); @@ -658,19 +679,36 @@ unsafe fn main_loop(socket: linux_raw::Fd) -> ! 
{ #[link_section = ".text_hot"] unsafe fn reset_memory() { + if !VMCTX.is_memory_dirty.load(Ordering::Relaxed) { + return; + } + trace!("resetting memory..."); - let current = &mut *VMCTX.memory_config.get(); - let heap_size = current.heap_size(); - if heap_size > 0 { + let cfg = &mut *VMCTX.memory_config.get(); + let rw_data_size = cfg.memory_map.rw_data_size(); + if rw_data_size > 0 { linux_raw::sys_madvise( - current.heap_address() as *mut core::ffi::c_void, - heap_size as usize, + cfg.memory_map.rw_data_address() as *mut core::ffi::c_void, + rw_data_size as usize, linux_raw::MADV_DONTNEED, ) - .unwrap_or_else(|error| abort_with_error("failed to clear user heap", error)); + .unwrap_or_else(|error| abort_with_error("failed to clear user read/write memory", error)); } - let stack_size = current.stack_size() as usize; + let initial_heap_threshold = u64::from(cfg.memory_map.rw_data_range().end); + let heap_top = *VMCTX.heap_info.heap_top.get(); + if heap_top > initial_heap_threshold { + linux_raw::sys_munmap( + initial_heap_threshold as *mut core::ffi::c_void, + heap_top as usize - initial_heap_threshold as usize, + ) + .unwrap_or_else(|error| abort_with_error("failed to unmap the heap", error)); + } + + *VMCTX.heap_info.heap_top.get() = u64::from(cfg.memory_map.heap_base()); + *VMCTX.heap_info.heap_threshold.get() = initial_heap_threshold; + + let stack_size = cfg.memory_map.stack_size() as usize; if stack_size > 0 { linux_raw::sys_madvise( (VM_ADDR_USER_STACK_HIGH as usize - stack_size) as *mut core::ffi::c_void, @@ -679,6 +717,8 @@ unsafe fn reset_memory() { ) .unwrap_or_else(|error| abort_with_error("failed to clear user stack", error)); } + + VMCTX.is_memory_dirty.store(false, Ordering::Relaxed); } #[inline(never)] @@ -687,11 +727,20 @@ pub unsafe extern "C" fn syscall_hostcall(hostcall: u32) { trace!("syscall: hostcall triggered"); *VMCTX.hostcall().get() = hostcall; - signal_host(VMCTX_FUTEX_HOSTCALL, SignalHostKind::Normal) - .unwrap_or_else(|error| 
abort_with_error("failed to wait for the host process (hostcall)", error)); - if *VMCTX.hostcall().get() == polkavm_common::zygote::HOSTCALL_ABORT_EXECUTION { - longjmp(addr_of_mut!(RESUME_IDLE_LOOP_JMPBUF), 1); + loop { + signal_host(VMCTX_FUTEX_HOSTCALL, SignalHostKind::Normal) + .unwrap_or_else(|error| abort_with_error("failed to wait for the host process (hostcall)", error)); + + match *VMCTX.hostcall().get() { + polkavm_common::zygote::HOSTCALL_ABORT_EXECUTION => longjmp(addr_of_mut!(RESUME_IDLE_LOOP_JMPBUF), 1), + polkavm_common::zygote::HOSTCALL_SBRK => { + let new_heap_top = *VMCTX.heap_info.heap_top.get() + *VMCTX.rpc_sbrk.get() as u64; + *VMCTX.rpc_sbrk.get() = syscall_sbrk(new_heap_top); + *VMCTX.hostcall().get() = hostcall; + } + _ => break, + } } } @@ -731,6 +780,42 @@ pub unsafe extern "C" fn syscall_trace(nth_instruction: u32, rip: u64) { } } +#[inline(never)] +#[no_mangle] +pub unsafe extern "C" fn syscall_sbrk(pending_heap_top: u64) -> u32 { + trace!("syscall: sbrk triggered"); + let memory_map = &(*VMCTX.memory_config.get()).memory_map; + if pending_heap_top > u64::from(memory_map.heap_base() + memory_map.max_heap_size()) { + return 0; + } + + let Some(start) = align_to_next_page_usize(memory_map.page_size() as usize, *VMCTX.heap_info.heap_top.get() as usize) else { + abort_with_message("unreachable") + }; + + let Some(end) = align_to_next_page_usize(memory_map.page_size() as usize, pending_heap_top as usize) else { + abort_with_message("unreachable") + }; + + let size = end - start; + if size > 0 { + linux_raw::sys_mmap( + start as *mut core::ffi::c_void, + end - start, + linux_raw::PROT_READ | linux_raw::PROT_WRITE, + linux_raw::MAP_FIXED | linux_raw::MAP_PRIVATE | linux_raw::MAP_ANONYMOUS, + None, + 0, + ) + .unwrap_or_else(|error| abort_with_error("failed to mmap sbrk increase", error)); + } + + *VMCTX.heap_info.heap_top.get() = pending_heap_top; + *VMCTX.heap_info.heap_threshold.get() = end as u64; + + pending_heap_top as u32 +} + 
#[link_section = ".address_table"] #[no_mangle] pub static ADDRESS_TABLE: AddressTableRaw = AddressTableRaw { @@ -738,6 +823,7 @@ pub static ADDRESS_TABLE: AddressTableRaw = AddressTableRaw { syscall_trap, syscall_return, syscall_trace, + syscall_sbrk, }; enum SignalHostKind { @@ -799,103 +885,120 @@ fn signal_host(futex_value_to_set: u32, kind: SignalHostKind) -> Result<(), linu #[cold] #[inline(never)] -unsafe fn reconfigure(socket: linux_raw::FdRef) { +unsafe fn load_program(socket: linux_raw::FdRef) { trace!("reconfiguring..."); if NATIVE_PAGE_SIZE.load(Ordering::Relaxed) == 0 { abort_with_message("assertion failed: native page size is zero"); } - let fd = linux_raw::recvfd(socket).unwrap_or_else(|_| abort_with_message("failed to receive reconfiguration fd")); + let fd = linux_raw::recvfd(socket).unwrap_or_else(|_| abort_with_message("failed to receive program memory fd")); clear_program(); + IS_PROGRAM_DIRTY.store(true, Ordering::Relaxed); - let current = &mut *VMCTX.memory_config.get(); - let new = *VMCTX.new_memory_config.get(); - if new.ro_data_size() + new.rw_data_size() > 0 { - if new.ro_data_size() > 0 { + let config = &mut *VMCTX.memory_config.get(); + if config.memory_map.ro_data_size() > 0 { + if config.ro_data_fd_size > 0 { linux_raw::sys_mmap( - new.ro_data_address() as *mut core::ffi::c_void, - new.ro_data_size() as usize, + config.memory_map.ro_data_address() as *mut core::ffi::c_void, + config.ro_data_fd_size as usize, linux_raw::PROT_READ, linux_raw::MAP_FIXED | linux_raw::MAP_PRIVATE, Some(fd.borrow()), 0, ) - .unwrap_or_else(|error| abort_with_error("failed to mmap user memory (ro data)", error)); - - trace!( - "new rodata range: ", - Hex(new.ro_data_address()), - "-", - Hex(new.ro_data_address() + new.ro_data_size()), - " (", - Hex(new.ro_data_size()), - ")" - ); - if let Err(error) = current.set_ro_data_size(new.ro_data_size()) { - abort_with_message(error); - } + .unwrap_or_else(|error| abort_with_error("failed to mmap read-only data", 
error)); } - if new.rw_data_size() > 0 { + if config.memory_map.ro_data_size() > config.ro_data_fd_size { linux_raw::sys_mmap( - new.rw_data_address() as *mut core::ffi::c_void, - new.rw_data_size() as usize, + (config.memory_map.ro_data_address() + config.ro_data_fd_size) as *mut core::ffi::c_void, + (config.memory_map.ro_data_size() - config.ro_data_fd_size) as usize, + linux_raw::PROT_READ, + linux_raw::MAP_FIXED | linux_raw::MAP_PRIVATE | linux_raw::MAP_ANONYMOUS, + None, + 0, + ) + .unwrap_or_else(|error| abort_with_error("failed to mmap read-only data (trailing zeros)", error)); + } + + trace!( + "new rodata range: ", + Hex(config.memory_map.ro_data_address()), + "-", + Hex(config.memory_map.ro_data_address() + config.memory_map.ro_data_size()), + " (", + Hex(config.memory_map.ro_data_size()), + ")" + ); + } + + if config.memory_map.rw_data_size() > 0 { + if config.rw_data_fd_size > 0 { + linux_raw::sys_mmap( + config.memory_map.rw_data_address() as *mut core::ffi::c_void, + config.rw_data_fd_size as usize, linux_raw::PROT_READ | linux_raw::PROT_WRITE, linux_raw::MAP_FIXED | linux_raw::MAP_PRIVATE, Some(fd.borrow()), - new.ro_data_size().into(), + u64::from(config.ro_data_fd_size), ) - .unwrap_or_else(|error| abort_with_error("failed to mmap user memory (rw data)", error)); - - trace!( - "new rwdata range: ", - Hex(new.rw_data_address()), - "-", - Hex(new.rw_data_address() + new.rw_data_size()), - " (", - Hex(new.rw_data_size()), - ")" - ); - if let Err(error) = current.set_rw_data_size(new.rw_data_size()) { - abort_with_message(error); - } + .unwrap_or_else(|error| abort_with_error("failed to mmap read-write data", error)); + } + + if config.memory_map.rw_data_size() > config.rw_data_fd_size { + linux_raw::sys_mmap( + (config.memory_map.rw_data_address() + config.rw_data_fd_size) as *mut core::ffi::c_void, + (config.memory_map.rw_data_size() - config.rw_data_fd_size) as usize, + linux_raw::PROT_READ | linux_raw::PROT_WRITE, + linux_raw::MAP_FIXED | 
linux_raw::MAP_PRIVATE | linux_raw::MAP_ANONYMOUS, + None, + 0, + ) + .unwrap_or_else(|error| abort_with_error("failed to mmap read-write data (trailing zeros)", error)); } + + trace!( + "new rwdata range: ", + Hex(config.memory_map.rw_data_address()), + "-", + Hex(config.memory_map.rw_data_address() + config.memory_map.rw_data_size()), + " (", + Hex(config.memory_map.rw_data_size()), + ")" + ); } - if new.code_size() > 0 { + if config.code_size > 0 { linux_raw::sys_mmap( VM_ADDR_NATIVE_CODE as *mut core::ffi::c_void, - new.code_size(), + config.code_size as usize, linux_raw::PROT_EXEC, linux_raw::MAP_FIXED | linux_raw::MAP_PRIVATE, Some(fd.borrow()), - (new.ro_data_size() + new.rw_data_size()).into(), + (config.ro_data_fd_size + config.rw_data_fd_size).into(), ) - .unwrap_or_else(|error| abort_with_error("failed to mmap user code", error)); + .unwrap_or_else(|error| abort_with_error("failed to mmap code", error)); trace!( "new code range: ", Hex(VM_ADDR_NATIVE_CODE), "-", - Hex(VM_ADDR_NATIVE_CODE + new.code_size() as u64), + Hex(VM_ADDR_NATIVE_CODE + config.code_size as u64), " (", - Hex(new.code_size()), + Hex(config.code_size), ")" ); - if let Err(error) = current.set_code_size(NATIVE_PAGE_SIZE.load(Ordering::Relaxed), new.code_size()) { - abort_with_message(error); - } } - if new.jump_table_size() > 0 { + if config.jump_table_size > 0 { linux_raw::sys_mmap( VM_ADDR_JUMP_TABLE as *mut core::ffi::c_void, - new.jump_table_size(), + config.jump_table_size as usize, linux_raw::PROT_READ, linux_raw::MAP_FIXED | linux_raw::MAP_PRIVATE, Some(fd.borrow()), - (new.ro_data_size() as usize + new.rw_data_size() as usize + new.code_size()) as linux_raw::c_ulong, + (config.ro_data_fd_size + config.rw_data_fd_size + config.code_size) as linux_raw::c_ulong, ) .unwrap_or_else(|error| abort_with_error("failed to mmap jump table", error)); @@ -903,104 +1006,67 @@ unsafe fn reconfigure(socket: linux_raw::FdRef) { "new jump table range: ", Hex(VM_ADDR_JUMP_TABLE), "-", - 
Hex(VM_ADDR_JUMP_TABLE + new.jump_table_size() as u64), + Hex(VM_ADDR_JUMP_TABLE + config.jump_table_size as u64), " (", - Hex(new.jump_table_size()), + Hex(config.jump_table_size), ")" ); - if let Err(error) = current.set_jump_table_size(NATIVE_PAGE_SIZE.load(Ordering::Relaxed), new.jump_table_size()) { - abort_with_message(error); - } } fd.close() - .unwrap_or_else(|error| abort_with_error("failed to close user memory fd", error)); + .unwrap_or_else(|error| abort_with_error("failed to close program memory fd", error)); - if new.bss_size() > 0 { + if config.memory_map.stack_size() > 0 { linux_raw::sys_mmap( - new.bss_address() as *mut core::ffi::c_void, - new.bss_size() as usize, + config.memory_map.stack_address_low() as *mut core::ffi::c_void, + config.memory_map.stack_size() as usize, linux_raw::PROT_READ | linux_raw::PROT_WRITE, linux_raw::MAP_FIXED | linux_raw::MAP_PRIVATE | linux_raw::MAP_ANONYMOUS, None, 0, ) - .unwrap_or_else(|error| abort_with_error("failed to mmap user memory (bss)", error)); - - trace!( - "new bss range: ", - Hex(new.bss_address()), - "-", - Hex(new.bss_address() + new.bss_size()), - " (", - Hex(new.bss_size()), - ")" - ); - if let Err(error) = current.set_bss_size(new.bss_size()) { - abort_with_message(error); - } - } - - if new.stack_size() > 0 { - linux_raw::sys_mmap( - new.stack_address_low() as *mut core::ffi::c_void, - new.stack_size() as usize, - linux_raw::PROT_READ | linux_raw::PROT_WRITE, - linux_raw::MAP_FIXED | linux_raw::MAP_PRIVATE | linux_raw::MAP_ANONYMOUS, - None, - 0, - ) - .unwrap_or_else(|error| abort_with_error("failed to mmap user memory (stack)", error)); + .unwrap_or_else(|error| abort_with_error("failed to mmap stack", error)); trace!( "new stack range: ", - Hex(new.stack_address_low()), + Hex(config.memory_map.stack_address_low()), "-", - Hex(new.stack_address_low() + new.stack_size()), + Hex(config.memory_map.stack_address_low() + config.memory_map.stack_size()), " (", - Hex(new.stack_size()), + 
Hex(config.memory_map.stack_size()), ")" ); - if let Err(error) = current.set_stack_size(new.stack_size()) { - abort_with_message(error); - } - } - - if *current != new { - // This should never happen, but let's check it just in case. - abort_with_message("internal error: failed to fully update memory configuration"); } - let sysreturn = *VMCTX.new_sysreturn_address.get() as usize; trace!( "new sysreturn address: ", - Hex(sysreturn), + Hex(config.sysreturn_address), " (set at ", Hex(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST), ")" ); - *(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST as *mut usize) = sysreturn; + *(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST as *mut u64) = config.sysreturn_address; + VMCTX.is_memory_dirty.store(false, Ordering::Relaxed); } #[inline(never)] unsafe fn clear_program() { - let current = &mut *VMCTX.memory_config.get(); - if current.user_memory_region_size() > 0 || current.stack_size() > 0 || current.code_size() > 0 { - polkavm_common::static_assert!(VM_ADDR_NATIVE_CODE + (VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE as u64) < 0x200000000); - linux_raw::sys_munmap(core::ptr::null_mut(), 0x200000000) - .unwrap_or_else(|error| abort_with_error("failed to unmap user accessible memory", error)); - - current.clear_user_memory_sizes(); - current.clear_stack_size(); - current.clear_code_size(); + if !IS_PROGRAM_DIRTY.load(Ordering::Relaxed) { + return; } - if current.jump_table_size() > 0 { - linux_raw::sys_munmap(VM_ADDR_JUMP_TABLE as *mut core::ffi::c_void, current.jump_table_size()) - .unwrap_or_else(|error| abort_with_error("failed to unmap jump table", error)); + polkavm_common::static_assert!(VM_ADDR_NATIVE_CODE + (VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE as u64) < 0x200000000); - current.clear_jump_table_size(); - } + linux_raw::sys_munmap(core::ptr::null_mut(), 0x200000000) + .unwrap_or_else(|error| abort_with_error("failed to unmap user accessible memory", error)); + + linux_raw::sys_munmap( + VM_ADDR_JUMP_TABLE as *mut core::ffi::c_void, + VM_SANDBOX_MAXIMUM_JUMP_TABLE_SIZE as 
usize, + ) + .unwrap_or_else(|error| abort_with_error("failed to unmap jump table", error)); - *(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST as *mut usize) = 0; + *(VM_ADDR_JUMP_TABLE_RETURN_TO_HOST as *mut u64) = 0; + VMCTX.is_memory_dirty.store(false, Ordering::Relaxed); + IS_PROGRAM_DIRTY.store(false, Ordering::Relaxed); } diff --git a/crates/polkavm/src/api.rs b/crates/polkavm/src/api.rs index 972cd335..4568029c 100644 --- a/crates/polkavm/src/api.rs +++ b/crates/polkavm/src/api.rs @@ -4,10 +4,9 @@ use std::sync::{Arc, Mutex}; use core::marker::PhantomData; -use polkavm_common::abi::{GuestMemoryConfig, VM_MAXIMUM_EXPORT_COUNT, VM_MAXIMUM_IMPORT_COUNT, VM_MAXIMUM_INSTRUCTION_COUNT}; +use polkavm_common::abi::{MemoryMap, VM_MAXIMUM_EXPORT_COUNT, VM_MAXIMUM_IMPORT_COUNT, VM_MAXIMUM_INSTRUCTION_COUNT}; use polkavm_common::abi::{VM_ADDR_RETURN_TO_HOST, VM_ADDR_USER_STACK_HIGH}; use polkavm_common::error::Trap; -use polkavm_common::init::GuestProgramInit; use polkavm_common::program::{FrameKind, Instruction, InstructionVisitor, Reg}; use polkavm_common::program::{ProgramBlob, ProgramExport, ProgramImport, ProgramSymbol}; use polkavm_common::utils::{Access, AsUninitSliceMut, Gas}; @@ -17,11 +16,12 @@ use crate::config::{BackendKind, Config, GasMeteringKind, ModuleConfig, SandboxK use crate::error::{bail, bail_static, Error, ExecutionError}; use crate::interpreter::{InterpretedAccess, InterpretedInstance, InterpretedModule}; use crate::tracer::Tracer; +use crate::utils::GuestInit; if_compiler_is_supported! { - use crate::sandbox::Sandbox; + use crate::sandbox::{Sandbox, SandboxInstance}; use crate::sandbox::generic::Sandbox as SandboxGeneric; - use crate::compiler::{CompiledInstance, CompiledModule}; + use crate::compiler::CompiledModule; #[cfg(target_os = "linux")] use crate::sandbox::linux::Sandbox as SandboxLinux; @@ -29,154 +29,15 @@ if_compiler_is_supported! 
{ pub type RegValue = u32; -pub(crate) type OnHostcall<'a> = &'a mut dyn for<'r> FnMut(u32, BackendAccess<'r>) -> Result<(), Trap>; - if_compiler_is_supported! { { - use core::sync::atomic::{AtomicUsize, Ordering}; - - pub(crate) trait SandboxExt: Sandbox { - fn as_compiled_module(module: &Module) -> &CompiledModule; - fn as_sandbox_vec(sandbox_vec: &SandboxVec) -> &Mutex>; - fn reuse_or_spawn_sandbox(engine_state: &EngineState, module: &Module) -> Result { - use crate::sandbox::SandboxConfig; - - let mut sandbox_config = Self::Config::default(); - sandbox_config.enable_logger(cfg!(test) || module.is_debug_trace_execution_enabled()); - - if let Some(sandbox) = engine_state.reuse_sandbox::() { - Ok(sandbox) - } else { - Self::spawn(&sandbox_config) - .map_err(Error::from_display) - .map_err(|error| error.context("instantiation failed: failed to create a sandbox")) - } - } - - fn recycle_sandbox(engine_state: &EngineState, get_sandbox: impl FnOnce() -> Option) { - let Some(sandbox_cache) = engine_state.sandbox_cache.as_ref() else { return }; - let sandboxes = Self::as_sandbox_vec(&sandbox_cache.sandboxes); - - let mut count = sandbox_cache.available_workers.load(Ordering::Relaxed); - if count >= sandbox_cache.worker_limit { - return; - } - - loop { - if let Err(new_count) = sandbox_cache.available_workers.compare_exchange(count, count + 1, Ordering::Relaxed, Ordering::Relaxed) { - if new_count >= sandbox_cache.worker_limit { - return; - } - - count = new_count; - continue; - } - - break; - } - - if let Some(sandbox) = get_sandbox() { - let mut sandboxes = match sandboxes.lock() { - Ok(sandboxes) => sandboxes, - Err(poison) => poison.into_inner(), - }; - - sandboxes.push(sandbox); - } else { - sandbox_cache.available_workers.fetch_sub(1, Ordering::Relaxed); - } - } - } - - #[cfg(target_os = "linux")] - impl SandboxExt for SandboxLinux { - fn as_compiled_module(module: &Module) -> &CompiledModule { - match module.0.compiled_module { - CompiledModuleKind::Linux(ref 
module) => module, - _ => unreachable!(), - } - } - - #[allow(clippy::match_wildcard_for_single_variants)] - fn as_sandbox_vec(sandbox_vec: &SandboxVec) -> &Mutex> { - match sandbox_vec { - SandboxVec::Linux(vec) => vec, - _ => unreachable!(), - } - } - } - - impl SandboxExt for SandboxGeneric { - fn as_compiled_module(module: &Module) -> &CompiledModule { - match module.0.compiled_module { - CompiledModuleKind::Generic(ref module) => module, - _ => unreachable!(), - } - } - - fn as_sandbox_vec(sandbox_vec: &SandboxVec) -> &Mutex> { - match sandbox_vec { - SandboxVec::Generic(vec) => vec, - #[cfg(target_os = "linux")] - SandboxVec::Linux(..) => unreachable!(), - } - } - } - - pub(crate) enum SandboxVec { - #[cfg(target_os = "linux")] - Linux(Mutex>), - Generic(Mutex>), - } - - struct SandboxCache { - sandboxes: SandboxVec, - available_workers: AtomicUsize, - worker_limit: usize, - } - impl EngineState { - fn reuse_sandbox(&self) -> Option where S: SandboxExt { - let sandbox_cache = self.sandbox_cache.as_ref()?; - if sandbox_cache.available_workers.load(Ordering::Relaxed) == 0 { - return None; - } - - let sandboxes = S::as_sandbox_vec(&sandbox_cache.sandboxes); - let mut sandboxes = match sandboxes.lock() { - Ok(sandboxes) => sandboxes, - Err(poison) => poison.into_inner(), - }; - - let mut sandbox = sandboxes.pop()?; - sandbox_cache.available_workers.fetch_sub(1, Ordering::Relaxed); - - if let Err(error) = sandbox.sync() { - log::warn!("Failed to reuse a sandbox: {error}"); - None - } else { - Some(sandbox) - } + pub(crate) fn sandbox_cache(&self) -> Option<&SandboxCache> { + self.sandbox_cache.as_ref() } } - fn spawn_sandboxes(count: usize, debug_trace_execution: bool) -> Result, Error> where S: Sandbox { - use crate::sandbox::SandboxConfig; - - let mut sandbox_config = S::Config::default(); - sandbox_config.enable_logger(cfg!(test) || debug_trace_execution); - - let mut sandboxes = Vec::with_capacity(count); - for nth in 0..count { - let sandbox = 
S::spawn(&sandbox_config) - .map_err(crate::Error::from_display) - .map_err(|error| error.context(format!("failed to create a worker process ({} out of {})", nth + 1, count)))?; - - sandboxes.push(sandbox); - } - - Ok(sandboxes) - } + use crate::sandbox::SandboxCache; } else { struct SandboxCache; } @@ -238,27 +99,7 @@ impl Engine { bail!("cannot use the '{selected_sandbox}' sandbox: this sandbox is not secure yet, and `set_allow_insecure`/`POLKAVM_ALLOW_INSECURE` is not enabled"); } - let sandboxes = match selected_sandbox { - SandboxKind::Linux => { - #[cfg(target_os = "linux")] - { - SandboxVec::Linux(Mutex::new(spawn_sandboxes(config.worker_count, debug_trace_execution)?)) - } - - #[cfg(not(target_os = "linux"))] - { - unreachable!() - } - }, - SandboxKind::Generic => SandboxVec::Generic(Mutex::new(spawn_sandboxes(config.worker_count, debug_trace_execution)?)), - }; - - let sandbox_cache = SandboxCache { - sandboxes, - available_workers: AtomicUsize::new(config.worker_count), - worker_limit: config.worker_count, - }; - + let sandbox_cache = SandboxCache::new(selected_sandbox, config.worker_count, debug_trace_execution)?; (Some(selected_sandbox), Some(sandbox_cache)) } else { Default::default() @@ -312,7 +153,7 @@ struct ModulePrivate { blob: ProgramBlob<'static>, compiled_module: CompiledModuleKind, interpreted_module: Option, - memory_config: GuestMemoryConfig, + memory_map: MemoryMap, gas_metering: Option, } @@ -330,7 +171,7 @@ pub(crate) trait BackendModule: Sized { exports: &'a [ProgramExport], basic_block_by_jump_table_index: &'a [u32], jump_table_index_by_basic_block: &'a [u32], - init: GuestProgramInit<'a>, + init: GuestInit<'a>, instruction_count: usize, basic_block_count: usize, debug_trace_execution: bool, @@ -491,6 +332,13 @@ where Ok(()) } + #[inline(always)] + fn sbrk(&mut self, d: Reg, s: Reg) -> Self::ReturnTy { + self.0.before_instruction(); + self.0.sbrk(d, s); + Ok(()) + } + #[inline(always)] fn ecalli(&mut self, imm: u32) -> Self::ReturnTy { 
if self.imports.get(imm as usize).is_none() { @@ -766,6 +614,20 @@ where Ok(()) } + #[inline(always)] + fn cmov_if_zero_imm(&mut self, d: Reg, c: Reg, s: u32) -> Self::ReturnTy { + self.0.before_instruction(); + self.0.cmov_if_zero_imm(d, c, s); + Ok(()) + } + + #[inline(always)] + fn cmov_if_not_zero_imm(&mut self, d: Reg, c: Reg, s: u32) -> Self::ReturnTy { + self.0.before_instruction(); + self.0.cmov_if_not_zero_imm(d, c, s); + Ok(()) + } + #[inline(always)] fn add_imm(&mut self, d: Reg, s: Reg, imm: u32) -> Self::ReturnTy { self.0.before_instruction(); @@ -1126,10 +988,6 @@ impl Module { self.0.basic_block_by_jump_table_index.get(jump_table_index as usize).copied() } - pub(crate) fn memory_config(&self) -> &GuestMemoryConfig { - &self.0.memory_config - } - pub(crate) fn gas_metering(&self) -> Option { self.0.gas_metering } @@ -1151,13 +1009,7 @@ impl Module { log::debug!("Preparing a module from a blob of length {}...", blob.as_bytes().len()); // Do an early check for memory config validity. - GuestMemoryConfig::new( - blob.ro_data().len() as u64, - blob.rw_data().len() as u64, - u64::from(blob.bss_size()), - u64::from(blob.stack_size()), - ) - .map_err(Error::from_static_str)?; + MemoryMap::new(config.page_size, blob.ro_data_size(), blob.rw_data_size(), blob.stack_size()).map_err(Error::from_static_str)?; let imports = { log::trace!("Parsing imports..."); @@ -1229,11 +1081,14 @@ impl Module { (maximum_export_jump_target, exports) }; - let init = GuestProgramInit::new() - .with_ro_data(blob.ro_data()) - .with_rw_data(blob.rw_data()) - .with_bss(blob.bss_size()) - .with_stack(blob.stack_size()); + let init = GuestInit { + page_size: config.page_size, + ro_data: blob.ro_data(), + rw_data: blob.rw_data(), + ro_data_size: blob.ro_data_size(), + rw_data_size: blob.rw_data_size(), + stack_size: blob.stack_size(), + }; macro_rules! 
new_common { () => {{ @@ -1439,21 +1294,27 @@ impl Module { let exports = exports.into_iter().map(|export| export.into_owned()).collect(); let imports = imports.into_iter().map(|import| import.into_owned()).collect(); - let memory_config = init.memory_config().map_err(Error::from_static_str)?; + let memory_map = init.memory_map().map_err(Error::from_static_str)?; log::debug!( - " Memory map: RO data: 0x{:08x}..0x{:08x}", - memory_config.ro_data_range().start, - memory_config.ro_data_range().end + " Memory map: RO data: 0x{:08x}..0x{:08x} ({}/{} bytes)", + memory_map.ro_data_range().start, + memory_map.ro_data_range().end, + blob.ro_data_size(), + memory_map.ro_data_range().len(), ); log::debug!( - " Memory map: Heap: 0x{:08x}..0x{:08x}", - memory_config.heap_range().start, - memory_config.heap_range().end + " Memory map: RW data: 0x{:08x}..0x{:08x} ({}/{} bytes)", + memory_map.rw_data_range().start, + memory_map.rw_data_range().end, + blob.rw_data_size(), + memory_map.rw_data_range().len(), ); log::debug!( - " Memory map: Stack: 0x{:08x}..0x{:08x}", - memory_config.stack_range().start, - memory_config.stack_range().end + " Memory map: Stack: 0x{:08x}..0x{:08x} ({}/{} bytes)", + memory_map.stack_range().start, + memory_map.stack_range().end, + blob.stack_size(), + memory_map.stack_range().len(), ); Ok(Module(Arc::new(ModulePrivate { @@ -1470,19 +1331,21 @@ impl Module { blob: blob.clone().into_owned(), compiled_module, interpreted_module, - memory_config, + memory_map, gas_metering: config.gas_metering, }))) } - /// The address at where the program's stack starts inside of the VM. - pub fn stack_address_low(&self) -> u32 { - self.0.memory_config.stack_address_low() + /// The program's memory map. + pub fn memory_map(&self) -> &MemoryMap { + &self.0.memory_map } - /// The address at where the program's stack ends inside of the VM. 
- pub fn stack_address_high(&self) -> u32 { - self.0.memory_config.stack_address_high() + /// Searches for a given symbol exported by the module. + pub fn lookup_export(&self, symbol: impl AsRef<[u8]>) -> Option { + let symbol = symbol.as_ref(); + let export_index = *self.0.export_index_by_symbol.get(symbol)?; + Some(ExportIndex(export_index)) } /// The raw machine code of the compiled module. @@ -2097,11 +1960,11 @@ impl InstancePre { match compiled_module { #[cfg(target_os = "linux")] CompiledModuleKind::Linux(..) => { - let compiled_instance = CompiledInstance::new(Arc::clone(&self.0.engine_state), self.0.module.clone())?; + let compiled_instance = SandboxInstance::::spawn_and_load_module(Arc::clone(&self.0.engine_state), &self.0.module)?; Some(InstanceBackend::CompiledLinux(compiled_instance)) }, CompiledModuleKind::Generic(..) => { - let compiled_instance = CompiledInstance::new(Arc::clone(&self.0.engine_state), self.0.module.clone())?; + let compiled_instance = SandboxInstance::::spawn_and_load_module(Arc::clone(&self.0.engine_state), &self.0.module)?; Some(InstanceBackend::CompiledGeneric(compiled_instance)) }, CompiledModuleKind::Unavailable => None @@ -2115,14 +1978,11 @@ impl InstancePre { let backend = match backend { Some(backend) => backend, - None => { - let interpreted_instance = InterpretedInstance::new(self.0.module.clone())?; - InstanceBackend::Interpreted(interpreted_instance) - } + None => InstanceBackend::Interpreted(InterpretedInstance::new_from_module(&self.0.module)?), }; let tracer = if self.0.module.0.debug_trace_execution { - Some(Tracer::new(self.0.module.clone())) + Some(Tracer::new(&self.0.module)) } else { None }; @@ -2141,8 +2001,8 @@ if_compiler_is_supported! 
{ { enum InstanceBackend { #[cfg(target_os = "linux")] - CompiledLinux(CompiledInstance), - CompiledGeneric(CompiledInstance), + CompiledLinux(SandboxInstance), + CompiledGeneric(SandboxInstance), Interpreted(InterpretedInstance), } } else { @@ -2153,18 +2013,18 @@ if_compiler_is_supported! { } impl InstanceBackend { - fn call(&mut self, export_index: usize, on_hostcall: OnHostcall, config: &ExecutionConfig) -> Result<(), ExecutionError> { + fn execute(&mut self, args: ExecuteArgs) -> Result<(), ExecutionError> { if_compiler_is_supported! { { match self { #[cfg(target_os = "linux")] - InstanceBackend::CompiledLinux(ref mut backend) => backend.call(export_index, on_hostcall, config), - InstanceBackend::CompiledGeneric(ref mut backend) => backend.call(export_index, on_hostcall, config), - InstanceBackend::Interpreted(ref mut backend) => backend.call(export_index, on_hostcall, config), + InstanceBackend::CompiledLinux(ref mut backend) => backend.execute(args), + InstanceBackend::CompiledGeneric(ref mut backend) => backend.execute(args), + InstanceBackend::Interpreted(ref mut backend) => backend.execute(args), } } else { match self { - InstanceBackend::Interpreted(ref mut backend) => backend.call(export_index, on_hostcall, config), + InstanceBackend::Interpreted(ref mut backend) => backend.execute(args), } } } @@ -2298,6 +2158,14 @@ impl<'a> Access<'a> for BackendAccess<'a> { access_backend!(self, |access| Ok(access.write_memory(address, data).map_err(map_access_error)?)) } + fn sbrk(&mut self, size: u32) -> Option { + access_backend!(self, |access| access.sbrk(size)) + } + + fn heap_size(&self) -> u32 { + access_backend!(self, |access| access.heap_size()) + } + fn program_counter(&self) -> Option { access_backend!(self, |access| access.program_counter()) } @@ -2340,35 +2208,218 @@ impl Clone for Instance { } impl Instance { - /// Returns a handle to a function of a given symbol exported by the module. 
- pub fn get_func(&self, symbol: impl AsRef<[u8]>) -> Option> { - let symbol = symbol.as_ref(); - let export_index = *self.0.instance_pre.0.module.0.export_index_by_symbol.get(symbol)?; - Some(Func { - instance: self.clone(), - export_index, - }) + /// Returns the module from which this instance was created. + pub fn module(&self) -> &Module { + &self.0.instance_pre.0.module } - /// Returns a handle to a function of a given symbol exported by the module. - pub fn get_typed_func(&self, symbol: impl AsRef<[u8]>) -> Result, Error> + /// Updates the state of the instance according to the `state_args` and calls a given function. + pub fn call(&self, state_args: StateArgs, call_args: CallArgs) -> Result<(), ExecutionError> { + self.execute(state_args, Some(call_args)) + } + + /// A convenience function to call into this particular instance according to the default ABI. + /// + /// This is equivalent to calling [`Instance::call`] with an appropriately set up [`CallArgs`]. + pub fn call_typed( + &self, + user_data: &mut T, + symbol: impl AsRef<[u8]>, + args: FnArgs, + ) -> Result where FnArgs: FuncArgs, FnResult: FuncResult, { let symbol = symbol.as_ref(); - let Some(&export_index) = self.0.instance_pre.0.module.0.export_index_by_symbol.get(symbol) else { - return Err(Error::from(format!( - "failed to acquire a typed function handle: no such function is exported: {}", - ProgramSymbol::from(symbol) - ))); + let Some(export_index) = self.module().lookup_export(symbol) else { + return Err(ExecutionError::Error( + format!( + "failed to call function {}: the module contains no such export", + ProgramSymbol::new(symbol.into()) + ) + .into(), + )); }; - Ok(TypedFunc { - instance: self.clone(), - export_index, - _phantom: PhantomData, - }) + let mut call_args = CallArgs::new(user_data, export_index); + call_args.args_typed::(args); + + self.call(Default::default(), call_args)?; + Ok(self.get_result_typed::()) + } + + /// Updates the state of this particular instance. 
+ pub fn update_state(&self, state_args: StateArgs) -> Result<(), ExecutionError> { + self.execute(state_args, None) + } + + /// A convenience function to reset the instance's memory to its initial state from when it was first instantiated. + /// + /// This is equivalent to calling [`Instance::update_state`] with an appropriately set up [`StateArgs`]. + pub fn reset_memory(&self) -> Result<(), Error> { + let mut args = StateArgs::new(); + args.reset_memory(true); + self.update_state(args).map_err(Error::from_execution_error) + } + + /// A convenience function to increase the size of the program's heap by a given number of bytes, allocating memory if necessary. + /// + /// If successful returns a pointer to the end of the guest's heap. + /// + /// This is equivalent to manually checking that the `size` bytes can actually be allocated, calling [`Instance::update_state`] with an appropriately set up [`StateArgs`], + /// and calculating the new address of the end of the guest's heap. + pub fn sbrk(&self, size: u32) -> Result, Error> { + let mut mutable = match self.0.mutable.lock() { + Ok(mutable) => mutable, + Err(poison) => poison.into_inner(), + }; + + let Some(new_size) = mutable.backend.access().heap_size().checked_add(size) else { + return Ok(None); + }; + + if new_size > self.module().memory_map().max_heap_size() { + return Ok(None); + }; + + let mut args = StateArgs::new(); + args.sbrk(size); + self.execute_impl(&mut mutable, args, None).map_err(Error::from_execution_error)?; + + debug_assert_eq!(mutable.backend.access().heap_size(), new_size); + Ok(Some(self.module().memory_map().heap_base() + new_size)) + } + + fn execute(&self, state_args: StateArgs, call_args: Option>) -> Result<(), ExecutionError> { + let mutable = &self.0.mutable; + let mut mutable = match mutable.lock() { + Ok(mutable) => mutable, + Err(poison) => poison.into_inner(), + }; + + self.execute_impl(&mut mutable, state_args, call_args) + } + + fn execute_impl( + &self, + mutable: &mut 
InstancePrivateMut, + state_args: StateArgs, + mut call_args: Option>, + ) -> Result<(), ExecutionError> { + use polkavm_common::{VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION, VM_RPC_FLAG_RESET_MEMORY_BEFORE_EXECUTION}; + + let instance_pre = &self.0.instance_pre; + let module = &instance_pre.0.module; + + if state_args.sbrk > 0 { + let current_size = if state_args.reset_memory { + 0 + } else { + mutable.backend.access().heap_size() + }; + + let new_size = current_size.checked_add(state_args.sbrk); + if !new_size.map_or(false, |new_size| new_size <= module.memory_map().max_heap_size()) { + return Err(ExecutionError::Error(Error::from_static_str( + "execution failed: cannot grow the heap over the maximum", + ))); + } + } + + let mut args = ExecuteArgs::new(); + if state_args.reset_memory { + args.flags |= VM_RPC_FLAG_RESET_MEMORY_BEFORE_EXECUTION; + } + + args.gas = state_args.gas; + args.sbrk = state_args.sbrk; + + let (result, export) = if let Some(call_args) = call_args.as_mut() { + let Some(export) = module.0.exports.get(call_args.export_index) else { + return Err(ExecutionError::Error( + format!( + "failed to call export #{}: out of range index; the module doesn't contain this many exports", + call_args.export_index + ) + .into(), + )); + }; + + args.entry_point = Some(call_args.export_index); + args.regs = Some(&call_args.initial_regs); + if call_args.reset_memory_after_call { + args.flags |= VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION; + } + + log::trace!( + "Calling into {}... 
(gas limit = {:?})", + export.symbol(), + module.0.gas_metering.and(args.gas) + ); + + if let Some(ref mut tracer) = mutable.tracer() { + tracer.on_before_execute(&args); + } + + let result = { + let mut on_hostcall = on_hostcall( + call_args.user_data, + &instance_pre.0.host_functions, + &instance_pre.0.module.0.imports, + instance_pre.0.fallback_handler.as_ref(), + &mut mutable.raw, + ); + + args.hostcall_handler = Some(&mut on_hostcall); + mutable.backend.execute(args) + }; + + (result, Some(export)) + } else { + log::trace!("Updating state..."); + + if let Some(ref mut tracer) = mutable.tracer() { + tracer.on_before_execute(&args); + } + + let result = mutable.backend.execute(args); + (result, None) + }; + + if let Some(ref mut tracer) = mutable.tracer() { + tracer.on_after_execute(); + } + + match result { + Ok(()) => { + log::trace!( + "...execution finished: success, leftover gas = {:?}", + mutable.backend.access().gas_remaining() + ); + } + Err(ExecutionError::Error(error)) => { + log::trace!("...execution finished: error: {error}"); + + if let Some(export) = export { + return Err(ExecutionError::Error( + format!("failed to call function {}: {}", export.symbol(), error).into(), + )); + } else { + return Err(ExecutionError::Error(format!("execution failed: {error}").into())); + } + } + Err(ExecutionError::Trap(trap)) => { + log::trace!("...execution finished: trapped"); + return Err(ExecutionError::Trap(trap)); + } + Err(ExecutionError::OutOfGas) => { + log::trace!("...execution finished: ran out of gas"); + return Err(ExecutionError::OutOfGas); + } + } + + Ok(()) } pub fn read_memory_into_slice<'slice, B>(&self, address: u32, buffer: &'slice mut B) -> Result<&'slice mut [u8], Trap> @@ -2406,6 +2457,17 @@ impl Instance { result } + /// Returns the current size of the program's heap. 
+ pub fn heap_size(&self) -> u32 { + let mut mutable = match self.0.mutable.lock() { + Ok(mutable) => mutable, + Err(poison) => poison.into_inner(), + }; + + mutable.backend.access().heap_size() + } + + /// Returns the value of the given register. pub fn get_reg(&self, reg: Reg) -> RegValue { let mut mutable = match self.0.mutable.lock() { Ok(mutable) => mutable, @@ -2415,6 +2477,27 @@ impl Instance { mutable.backend.access().get_reg(reg) } + /// Extracts a return value from the argument registers according to the default ABI. + /// + /// This is equivalent to manually calling [`Instance::get_reg`]. + pub fn get_result_typed(&self) -> FnResult + where + FnResult: FuncResult, + { + let mut mutable = match self.0.mutable.lock() { + Ok(mutable) => mutable, + Err(poison) => poison.into_inner(), + }; + + let mut output_count = 0; + FnResult::_get(|| { + let access = mutable.backend.access(); + let value = access.get_reg(Reg::ARG_REGS[output_count]); + output_count += 1; + value + }) + } + /// Gets the amount of gas remaining, or `None` if gas metering is not enabled for this instance. /// /// Note that this being zero doesn't necessarily mean that the execution ran out of gas, @@ -2442,60 +2525,150 @@ impl Instance { } } -pub struct ExecutionConfig { - pub(crate) reset_memory_after_execution: bool, - pub(crate) clear_program_after_execution: bool, +/// The index of an exported function to be called. +#[derive(Copy, Clone, Debug)] +pub struct ExportIndex(usize); + +/// A helper struct used when calling into a function exported by the guest program. +pub struct CallArgs<'a, T> { pub(crate) initial_regs: [RegValue; Reg::ALL.len()], - pub(crate) gas: Option, + pub(crate) user_data: &'a mut T, + pub(crate) export_index: usize, + pub(crate) reset_memory_after_call: bool, } -impl Default for ExecutionConfig { - fn default() -> Self { +impl<'a, T> CallArgs<'a, T> { + /// Creates a new `CallArgs`. 
+ pub fn new(user_data: &'a mut T, export_index: ExportIndex) -> Self { let mut initial_regs = [0; Reg::ALL.len()]; initial_regs[Reg::SP as usize] = VM_ADDR_USER_STACK_HIGH; initial_regs[Reg::RA as usize] = VM_ADDR_RETURN_TO_HOST; - ExecutionConfig { - reset_memory_after_execution: false, - clear_program_after_execution: false, + Self { initial_regs, - gas: None, + user_data, + export_index: export_index.0, + reset_memory_after_call: false, } } -} -impl ExecutionConfig { - pub fn set_reset_memory_after_execution(&mut self, value: bool) -> &mut Self { - self.reset_memory_after_execution = value; + /// Decides whether the memory of the instance will be reset after the call. + /// + /// Default: `false` + pub fn reset_memory_after_call(&mut self, value: bool) -> &mut Self { + self.reset_memory_after_call = value; self } - pub fn set_clear_program_after_execution(&mut self, value: bool) -> &mut Self { - self.clear_program_after_execution = value; + /// Sets a given register to the given value before the call. + /// + /// The default value for `SP` and `RA` is 0xffff0000, and for every other register it is zero. + pub fn reg(&mut self, reg: Reg, value: RegValue) -> &mut Self { + self.initial_regs[reg as usize] = value; self } - pub fn set_reg(&mut self, reg: Reg, value: RegValue) -> &mut Self { - self.initial_regs[reg as usize] = value; + /// Sets the argument registers to the given values. + /// + /// A shorthand for successively calling `reg` with `Reg::A0`, `Reg::A1`, ..., `Reg::A5`. + /// + /// Will panic if `args` has more than 6 elements. + pub fn args_untyped(&mut self, args: &[RegValue]) -> &mut Self { + self.initial_regs[Reg::A0 as usize..Reg::A0 as usize + args.len()].copy_from_slice(args); self } - pub fn set_gas(&mut self, gas: Gas) -> &mut Self { - self.gas = Some(gas); + /// Sets the argument registers to the given values according to the default ABI. 
+ pub fn args_typed(&mut self, args: FnArgs) -> &mut Self + where + FnArgs: FuncArgs, + { + let mut input_count = 0; + args._set(|value| { + assert!(input_count <= Reg::MAXIMUM_INPUT_REGS); + self.initial_regs[Reg::A0 as usize + input_count] = value; + input_count += 1; + }); + self } } -pub struct Func { - instance: Instance, - export_index: usize, +pub struct StateArgs { + pub(crate) reset_memory: bool, + pub(crate) gas: Option, + pub(crate) sbrk: u32, } -impl Clone for Func { - fn clone(&self) -> Self { +impl Default for StateArgs { + fn default() -> Self { + Self::new() + } +} + +impl StateArgs { + /// Creates a new `StateArgs`. + pub fn new() -> Self { Self { - instance: self.instance.clone(), - export_index: self.export_index, + reset_memory: false, + gas: None, + sbrk: 0, + } + } + + /// Decides whether the memory of the instance will be reset. + /// + /// If the memory is already reset this does nothing. + /// + /// Default: `false` + pub fn reset_memory(&mut self, value: bool) -> &mut Self { + self.reset_memory = value; + self + } + + /// Sets the current remaining gas. + /// + /// Default: unset (the current value will not be changed) + pub fn set_gas(&mut self, gas: Gas) -> &mut Self { + self.gas = Some(gas); + self + } + + /// Increments the guest's heap by the given number of bytes. + /// + /// Has exactly the same semantics as the guest-side `sbrk` instruction. 
+ /// + /// Default: 0 + pub fn sbrk(&mut self, bytes: u32) -> &mut Self { + self.sbrk = bytes; + self + } +} + +pub(crate) type HostcallHandler<'a> = &'a mut dyn for<'r> FnMut(u32, BackendAccess<'r>) -> Result<(), Trap>; + +pub(crate) struct ExecuteArgs<'a> { + pub(crate) entry_point: Option, + pub(crate) regs: Option<&'a [RegValue; Reg::ALL.len()]>, + pub(crate) gas: Option, + pub(crate) sbrk: u32, + pub(crate) flags: u32, + pub(crate) hostcall_handler: Option>, + pub(crate) module: Option<&'a Module>, + pub(crate) is_async: bool, +} + +impl<'a> ExecuteArgs<'a> { + pub(crate) fn new() -> Self { + ExecuteArgs { + entry_point: None, + regs: None, + gas: None, + sbrk: 0, + flags: 0, + hostcall_handler: None, + module: None, + is_async: false, } } } @@ -2541,180 +2714,3 @@ fn on_hostcall<'a, T>( Ok(()) } } - -impl Func { - /// Calls the function. - pub fn call(&self, user_data: &mut T, args: &[RegValue], return_value: &mut [RegValue]) -> Result<(), ExecutionError> { - self.call_ex(user_data, args, return_value, ExecutionConfig::default()) - } - - /// Calls the function with the given configuration. 
- pub fn call_ex( - &self, - user_data: &mut T, - args: &[RegValue], - return_value: &mut [RegValue], - mut config: ExecutionConfig, - ) -> Result<(), ExecutionError> { - let instance_pre = &self.instance.0.instance_pre; - let export = &instance_pre.0.module.0.exports[self.export_index]; - - config.initial_regs[Reg::A0 as usize..Reg::A0 as usize + args.len()].copy_from_slice(args); - - let mutable = &self.instance.0.mutable; - let mut mutable = match mutable.lock() { - Ok(mutable) => mutable, - Err(poison) => poison.into_inner(), - }; - - let mutable = &mut *mutable; - if let Some(ref mut tracer) = mutable.tracer() { - tracer.on_before_call(self.export_index, export, &config); - } - - let mut on_hostcall = on_hostcall( - user_data, - &instance_pre.0.host_functions, - &instance_pre.0.module.0.imports, - instance_pre.0.fallback_handler.as_ref(), - &mut mutable.raw, - ); - - log::trace!( - "Calling into {}... (gas limit = {:?})", - export.symbol(), - self.instance.0.instance_pre.0.module.0.gas_metering.and(config.gas) - ); - let result = mutable.backend.call(self.export_index, &mut on_hostcall, &config); - core::mem::drop(on_hostcall); - - if let Some(ref mut tracer) = mutable.tracer() { - tracer.on_after_call(); - } - - match result { - Ok(()) => { - log::trace!( - "...execution finished: success, leftover gas = {:?}", - mutable.backend.access().gas_remaining() - ); - } - Err(ExecutionError::Error(error)) => { - log::trace!("...execution finished: error: {error}"); - - return Err(ExecutionError::Error( - format!("failed to call function {}: {}", export.symbol(), error).into(), - )); - } - Err(ExecutionError::Trap(trap)) => { - log::trace!("...execution finished: trapped"); - return Err(ExecutionError::Trap(trap)); - } - Err(ExecutionError::OutOfGas) => { - log::trace!("...execution finished: ran out of gas"); - return Err(ExecutionError::OutOfGas); - } - } - - for (nth, return_value) in return_value.iter_mut().enumerate() { - *return_value = 
mutable.backend.access().get_reg(Reg::ARG_REGS[nth]); - } - - Ok(()) - } -} - -pub struct TypedFunc { - instance: Instance, - export_index: usize, - _phantom: PhantomData<(FnArgs, FnResult)>, -} - -impl TypedFunc -where - FnArgs: FuncArgs, - FnResult: FuncResult, -{ - /// Calls the function. - pub fn call(&self, user_data: &mut T, args: FnArgs) -> Result { - self.call_ex(user_data, args, ExecutionConfig::default()) - } - - /// Calls the function with the given configuration. - pub fn call_ex(&self, user_data: &mut T, args: FnArgs, mut config: ExecutionConfig) -> Result { - let instance_pre = &self.instance.0.instance_pre; - let export = &instance_pre.0.module.0.exports[self.export_index]; - - let mut input_count = 0; - args._set(|value| { - assert!(input_count <= Reg::MAXIMUM_INPUT_REGS); - config.initial_regs[Reg::A0 as usize + input_count] = value; - input_count += 1; - }); - - let mutable = &self.instance.0.mutable; - let mut mutable = match mutable.lock() { - Ok(mutable) => mutable, - Err(poison) => poison.into_inner(), - }; - - let mutable = &mut *mutable; - if let Some(ref mut tracer) = mutable.tracer() { - tracer.on_before_call(self.export_index, export, &config); - } - - let mut on_hostcall = on_hostcall( - user_data, - &instance_pre.0.host_functions, - &instance_pre.0.module.0.imports, - instance_pre.0.fallback_handler.as_ref(), - &mut mutable.raw, - ); - - log::trace!( - "Calling into {}... 
(gas limit = {:?})", - export.symbol(), - self.instance.0.instance_pre.0.module.0.gas_metering.and(config.gas) - ); - let result = mutable.backend.call(self.export_index, &mut on_hostcall, &config); - core::mem::drop(on_hostcall); - - if let Some(ref mut tracer) = mutable.tracer() { - tracer.on_after_call(); - } - - match result { - Ok(()) => { - log::trace!( - "...execution finished: success, leftover gas = {:?}", - mutable.backend.access().gas_remaining() - ); - } - Err(ExecutionError::Error(error)) => { - log::trace!("...execution finished: error: {error}"); - - return Err(ExecutionError::Error( - format!("failed to call function {}: {}", export.symbol(), error).into(), - )); - } - Err(ExecutionError::Trap(trap)) => { - log::trace!("...execution finished: trapped"); - return Err(ExecutionError::Trap(trap)); - } - Err(ExecutionError::OutOfGas) => { - log::trace!("...execution finished: ran out of gas"); - return Err(ExecutionError::OutOfGas); - } - } - - let mut output_count = 0; - let result = FnResult::_get(|| { - let access = mutable.backend.access(); - let value = access.get_reg(Reg::ARG_REGS[output_count]); - output_count += 1; - value - }); - - Ok(result) - } -} diff --git a/crates/polkavm/src/caller.rs b/crates/polkavm/src/caller.rs index 439f5cdd..498b91ff 100644 --- a/crates/polkavm/src/caller.rs +++ b/crates/polkavm/src/caller.rs @@ -125,6 +125,11 @@ impl CallerRaw { result } + unsafe fn sbrk(&mut self, size: u32) -> Option { + // SAFETY: The caller will make sure that the invariants hold. + unsafe { self.access_mut() }.sbrk(size) + } + unsafe fn gas_remaining(&self) -> Option { // SAFETY: The caller will make sure that the invariants hold. unsafe { self.access() }.gas_remaining() @@ -247,6 +252,11 @@ impl<'a, T> Caller<'a, T> { unsafe { self.raw.write_memory(address, data) } } + pub fn sbrk(&mut self, size: u32) -> Option { + // SAFETY: This can only be called from inside of `Caller::wrap` so this is always valid. 
+ unsafe { self.raw.sbrk(size) } + } + pub fn gas_remaining(&self) -> Option { // SAFETY: This can only be called from inside of `Caller::wrap` so this is always valid. unsafe { self.raw.gas_remaining() } diff --git a/crates/polkavm/src/compiler.rs b/crates/polkavm/src/compiler.rs index c52b21bb..08ae3541 100644 --- a/crates/polkavm/src/compiler.rs +++ b/crates/polkavm/src/compiler.rs @@ -1,21 +1,19 @@ use std::borrow::Cow; use std::collections::HashMap; -use std::sync::Arc; use polkavm_assembler::{Assembler, Label}; -use polkavm_common::error::{ExecutionError, Trap}; -use polkavm_common::init::GuestProgramInit; use polkavm_common::program::{ProgramExport, Instruction}; use polkavm_common::zygote::{ AddressTable, VM_COMPILER_MAXIMUM_EPILOGUE_LENGTH, VM_COMPILER_MAXIMUM_INSTRUCTION_LENGTH, }; use polkavm_common::abi::VM_CODE_ADDRESS_ALIGNMENT; -use crate::api::{BackendAccess, EngineState, ExecutionConfig, Module, OnHostcall, SandboxExt, VisitorWrapper}; +use crate::api::VisitorWrapper; use crate::error::{bail, Error}; -use crate::sandbox::{Sandbox, SandboxProgram, SandboxProgramInit, ExecuteArgs}; +use crate::sandbox::{Sandbox, SandboxProgram, SandboxInit}; use crate::config::{GasMeteringKind, ModuleConfig, SandboxKind}; +use crate::utils::GuestInit; #[cfg(target_arch = "x86_64")] mod amd64; @@ -37,14 +35,16 @@ pub(crate) struct Compiler<'a> { trap_label: Label, trace_label: Label, jump_table_label: Label, + sbrk_label: Label, sandbox_kind: SandboxKind, gas_metering: Option, native_code_address: u64, address_table: AddressTable, vmctx_regs_offset: usize, vmctx_gas_offset: usize, + vmctx_heap_info_offset: usize, nth_instruction_to_code_offset_map: Vec, - init: GuestProgramInit<'a>, + init: GuestInit<'a>, is_last_instruction: bool, } @@ -54,7 +54,7 @@ struct CompilationResult<'a> { export_trampolines: Vec, sysreturn_address: u64, nth_instruction_to_code_offset_map: Vec, - init: GuestProgramInit<'a>, + init: GuestInit<'a>, } impl<'a> Compiler<'a> { @@ -68,17 +68,19 @@ 
impl<'a> Compiler<'a> { address_table: AddressTable, vmctx_regs_offset: usize, vmctx_gas_offset: usize, + vmctx_heap_info_offset: usize, debug_trace_execution: bool, native_code_address: u64, instruction_count: usize, basic_block_count: usize, - init: GuestProgramInit<'a>, + init: GuestInit<'a>, ) -> Self { let mut asm = Assembler::new(); let ecall_label = asm.forward_declare_label(); let trap_label = asm.forward_declare_label(); let trace_label = asm.forward_declare_label(); let jump_table_label = asm.forward_declare_label(); + let sbrk_label = asm.forward_declare_label(); let nth_basic_block_to_label = Vec::with_capacity(basic_block_count); let mut nth_basic_block_to_machine_code_offset = Vec::new(); @@ -113,6 +115,7 @@ impl<'a> Compiler<'a> { trap_label, trace_label, jump_table_label, + sbrk_label, sandbox_kind, gas_metering: config.gas_metering, native_code_address, @@ -120,6 +123,7 @@ impl<'a> Compiler<'a> { address_table, vmctx_regs_offset, vmctx_gas_offset, + vmctx_heap_info_offset, nth_instruction_to_code_offset_map, init, is_last_instruction: instruction_count == 0, @@ -150,6 +154,7 @@ impl<'a> Compiler<'a> { self.emit_trap_trampoline(); self.emit_ecall_trampoline(); + self.emit_sbrk_trampoline(); self.emit_export_trampolines(); let label_sysreturn = self.emit_sysreturn(); @@ -337,12 +342,15 @@ impl crate::api::BackendModule for CompiledModule where S: Sandbox { exports: &'a [ProgramExport], basic_block_by_jump_table_index: &'a [u32], jump_table_index_by_basic_block: &'a [u32], - init: GuestProgramInit<'a>, + init: GuestInit<'a>, instruction_count: usize, basic_block_count: usize, debug_trace_execution: bool, ) -> Result<(Self::BackendVisitor<'a>, Self::Aux), Error> { - crate::sandbox::assert_native_page_size(); + let native_page_size = crate::sandbox::get_native_page_size(); + if native_page_size > config.page_size as usize || config.page_size as usize % native_page_size != 0 { + return Err(format!("configured page size of {} is incompatible with the 
native page size of {}", config.page_size, native_page_size).into()); + } let address_space = S::reserve_address_space().map_err(Error::from_display)?; let native_code_address = crate::sandbox::SandboxAddressSpace::native_code_address(&address_space); @@ -355,6 +363,7 @@ impl crate::api::BackendModule for CompiledModule where S: Sandbox { S::address_table(), S::vmctx_regs_offset(), S::vmctx_gas_offset(), + S::vmctx_heap_info_offset(), debug_trace_execution, native_code_address, instruction_count, @@ -366,15 +375,16 @@ impl crate::api::BackendModule for CompiledModule where S: Sandbox { } fn finish_compilation<'a>(wrapper: VisitorWrapper<'a, Self::BackendVisitor<'a>>, address_space: Self::Aux) -> Result<(crate::api::Common<'a>, Self), Error> { - let gas_metering = wrapper.visitor.gas_metering; let result = wrapper.visitor.finalize(&wrapper.common.gas_cost_for_basic_block)?; - let init = SandboxProgramInit::new(result.init) - .with_code(&result.code) - .with_jump_table(&result.jump_table) - .with_sysreturn_address(result.sysreturn_address); + let init = SandboxInit { + guest_init: result.init, + code: &result.code, + jump_table: &result.jump_table, + sysreturn_address: result.sysreturn_address + }; - let sandbox_program = S::prepare_program(init, address_space, gas_metering).map_err(Error::from_display)?; + let sandbox_program = S::prepare_program(init, address_space).map_err(Error::from_display)?; let export_trampolines = result.export_trampolines; let module = CompiledModule { @@ -388,8 +398,8 @@ impl crate::api::BackendModule for CompiledModule where S: Sandbox { } pub(crate) struct CompiledModule where S: Sandbox { - sandbox_program: S::Program, - export_trampolines: Vec, + pub(crate) sandbox_program: S::Program, + pub(crate) export_trampolines: Vec, nth_instruction_to_code_offset_map: Vec, } @@ -402,95 +412,3 @@ impl CompiledModule where S: Sandbox { &self.nth_instruction_to_code_offset_map } } - -pub(crate) struct CompiledInstance where S: SandboxExt { - 
engine_state: Arc, - module: Module, - sandbox: Option, -} - -impl CompiledInstance where S: SandboxExt { - pub fn new(engine_state: Arc, module: Module) -> Result, Error> { - let mut args = ExecuteArgs::new(); - args.set_program(&S::as_compiled_module(&module).sandbox_program); - - let mut sandbox = S::reuse_or_spawn_sandbox(&engine_state, &module)?; - sandbox - .execute(args) - .map_err(Error::from_display) - .map_err(|error| error.context("instantiation failed: failed to upload the program into the sandbox"))?; - - Ok(CompiledInstance { engine_state, module, sandbox: Some(sandbox) }) - } - - pub fn call(&mut self, export_index: usize, on_hostcall: OnHostcall, config: &ExecutionConfig) -> Result<(), ExecutionError> { - let address = S::as_compiled_module(&self.module).export_trampolines[export_index]; - let mut exec_args = ExecuteArgs::::new(); - - if config.reset_memory_after_execution { - exec_args.set_reset_memory_after_execution(); - } - - if config.clear_program_after_execution { - exec_args.set_clear_program_after_execution(); - } - - exec_args.set_call(address); - exec_args.set_initial_regs(&config.initial_regs); - if self.module.gas_metering().is_some() { - if let Some(gas) = config.gas { - exec_args.set_gas(gas); - } - } - - fn wrap_on_hostcall(on_hostcall: OnHostcall<'_>) -> impl for <'r> FnMut(u32, S::Access<'r>) -> Result<(), Trap> + '_ where S: Sandbox { - move |hostcall, access| { - let access: BackendAccess = access.into(); - on_hostcall(hostcall, access) - } - } - - let mut on_hostcall = wrap_on_hostcall::(on_hostcall); - exec_args.set_on_hostcall(&mut on_hostcall); - - let sandbox = self.sandbox.as_mut().unwrap(); - let result = match sandbox.execute(exec_args) { - Ok(()) => Ok(()), - Err(ExecutionError::Trap(trap)) => Err(ExecutionError::Trap(trap)), - Err(ExecutionError::Error(error)) => return Err(ExecutionError::Error(Error::from_display(error))), - Err(ExecutionError::OutOfGas) => return Err(ExecutionError::OutOfGas), - }; - - if 
self.module.gas_metering().is_some() && sandbox.gas_remaining_impl().is_err() { - return Err(ExecutionError::OutOfGas); - } - - result - } - - pub fn access(&'_ mut self) -> S::Access<'_> { - self.sandbox.as_mut().unwrap().access() - } - - pub fn sandbox(&self) -> &S { - self.sandbox.as_ref().unwrap() - } -} - -impl Drop for CompiledInstance where S: SandboxExt { - fn drop(&mut self) { - S::recycle_sandbox(&self.engine_state, || { - let mut sandbox = self.sandbox.take()?; - let mut exec_args = ExecuteArgs::::new(); - exec_args.set_clear_program_after_execution(); - exec_args.set_gas(polkavm_common::utils::Gas::MIN); - exec_args.set_async(true); - if let Err(error) = sandbox.execute(exec_args) { - log::warn!("Failed to cache a sandbox worker process due to an error: {error}"); - None - } else { - Some(sandbox) - } - }) - } -} diff --git a/crates/polkavm/src/compiler/amd64.rs b/crates/polkavm/src/compiler/amd64.rs index 86cb2ea7..e00efd43 100644 --- a/crates/polkavm/src/compiler/amd64.rs +++ b/crates/polkavm/src/compiler/amd64.rs @@ -358,16 +358,26 @@ impl<'a> Compiler<'a> { #[cfg_attr(not(debug_assertions), inline(always))] fn cmov(&mut self, d: Reg, s: Reg, c: Reg, condition: Condition) { - if d != c && d != s { - self.clear_reg(d); - self.push(test((self.reg_size(), conv_reg(c), conv_reg(c)))); - self.push(cmov(condition, self.reg_size(), conv_reg(d), conv_reg(s))); - } else { - self.push(xor((RegSize::R32, TMP_REG, TMP_REG))); - self.push(test((self.reg_size(), conv_reg(c), conv_reg(c)))); - self.push(cmov(condition, self.reg_size(), TMP_REG, conv_reg(s))); - self.push(mov(self.reg_size(), conv_reg(d), TMP_REG)) + if d == s { + return; } + + let d = conv_reg(d); + let s = conv_reg(s); + let c = conv_reg(c); + + self.push(test((self.reg_size(), c, c))); + self.push(cmov(condition, self.reg_size(), d, s)); + } + + #[cfg_attr(not(debug_assertions), inline(always))] + fn cmov_imm(&mut self, d: Reg, s: u32, c: Reg, condition: Condition) { + let d = conv_reg(d); + let 
c = conv_reg(c); + + self.push(test((self.reg_size(), c, c))); + self.push(mov_imm(TMP_REG, imm32(s))); + self.push(cmov(condition, self.reg_size(), d, TMP_REG)); } fn div_rem(&mut self, d: Reg, s1: Reg, s2: Reg, div_rem: DivRem, kind: Signedness) { @@ -565,6 +575,21 @@ impl<'a> Compiler<'a> { self.push(jmp(TMP_REG)); } + pub(crate) fn emit_sbrk_trampoline(&mut self) { + log::trace!("Emitting trampoline: sbrk"); + self.define_label(self.sbrk_label); + + self.push(push(TMP_REG)); + self.save_registers_to_vmctx(); + self.push(mov_imm64(TMP_REG, self.address_table.syscall_sbrk)); + self.push(pop(rdi)); + self.push(call(TMP_REG)); + self.push(push(rax)); + self.restore_registers_from_vmctx(); + self.push(pop(TMP_REG)); + self.push(ret()); + } + #[cold] pub(crate) fn trace_execution(&mut self, nth_instruction: usize) { self.push(mov_imm(TMP_REG, imm32(nth_instruction as u32))); @@ -654,6 +679,43 @@ impl<'a> InstructionVisitor for VisitorWrapper<'a, Compiler<'a>> { self.start_new_basic_block(); } + #[inline(always)] + fn sbrk(&mut self, dst: Reg, size: Reg) -> Self::ReturnTy { + let label_bump_only = self.asm.forward_declare_label(); + let label_continue = self.asm.forward_declare_label(); + let sbrk_label = self.sbrk_label; + + let dst = conv_reg(dst); + let size = conv_reg(size); + if dst != size { + self.push(mov(RegSize::R32, dst, size)); + } + + let offset = self.vmctx_heap_info_offset; + let heap_info_base = self.load_vmctx_field_address(offset); + + // Calculate new top-of-the-heap pointer. + self.push(add((RegSize::R64, dst, reg_indirect(RegSize::R64, heap_info_base)))); + // Compare it to the current threshold. + self.push(cmp((RegSize::R64, dst, reg_indirect(RegSize::R64, heap_info_base + 8)))); + // If it was less or equal to the threshold then no extra action is necessary (bump only!). + self.push(jcc_label8(Condition::BelowOrEqual, label_bump_only)); + + // The new top-of-the-heap pointer crossed the threshold, so more involved handling is necessary. 
+ // We'll either allocate new memory, or return a null pointer. + self.push(mov(RegSize::R64, TMP_REG, dst)); + self.push(call_label32(sbrk_label)); + self.push(mov(RegSize::R32, dst, TMP_REG)); + // Note: `dst` can be zero here, which is why we do the pointer bump from within the handler. + self.push(jmp_label8(label_continue)); + + self.define_label(label_bump_only); + // Only a bump was necessary, so just updated the pointer and continue. + self.push(store(RegSize::R64, reg_indirect(RegSize::R64, heap_info_base), dst)); + + self.define_label(label_continue); + } + #[inline(always)] fn ecalli(&mut self, imm: u32) -> Self::ReturnTy { let ecall_label = self.ecall_label; @@ -1011,6 +1073,16 @@ impl<'a> InstructionVisitor for VisitorWrapper<'a, Compiler<'a>> { self.cmov(d, s, c, Condition::NotEqual); } + #[inline(always)] + fn cmov_if_zero_imm(&mut self, d: Reg, c: Reg, s: u32) -> Self::ReturnTy { + self.cmov_imm(d, s, c, Condition::Equal); + } + + #[inline(always)] + fn cmov_if_not_zero_imm(&mut self, d: Reg, c: Reg, s: u32) -> Self::ReturnTy { + self.cmov_imm(d, s, c, Condition::NotEqual); + } + #[inline(always)] fn add_imm(&mut self, d: Reg, s1: Reg, s2: u32) -> Self::ReturnTy { let reg_size = self.reg_size(); diff --git a/crates/polkavm/src/config.rs b/crates/polkavm/src/config.rs index 95c0c4ac..a2e11400 100644 --- a/crates/polkavm/src/config.rs +++ b/crates/polkavm/src/config.rs @@ -252,6 +252,7 @@ pub enum GasMeteringKind { /// The configuration for a module. #[derive(Clone)] pub struct ModuleConfig { + pub(crate) page_size: u32, pub(crate) gas_metering: Option, } @@ -264,7 +265,18 @@ impl Default for ModuleConfig { impl ModuleConfig { /// Creates a new default module configuration. pub fn new() -> Self { - ModuleConfig { gas_metering: None } + ModuleConfig { + page_size: 0x4000, + gas_metering: None, + } + } + + /// Sets the page size used for the module. 
+ /// + /// Default: `16384` (16k) + pub fn set_page_size(&mut self, page_size: u32) -> &mut Self { + self.page_size = page_size; + self } /// Sets the type of gas metering to enable for this module. diff --git a/crates/polkavm/src/error.rs b/crates/polkavm/src/error.rs index 084ae1ac..f5a8c5f3 100644 --- a/crates/polkavm/src/error.rs +++ b/crates/polkavm/src/error.rs @@ -51,6 +51,18 @@ impl Error { Error(ErrorKind::Static(message)) } + #[cold] + pub(crate) fn from_execution_error(error: ExecutionError) -> Self + where + E: core::fmt::Display, + { + match error { + ExecutionError::Error(error) => Error::from_display(error), + ExecutionError::Trap(_) => Error::from_display("unexpected trap"), + ExecutionError::OutOfGas => Error::from_display("unexpected out-of-gas"), + } + } + #[cold] pub(crate) fn context(self, message: impl core::fmt::Display) -> Self { let string = match self.0 { diff --git a/crates/polkavm/src/interpreter.rs b/crates/polkavm/src/interpreter.rs index 90989b67..3f2db24d 100644 --- a/crates/polkavm/src/interpreter.rs +++ b/crates/polkavm/src/interpreter.rs @@ -1,13 +1,16 @@ -use crate::api::{BackendAccess, ExecutionConfig, MemoryAccessError, Module, OnHostcall}; -use crate::error::{bail, Error}; +use crate::api::{BackendAccess, ExecuteArgs, HostcallHandler, MemoryAccessError, Module}; +use crate::error::Error; +use crate::utils::GuestInit; use crate::utils::RegImm; use core::mem::MaybeUninit; use polkavm_common::abi::{VM_ADDR_RETURN_TO_HOST, VM_CODE_ADDRESS_ALIGNMENT}; use polkavm_common::error::Trap; -use polkavm_common::init::GuestProgramInit; use polkavm_common::operation::*; use polkavm_common::program::{Instruction, InstructionVisitor, Reg}; -use polkavm_common::utils::{byte_slice_init, Access, AsUninitSliceMut, Gas}; +use polkavm_common::utils::{align_to_next_page_usize, byte_slice_init, Access, AsUninitSliceMut, Gas}; +use polkavm_common::{ + VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION, VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION, 
VM_RPC_FLAG_RESET_MEMORY_BEFORE_EXECUTION, +}; type ExecutionError = polkavm_common::error::ExecutionError; @@ -19,15 +22,15 @@ pub(crate) struct InterpretedModule { } impl InterpretedModule { - pub fn new(init: GuestProgramInit, gas_cost_for_basic_block: Vec, instructions: Vec) -> Result { - let memory_config = init.memory_config().map_err(Error::from_static_str)?; - let mut ro_data: Vec<_> = init.ro_data().into(); - ro_data.resize(memory_config.ro_data_size() as usize, 0); + pub fn new(init: GuestInit, gas_cost_for_basic_block: Vec, instructions: Vec) -> Result { + let memory_map = init.memory_map().map_err(Error::from_static_str)?; + let mut ro_data: Vec<_> = init.ro_data.into(); + ro_data.resize(memory_map.ro_data_size() as usize, 0); Ok(InterpretedModule { instructions, ro_data, - rw_data: init.rw_data().into(), + rw_data: init.rw_data.into(), gas_cost_for_basic_block, }) } @@ -38,13 +41,13 @@ pub(crate) type OnStore<'a> = &'a mut dyn for<'r> FnMut(u32, &'r [u8]) -> Result #[derive(Default)] pub(crate) struct InterpreterContext<'a> { - on_hostcall: Option>, + on_hostcall: Option>, on_set_reg: Option>, on_store: Option>, } impl<'a> InterpreterContext<'a> { - pub fn set_on_hostcall(&mut self, on_hostcall: OnHostcall<'a>) { + pub fn set_on_hostcall(&mut self, on_hostcall: HostcallHandler<'a>) { self.on_hostcall = Some(on_hostcall); } @@ -58,8 +61,8 @@ impl<'a> InterpreterContext<'a> { } pub(crate) struct InterpretedInstance { - module: Module, - heap: Vec, + module: Option, + rw_data: Vec, stack: Vec, regs: [u32; Reg::ALL.len()], nth_instruction: u32, @@ -68,24 +71,16 @@ pub(crate) struct InterpretedInstance { cycle_counter: u64, gas_remaining: Option, in_new_execution: bool, + is_memory_dirty: bool, + heap_size: u32, } impl InterpretedInstance { - pub fn new(module: Module) -> Result { - if module.interpreted_module().is_none() { - bail!("an interpreter cannot be created from the given module") - } - - let mut heap = Vec::new(); - let mut stack = Vec::new(); - - 
heap.reserve_exact(module.memory_config().heap_size() as usize); - stack.reserve_exact(module.memory_config().stack_size() as usize); - - let mut interpreter = Self { - heap, - stack, - module, + pub fn new() -> Self { + Self { + module: None, + rw_data: Vec::new(), + stack: Vec::new(), regs: [0; Reg::ALL.len()], nth_instruction: VM_ADDR_RETURN_TO_HOST, nth_basic_block: 0, @@ -93,26 +88,31 @@ impl InterpretedInstance { cycle_counter: 0, gas_remaining: None, in_new_execution: false, - }; - - if interpreter.module.gas_metering().is_some() { - interpreter.gas_remaining = Some(0); + is_memory_dirty: false, + heap_size: 0, } + } - interpreter.reset_memory(); - Ok(interpreter) + pub fn new_from_module(module: &Module) -> Result { + let mut instance = InterpretedInstance::new(); + let mut args = ExecuteArgs::new(); + args.module = Some(module); + instance.execute(args).map_err(Error::from_execution_error)?; + + Ok(instance) } - pub fn call(&mut self, export_index: usize, on_hostcall: OnHostcall, config: &ExecutionConfig) -> Result<(), ExecutionError> { - let mut ctx = InterpreterContext::default(); - ctx.set_on_hostcall(on_hostcall); - self.prepare_for_call(export_index, config); + pub fn execute(&mut self, mut args: ExecuteArgs) -> Result<(), ExecutionError> { + self.prepare_for_execution(&args); - let result = self.run(ctx); - if config.reset_memory_after_execution { - self.reset_memory(); + let mut ctx = InterpreterContext::default(); + if let Some(hostcall_handler) = args.hostcall_handler.take() { + ctx.set_on_hostcall(hostcall_handler); } + let result = if args.entry_point.is_some() { self.run(ctx) } else { Ok(()) }; + + self.finish_execution(args.flags); result } @@ -125,21 +125,22 @@ impl InterpretedInstance { }) } + self.is_memory_dirty = true; + if self.in_new_execution { self.in_new_execution = false; translate_error(self.on_start_new_basic_block())?; } + let Some(module) = self.module.as_ref() else { + return Err(ExecutionError::Error(Error::from_static_str("no 
module loaded"))); + }; + + let module = module.clone(); let mut visitor = Visitor { inner: self, ctx }; loop { visitor.inner.cycle_counter += 1; - let Some(instruction) = visitor - .inner - .module - .instructions() - .get(visitor.inner.nth_instruction as usize) - .copied() - else { + let Some(instruction) = module.instructions().get(visitor.inner.nth_instruction as usize).copied() else { return Err(ExecutionError::Trap(Default::default())); }; @@ -152,57 +153,139 @@ impl InterpretedInstance { Ok(()) } + pub fn step_once(&mut self, ctx: InterpreterContext) -> Result<(), ExecutionError> { + if self.in_new_execution { + self.in_new_execution = false; + self.on_start_new_basic_block()?; + } + + self.cycle_counter += 1; + let module = self.module.as_ref().expect("no module loaded"); + let Some(instruction) = module.instructions().get(self.nth_instruction as usize).copied() else { + return Err(ExecutionError::Trap(Default::default())); + }; + + let mut visitor = Visitor { inner: self, ctx }; + instruction.visit(&mut visitor) + } + + fn reset_instance(&mut self) { + self.rw_data.clear(); + self.stack.clear(); + + *self = Self { + rw_data: core::mem::take(&mut self.rw_data), + stack: core::mem::take(&mut self.stack), + ..Self::new() + }; + } + pub fn reset_memory(&mut self) { - let interpreted_module = self.module.interpreted_module().unwrap(); - self.heap.clear(); - self.heap.extend_from_slice(&interpreted_module.rw_data); - self.heap.resize(self.module.memory_config().heap_size() as usize, 0); + if self.is_memory_dirty { + self.force_reset_memory(); + } + } + + fn force_reset_memory(&mut self) { + self.rw_data.clear(); self.stack.clear(); - self.stack.resize(self.module.memory_config().stack_size() as usize, 0); + self.heap_size = 0; + self.is_memory_dirty = false; + + if let Some(module) = self.module.as_ref() { + let interpreted_module = module.interpreted_module().unwrap(); + self.rw_data.extend_from_slice(&interpreted_module.rw_data); + 
self.rw_data.resize(module.memory_map().rw_data_size() as usize, 0); + self.stack.resize(module.memory_map().stack_size() as usize, 0); + } } - pub fn prepare_for_call(&mut self, export_index: usize, config: &ExecutionConfig) { - // TODO: If this function becomes public then this needs to return an error. - let nth_basic_block = self - .module - .get_export(export_index) - .expect("internal error: invalid export index") - .jump_target(); + pub fn sbrk(&mut self, size: u32) -> Option { + let module = self.module.as_ref()?; + let new_heap_size = self.heap_size.checked_add(size)?; + let memory_map = module.memory_map(); + if new_heap_size > memory_map.max_heap_size() { + return None; + } - let nth_instruction = self - .module - .instruction_by_basic_block(nth_basic_block) - .expect("internal error: invalid export address"); + log::trace!("sbrk: +{} (heap size: {} -> {})", size, self.heap_size, new_heap_size); - self.return_to_host = false; - self.regs.copy_from_slice(&config.initial_regs); - self.nth_instruction = nth_instruction; - self.nth_basic_block = nth_basic_block; - if self.module.gas_metering().is_some() { - if let Some(gas) = config.gas { + self.heap_size = new_heap_size; + let heap_top = memory_map.heap_base() + new_heap_size; + if heap_top as usize > memory_map.rw_data_address() as usize + self.rw_data.len() { + let new_size = align_to_next_page_usize(memory_map.page_size() as usize, heap_top as usize).unwrap() + - memory_map.rw_data_address() as usize; + log::trace!("sbrk: growing memory: {} -> {}", self.rw_data.len(), new_size); + self.rw_data.resize(new_size, 0); + } + + Some(heap_top) + } + + pub fn prepare_for_execution(&mut self, args: &ExecuteArgs) { + if let Some(module) = args.module { + if module.interpreted_module().is_none() { + panic!("internal_error: an interpreter cannot be created from the given module"); + } + + self.reset_instance(); + self.module = Some(module.clone()); + if module.gas_metering().is_some() { + self.gas_remaining = 
Some(0); + } + + self.force_reset_memory(); + } + + if let Some(regs) = args.regs { + self.regs.copy_from_slice(regs); + } + + if self.module.as_ref().and_then(|module| module.gas_metering()).is_some() { + if let Some(gas) = args.gas { self.gas_remaining = Some(gas.get() as i64); } } else { self.gas_remaining = None; } - self.in_new_execution = true; - } + if let Some(entry_point) = args.entry_point { + let module = self + .module + .as_ref() + .expect("internal error: tried to call into an instance without a loaded module"); - pub fn step_once(&mut self, ctx: InterpreterContext) -> Result<(), ExecutionError> { - if self.in_new_execution { - self.in_new_execution = false; - self.on_start_new_basic_block()?; + let nth_basic_block = module + .get_export(entry_point) + .expect("internal error: invalid export index") + .jump_target(); + + let nth_instruction = module + .instruction_by_basic_block(nth_basic_block) + .expect("internal error: invalid export address"); + + self.nth_instruction = nth_instruction; + self.nth_basic_block = nth_basic_block; } - self.cycle_counter += 1; - let Some(instruction) = self.module.instructions().get(self.nth_instruction as usize).copied() else { - return Err(ExecutionError::Trap(Default::default())); - }; + if args.flags & VM_RPC_FLAG_RESET_MEMORY_BEFORE_EXECUTION != 0 { + self.reset_memory(); + } - let mut visitor = Visitor { inner: self, ctx }; + if args.sbrk > 0 { + self.sbrk(args.sbrk).expect("internal error: sbrk failed"); + } - instruction.visit(&mut visitor) + self.return_to_host = false; + self.in_new_execution = true; + } + + pub fn finish_execution(&mut self, flags: u32) { + if flags & VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION != 0 { + self.reset_instance(); + } else if flags & VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION != 0 { + self.reset_memory(); + } } pub fn access(&mut self) -> InterpretedAccess { @@ -210,39 +293,40 @@ impl InterpretedInstance { } fn get_memory_slice(&self, address: u32, length: u32) -> Option<&[u8]> { - 
let memory_config = self.module.memory_config(); - let (range, memory) = if memory_config.ro_data_range().contains(&address) { - let module = self.module.interpreted_module().unwrap(); - (memory_config.ro_data_range(), &module.ro_data) - } else if memory_config.heap_range().contains(&address) { - (memory_config.heap_range(), &self.heap) - } else if memory_config.stack_range().contains(&address) { - (memory_config.stack_range(), &self.stack) + let module = self.module.as_ref()?; + let memory_map = module.memory_map(); + let (start, memory_slice) = if address >= memory_map.stack_address_low() { + (memory_map.stack_address_low(), &self.stack) + } else if address >= memory_map.rw_data_address() { + (memory_map.rw_data_address(), &self.rw_data) + } else if address >= memory_map.ro_data_address() { + let module = module.interpreted_module().unwrap(); + (memory_map.ro_data_address(), &module.ro_data) } else { return None; }; - let offset = address - range.start; - memory.get(offset as usize..offset as usize + length as usize) + let offset = address - start; + memory_slice.get(offset as usize..offset as usize + length as usize) } fn get_memory_slice_mut(&mut self, address: u32, length: u32) -> Option<&mut [u8]> { - let memory_config = self.module.memory_config(); - let (range, memory_slice) = if memory_config.heap_range().contains(&address) { - (memory_config.heap_range(), &mut self.heap) - } else if memory_config.stack_range().contains(&address) { - (memory_config.stack_range(), &mut self.stack) + let memory_map = self.module.as_ref()?.memory_map(); + let (start, memory_slice) = if address >= memory_map.stack_address_low() { + (memory_map.stack_address_low(), &mut self.stack) + } else if address >= memory_map.rw_data_address() { + (memory_map.rw_data_address(), &mut self.rw_data) } else { return None; }; - let offset = (address - range.start) as usize; + let offset = (address - start) as usize; memory_slice.get_mut(offset..offset + length as usize) } fn 
on_start_new_basic_block(&mut self) -> Result<(), ExecutionError> { if let Some(ref mut gas_remaining) = self.gas_remaining { - let module = self.module.interpreted_module().unwrap(); + let module = self.module.as_ref().unwrap().interpreted_module().unwrap(); let gas_cost = i64::from(module.gas_cost_for_basic_block[self.nth_basic_block as usize]); log::trace!( @@ -305,6 +389,8 @@ impl<'a> Access<'a> for InterpretedAccess<'a> { } fn write_memory(&mut self, address: u32, data: &[u8]) -> Result<(), Self::Error> { + self.instance.is_memory_dirty = true; + let Some(slice) = self.instance.get_memory_slice_mut(address, data.len() as u32) else { return Err(MemoryAccessError { address, @@ -317,6 +403,14 @@ impl<'a> Access<'a> for InterpretedAccess<'a> { Ok(()) } + fn sbrk(&mut self, size: u32) -> Option { + self.instance.sbrk(size) + } + + fn heap_size(&self) -> u32 { + self.instance.heap_size + } + fn program_counter(&self) -> Option { Some(self.instance.nth_instruction) } @@ -392,6 +486,8 @@ impl<'a, 'b> Visitor<'a, 'b> { self.inner.nth_instruction = self .inner .module + .as_ref() + .unwrap() .instruction_by_basic_block(target) .expect("internal error: couldn't fetch the instruction index for a branch"); self.inner.nth_basic_block = target; @@ -414,8 +510,11 @@ impl<'a, 'b> Visitor<'a, 'b> { pc = self.inner.nth_instruction, cycle = self.inner.cycle_counter ); + self.inner .module + .as_ref() + .unwrap() .debug_print_location(log::Level::Debug, self.inner.nth_instruction); return Err(ExecutionError::Trap(Default::default())); }; @@ -430,6 +529,7 @@ impl<'a, 'b> Visitor<'a, 'b> { fn store(&mut self, src: impl Into, base: Option, offset: u32) -> Result<(), ExecutionError> { assert!(core::mem::size_of::() >= 1); + self.inner.is_memory_dirty = true; let address = base.map_or(0, |base| self.inner.regs[base as usize]).wrapping_add(offset); let value = match src.into() { @@ -453,6 +553,8 @@ impl<'a, 'b> Visitor<'a, 'b> { ); self.inner .module + .as_ref() + .unwrap() 
.debug_print_location(log::Level::Debug, self.inner.nth_instruction); return Err(ExecutionError::Trap(Default::default())); }; @@ -471,6 +573,8 @@ impl<'a, 'b> Visitor<'a, 'b> { fn get_return_address(&self) -> u32 { self.inner .module + .as_ref() + .unwrap() .jump_table_index_by_basic_block(self.inner.nth_basic_block + 1) .expect("internal error: couldn't fetch the jump table index for the return basic block") * VM_CODE_ADDRESS_ALIGNMENT @@ -509,6 +613,8 @@ impl<'a, 'b> Visitor<'a, 'b> { let Some(nth_basic_block) = self .inner .module + .as_ref() + .unwrap() .basic_block_by_jump_table_index(target / VM_CODE_ADDRESS_ALIGNMENT) else { return Err(ExecutionError::Trap(Default::default())); @@ -517,6 +623,8 @@ impl<'a, 'b> Visitor<'a, 'b> { let nth_instruction = self .inner .module + .as_ref() + .unwrap() .instruction_by_basic_block(nth_basic_block) .expect("internal error: couldn't fetch the instruction index for a dynamic jump"); @@ -605,6 +713,14 @@ impl<'a, 'b> InstructionVisitor for Visitor<'a, 'b> { self.inner.on_start_new_basic_block() } + fn sbrk(&mut self, dst: Reg, size: Reg) -> Self::ReturnTy { + let size = self.get(size); + let result = self.inner.sbrk(size).unwrap_or(0); + self.set(dst, result)?; + self.inner.nth_instruction += 1; + Ok(()) + } + fn ecalli(&mut self, imm: u32) -> Self::ReturnTy { if let Some(on_hostcall) = self.ctx.on_hostcall.as_mut() { let access = BackendAccess::Interpreted(self.inner.access()); @@ -772,11 +888,41 @@ impl<'a, 'b> InstructionVisitor for Visitor<'a, 'b> { } fn cmov_if_zero(&mut self, d: Reg, s: Reg, c: Reg) -> Self::ReturnTy { - self.set3(d, s, c, |s, c| if c == 0 { s } else { 0 }) + if self.get(c) == 0 { + let value = self.get(s); + self.set(d, value)?; + } + + self.inner.nth_instruction += 1; + Ok(()) + } + + fn cmov_if_zero_imm(&mut self, d: Reg, c: Reg, s: u32) -> Self::ReturnTy { + if self.get(c) == 0 { + self.set(d, s)?; + } + + self.inner.nth_instruction += 1; + Ok(()) } fn cmov_if_not_zero(&mut self, d: Reg, s: Reg, 
c: Reg) -> Self::ReturnTy { - self.set3(d, s, c, |s, c| if c != 0 { s } else { 0 }) + if self.get(c) != 0 { + let value = self.get(s); + self.set(d, value)?; + } + + self.inner.nth_instruction += 1; + Ok(()) + } + + fn cmov_if_not_zero_imm(&mut self, d: Reg, c: Reg, s: u32) -> Self::ReturnTy { + if self.get(c) != 0 { + self.set(d, s)?; + } + + self.inner.nth_instruction += 1; + Ok(()) } fn add_imm(&mut self, d: Reg, s1: Reg, s2: u32) -> Self::ReturnTy { @@ -939,6 +1085,8 @@ impl<'a, 'b> InstructionVisitor for Visitor<'a, 'b> { let nth_instruction = self .inner .module + .as_ref() + .unwrap() .instruction_by_basic_block(target) .expect("internal error: couldn't fetch the instruction index for a jump"); diff --git a/crates/polkavm/src/lib.rs b/crates/polkavm/src/lib.rs index 74244506..0366e9d1 100644 --- a/crates/polkavm/src/lib.rs +++ b/crates/polkavm/src/lib.rs @@ -55,12 +55,13 @@ if_compiler_is_supported! { } pub use polkavm_common::{ + abi::MemoryMap, error::{ExecutionError, Trap}, program::{ProgramBlob, ProgramParseError, Reg}, utils::{AsUninitSliceMut, Gas}, }; -pub use crate::api::{Engine, ExecutionConfig, Func, Instance, InstancePre, Linker, Module, TypedFunc}; +pub use crate::api::{CallArgs, Engine, ExportIndex, Instance, InstancePre, Linker, Module, StateArgs}; pub use crate::caller::{Caller, CallerRef}; pub use crate::config::{BackendKind, Config, GasMeteringKind, ModuleConfig, SandboxKind}; pub use crate::error::Error; diff --git a/crates/polkavm/src/sandbox.rs b/crates/polkavm/src/sandbox.rs index 33ca1985..3a999492 100644 --- a/crates/polkavm/src/sandbox.rs +++ b/crates/polkavm/src/sandbox.rs @@ -1,21 +1,21 @@ use std::borrow::Cow; +use std::sync::{Arc, Mutex}; +use core::sync::atomic::{AtomicUsize, Ordering}; use polkavm_common::{ - abi::VM_PAGE_SIZE, - error::{ExecutionError, Trap}, - init::GuestProgramInit, - program::Reg, + error::ExecutionError, zygote::{ AddressTable, SandboxMemoryConfig, - VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION, - 
VM_RPC_FLAG_RECONFIGURE, VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION, }, - utils::{Access, Gas} + utils::{Access, Gas, align_to_next_page_usize} }; -use crate::api::BackendAccess; +use crate::api::{BackendAccess, EngineState, ExecuteArgs, Module}; +use crate::compiler::CompiledModule; use crate::config::{GasMeteringKind, SandboxKind}; +use crate::utils::GuestInit; +use crate::error::Error; macro_rules! get_field_offset { ($struct:expr, |$struct_ident:ident| $get_field:expr) => {{ @@ -53,15 +53,6 @@ pub(crate) fn get_native_page_size() -> usize { unsafe { sysconf(_SC_PAGESIZE) as usize } } -pub(crate) fn assert_native_page_size() { - let native_page_size = get_native_page_size(); - assert!( - native_page_size <= VM_PAGE_SIZE as usize && VM_PAGE_SIZE as usize % native_page_size == 0, - "unsupported native page size: {}", - native_page_size - ); -} - #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub(crate) struct OutOfGas; @@ -86,468 +77,257 @@ pub(crate) trait Sandbox: Sized { type Program: SandboxProgram; type AddressSpace: SandboxAddressSpace; + fn as_sandbox_vec(sandbox_vec: &SandboxVec) -> &Mutex>; + fn as_compiled_module(module: &Module) -> &CompiledModule; + fn reserve_address_space() -> Result; - fn prepare_program(init: SandboxProgramInit, address_space: Self::AddressSpace, gas_metering: Option) -> Result; + fn prepare_program(init: SandboxInit, address_space: Self::AddressSpace) -> Result; fn spawn(config: &Self::Config) -> Result; - fn execute(&mut self, args: ExecuteArgs) -> Result<(), ExecutionError>; + fn execute(&mut self, args: ExecuteArgs) -> Result<(), ExecutionError>; fn access(&'_ mut self) -> Self::Access<'_>; fn pid(&self) -> Option; fn address_table() -> AddressTable; fn vmctx_regs_offset() -> usize; fn vmctx_gas_offset() -> usize; + fn vmctx_heap_info_offset() -> usize; fn gas_remaining_impl(&self) -> Result, OutOfGas>; fn sync(&mut self) -> Result<(), Self::Error>; } -pub(crate) type OnHostcall<'a, T> = &'a mut dyn for<'r> FnMut(u32, ::Access<'r>) 
-> Result<(), Trap>; - -#[derive(Copy, Clone)] -pub struct SandboxProgramInit<'a> { - guest_init: GuestProgramInit<'a>, - code: &'a [u8], - jump_table: &'a [u8], - sysreturn_address: u64, +#[derive(Copy, Clone, Default)] +pub struct SandboxInit<'a> { + pub guest_init: GuestInit<'a>, + pub code: &'a [u8], + pub jump_table: &'a [u8], + pub sysreturn_address: u64, } -impl<'a> Default for SandboxProgramInit<'a> { - fn default() -> Self { - Self::new(Default::default()) - } -} +impl<'a> SandboxInit<'a> { + fn memory_config(&self, native_page_size: usize) -> Result { + let memory_map = self.guest_init.memory_map()?; + let mut ro_data_fd_size = align_to_next_page_usize(native_page_size, self.guest_init.ro_data.len()).unwrap() as u32; + if memory_map.ro_data_size() - ro_data_fd_size < memory_map.page_size() { + ro_data_fd_size = memory_map.ro_data_size(); + } -impl<'a> core::ops::Deref for SandboxProgramInit<'a> { - type Target = GuestProgramInit<'a>; - fn deref(&self) -> &Self::Target { - &self.guest_init + let rw_data_fd_size = align_to_next_page_usize(native_page_size, self.guest_init.rw_data.len()).unwrap() as u32; + let code_size = align_to_next_page_usize(native_page_size, self.code.len()).unwrap() as u32; + let jump_table_size = align_to_next_page_usize(native_page_size, self.jump_table.len()).unwrap() as u32; + + Ok(SandboxMemoryConfig { + memory_map, + ro_data_fd_size, + rw_data_fd_size, + code_size, + jump_table_size, + sysreturn_address: self.sysreturn_address, + }) } } -impl<'a> SandboxProgramInit<'a> { - pub fn new(guest_init: GuestProgramInit<'a>) -> Self { - Self { - guest_init, - code: &[], - jump_table: &[], - sysreturn_address: 0, - } - } - - pub fn with_code(mut self, code: &'a [u8]) -> Self { - self.code = code; - self +pub(crate) fn get_gas(args: &ExecuteArgs, gas_metering: Option) -> Option { + if args.module.is_none() && args.gas.is_none() && gas_metering.is_some() { + // Keep whatever value was set there previously. 
+ return None; } - pub fn with_jump_table(mut self, jump_table: &'a [u8]) -> Self { - self.jump_table = jump_table; - self + let gas = args.gas.unwrap_or(Gas::MIN); + if gas_metering.is_some() { + Some(gas.get() as i64) + } else { + Some(0) } +} - pub fn with_sysreturn_address(mut self, address: u64) -> Self { - self.sysreturn_address = address; - self - } +pub(crate) struct SandboxInstance where S: Sandbox { + engine_state: Arc, + sandbox: Option +} - fn memory_config(&self, native_page_size: usize) -> Result { - let mut config = SandboxMemoryConfig::empty(); - config.set_guest_config(self.guest_init.memory_config()?); - config.set_code_size(native_page_size, self.code.len())?; - config.set_jump_table_size(native_page_size, self.jump_table.len())?; +impl SandboxInstance where S: Sandbox { + pub fn spawn_and_load_module(engine_state: Arc, module: &Module) -> Result { + let mut sandbox = SandboxInstance { + sandbox: Some(reuse_or_spawn_sandbox::(&engine_state, module)?), + engine_state, + }; - Ok(config) - } -} + let mut args = ExecuteArgs::new(); + args.module = Some(module); -pub(crate) struct ExecuteArgs<'a, T> where T: Sandbox + 'a { - rpc_address: u64, - rpc_flags: u32, - program: Option<&'a T::Program>, - on_hostcall: Option>, - initial_regs: &'a [u32], - gas: Option, - is_async: bool, -} + sandbox + .execute(args) + .map_err(Error::from_display) + .map_err(|error| error.context("instantiation failed: failed to upload the program into the sandbox"))?; -impl<'a, T> Default for ExecuteArgs<'a, T> where T: Sandbox { - fn default() -> Self { - Self::new() + Ok(sandbox) } -} -impl<'a, T> ExecuteArgs<'a, T> where T: Sandbox { - #[inline] - pub fn new() -> Self { - static EMPTY_REGS: &[u32; Reg::ALL.len()] = &[0; Reg::ALL.len()]; - ExecuteArgs { - rpc_address: 0, - rpc_flags: 0, - program: None, - on_hostcall: None, - initial_regs: EMPTY_REGS, - gas: None, - is_async: false, + pub fn execute(&mut self, args: ExecuteArgs) -> Result<(), ExecutionError> { + let sandbox 
= self.sandbox.as_mut().unwrap(); + let result = match sandbox.execute(args) { + Ok(()) => Ok(()), + Err(ExecutionError::Trap(trap)) => Err(ExecutionError::Trap(trap)), + Err(ExecutionError::Error(error)) => return Err(ExecutionError::Error(Error::from_display(error))), + Err(ExecutionError::OutOfGas) => return Err(ExecutionError::OutOfGas), + }; + + if sandbox.gas_remaining_impl().is_err() { + return Err(ExecutionError::OutOfGas); } - } - #[inline] - pub fn set_program(&mut self, program: &'a T::Program) { - self.rpc_flags |= VM_RPC_FLAG_RECONFIGURE; - self.program = Some(program); + result } - #[inline] - pub fn set_reset_memory_after_execution(&mut self) { - self.rpc_flags |= VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION; + pub fn access(&'_ mut self) -> S::Access<'_> { + self.sandbox.as_mut().unwrap().access() } - #[inline] - pub fn set_clear_program_after_execution(&mut self) { - self.rpc_flags |= VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION; + pub fn sandbox(&self) -> &S { + self.sandbox.as_ref().unwrap() } +} - #[inline] - pub fn set_call(&mut self, address: u64) { - self.rpc_address = address; +impl Drop for SandboxInstance where S: Sandbox { + fn drop(&mut self) { + recycle_sandbox::(&self.engine_state, || { + let mut sandbox = self.sandbox.take()?; + let mut args = ExecuteArgs::new(); + args.flags |= polkavm_common::VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION; + args.gas = Some(polkavm_common::utils::Gas::MIN); + args.is_async = true; + + if let Err(error) = sandbox.execute(args) { + log::warn!("Failed to cache a sandbox worker process due to an error: {error}"); + None + } else { + Some(sandbox) + } + }) } +} - #[inline] - pub fn set_on_hostcall(&mut self, callback: OnHostcall<'a, T>) { - self.on_hostcall = Some(callback); - } +pub(crate) enum SandboxVec { + #[cfg(target_os = "linux")] + Linux(Mutex>), + Generic(Mutex>), +} - #[inline] - pub fn set_initial_regs(&mut self, regs: &'a [u32]) { - assert_eq!(regs.len(), Reg::ALL.len()); - self.initial_regs = regs; - } 
+pub(crate) struct SandboxCache { + sandboxes: SandboxVec, + available_workers: AtomicUsize, + worker_limit: usize, +} - #[inline] - pub fn set_gas(&mut self, gas: Gas) { - self.gas = Some(gas); - } +impl SandboxCache { + pub(crate) fn new(kind: SandboxKind, worker_count: usize, debug_trace_execution: bool) -> Result { + let sandboxes = match kind { + SandboxKind::Linux => { + #[cfg(target_os = "linux")] + { + SandboxVec::Linux(Mutex::new(spawn_sandboxes(worker_count, debug_trace_execution)?)) + } - #[inline] - pub fn set_async(&mut self, value: bool) { - self.is_async = value; + #[cfg(not(target_os = "linux"))] + { + unreachable!() + } + }, + SandboxKind::Generic => SandboxVec::Generic(Mutex::new(spawn_sandboxes(worker_count, debug_trace_execution)?)), + }; + + Ok(SandboxCache { + sandboxes, + available_workers: AtomicUsize::new(worker_count), + worker_limit: worker_count, + }) } - fn get_gas(&self, gas_metering: Option) -> Option { - if self.program.is_none() && self.gas.is_none() && gas_metering.is_some() { - // Keep whatever value was set there previously. + fn reuse_sandbox(&self) -> Option where S: Sandbox { + if self.available_workers.load(Ordering::Relaxed) == 0 { return None; } - let gas = self.gas.unwrap_or(Gas::MIN); - if gas_metering.is_some() { - Some(gas.get() as i64) + let sandboxes = S::as_sandbox_vec(&self.sandboxes); + let mut sandboxes = match sandboxes.lock() { + Ok(sandboxes) => sandboxes, + Err(poison) => poison.into_inner(), + }; + + let mut sandbox = sandboxes.pop()?; + self.available_workers.fetch_sub(1, Ordering::Relaxed); + + if let Err(error) = sandbox.sync() { + log::warn!("Failed to reuse a sandbox: {error}"); + None } else { - Some(0) + Some(sandbox) } } } -#[cfg(test)] -macro_rules! 
sandbox_tests { - ($sandbox_kind:ident) => { - mod $sandbox_kind { - use crate::sandbox::Sandbox as _; - use crate::sandbox::SandboxConfig as _; - use crate::sandbox::SandboxAddressSpace as _; - use crate::sandbox::{SandboxKind, SandboxProgramInit, ExecuteArgs, get_native_page_size}; - use polkavm_assembler::amd64::addr::*; - use polkavm_assembler::amd64::inst::*; - use polkavm_assembler::amd64::Reg::*; - use polkavm_assembler::amd64::{LoadKind, RegSize, Size}; - use polkavm_assembler::Assembler; - use polkavm_common::init::GuestProgramInit; - use polkavm_common::utils::Access; - use polkavm_common::error::ExecutionError; - - use crate::sandbox::$sandbox_kind::{Sandbox, SandboxConfig}; - - fn emit_sysreturn(asm: &mut Assembler) { - asm.push(mov_imm64(rcx, Sandbox::address_table().syscall_return)); - asm.push(jmp(rcx)); - } - - #[test] - fn spawn_stress_test() { - let _ = env_logger::try_init(); - let init = GuestProgramInit::new().with_ro_data(&[0x00]).with_bss(1); - let init = SandboxProgramInit::new(init); - - let mut asm = Assembler::new(); - emit_sysreturn(&mut asm); - - let code = asm.finalize(); - let address_space = Sandbox::reserve_address_space().unwrap(); - let native_code_address = address_space.native_code_address(); - let program = Sandbox::prepare_program(init.with_code(&code), address_space, None).unwrap(); - - const THREAD_COUNT: usize = 32; - let barrier = std::sync::Arc::new(std::sync::Barrier::new(THREAD_COUNT)); - - let mut threads = Vec::new(); - for _ in 0..THREAD_COUNT { - let program = program.clone(); - let barrier = barrier.clone(); - let thread = std::thread::spawn(move || { - barrier.wait(); - for _ in 0..32 { - let mut args = ExecuteArgs::new(); - args.set_program(&program); - args.set_call(native_code_address); - - let mut config = SandboxConfig::default(); - config.enable_logger(true); - - let mut sandbox = Sandbox::spawn(&config).unwrap(); - sandbox.execute(args).unwrap(); - } - }); - threads.push(thread); - } +fn 
spawn_sandboxes(count: usize, debug_trace_execution: bool) -> Result, Error> where S: Sandbox { + use crate::sandbox::SandboxConfig; - let mut results = Vec::new(); - for thread in threads { - results.push(thread.join()); - } + let mut sandbox_config = S::Config::default(); + sandbox_config.enable_logger(cfg!(test) || debug_trace_execution); - for result in results { - result.unwrap(); - } - } - - #[test] - fn basic_execution_works() { - let _ = env_logger::try_init(); - - let init = GuestProgramInit::new().with_ro_data(&[0xaa, 0xbb]).with_bss(1); - let init = SandboxProgramInit::new(init); - - let mem = init.memory_config(get_native_page_size()).unwrap(); - let mut asm = Assembler::new(); - if Sandbox::KIND != SandboxKind::Generic { - asm.push(mov_imm(r15, imm32(0))); - } + let mut sandboxes = Vec::with_capacity(count); + for nth in 0..count { + let sandbox = S::spawn(&sandbox_config) + .map_err(crate::Error::from_display) + .map_err(|error| error.context(format!("failed to create a worker process ({} out of {})", nth + 1, count)))?; - asm - .push(load(LoadKind::U32, rax, reg_indirect(RegSize::R64, r15 + mem.ro_data_address().try_into().unwrap()))) - .push(store(Size::U8, reg_indirect(RegSize::R64, r15 + i32::try_from(mem.rw_data_address()).unwrap()), rax)) - .push(store(Size::U16, reg_indirect(RegSize::R64, r15 + (i32::try_from(mem.rw_data_address()).unwrap() + 4)), rax)); - - emit_sysreturn(&mut asm); - let code = asm.finalize(); - let address_space = Sandbox::reserve_address_space().unwrap(); - let native_code_address = address_space.native_code_address(); - let program = Sandbox::prepare_program(init.with_code(&code), address_space, None).unwrap(); - let mut args = ExecuteArgs::new(); - args.set_program(&program); - args.set_call(native_code_address); - - let mut config = SandboxConfig::default(); - config.enable_logger(true); - - let mut sandbox = Sandbox::spawn(&config).unwrap(); - sandbox.execute(args).unwrap(); - - assert_eq!( - 
sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 8).unwrap(), - [0xaa, 0x00, 0x00, 0x00, 0xaa, 0xbb, 0x00, 0x00,] - ); - } - - #[test] - fn program_memory_can_be_reused_and_cleared() { - let _ = env_logger::try_init(); - - let init = GuestProgramInit::new().with_bss(1); - let init = SandboxProgramInit::new(init); - let mem = init.memory_config(get_native_page_size()).unwrap(); - let mut asm = Assembler::new(); - if Sandbox::KIND != SandboxKind::Generic { - asm.push(mov_imm(r15, imm32(0))); - } - - asm - .push(load(LoadKind::U32, rax, reg_indirect(RegSize::R64, r15 + mem.rw_data_address().try_into().unwrap()))) - .push(add((rax, imm64(1)))) - .push(store(Size::U32, reg_indirect(RegSize::R64, r15 + i32::try_from(mem.rw_data_address()).unwrap()), rax)); - - emit_sysreturn(&mut asm); - let code = asm.finalize(); - let address_space = Sandbox::reserve_address_space().unwrap(); - let native_code_address = address_space.native_code_address(); - let program = Sandbox::prepare_program(init.with_code(&code), address_space, None).unwrap(); + sandboxes.push(sandbox); + } - let mut sandbox = Sandbox::spawn(&Default::default()).unwrap(); - assert!(sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).is_err()); + Ok(sandboxes) +} - { - let mut args = ExecuteArgs::new(); - args.set_program(&program); - sandbox.execute(args).unwrap(); - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x00, 0x00, 0x00, 0x00] - ); - } +fn reuse_or_spawn_sandbox(engine_state: &EngineState, module: &Module) -> Result where S: Sandbox { + use crate::sandbox::SandboxConfig; - { - let mut args = ExecuteArgs::new(); - args.set_call(native_code_address); - sandbox.execute(args).unwrap(); - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x01, 0x00, 0x00, 0x00] - ); - } + let mut sandbox_config = S::Config::default(); + sandbox_config.enable_logger(cfg!(test) || 
module.is_debug_trace_execution_enabled()); - { - let mut args = ExecuteArgs::new(); - args.set_call(native_code_address); - sandbox.execute(args).unwrap(); - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x02, 0x00, 0x00, 0x00] - ); - } + if let Some(sandbox) = engine_state.sandbox_cache().and_then(|cache| cache.reuse_sandbox::()) { + Ok(sandbox) + } else { + S::spawn(&sandbox_config) + .map_err(Error::from_display) + .map_err(|error| error.context("instantiation failed: failed to create a sandbox")) + } +} - { - let mut args = ExecuteArgs::new(); - args.set_call(native_code_address); - args.set_reset_memory_after_execution(); - sandbox.execute(args).unwrap(); - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x00, 0x00, 0x00, 0x00] - ); - } +fn recycle_sandbox(engine_state: &EngineState, get_sandbox: impl FnOnce() -> Option) where S: Sandbox { + let Some(sandbox_cache) = engine_state.sandbox_cache() else { return }; + let sandboxes = S::as_sandbox_vec(&sandbox_cache.sandboxes); - { - let mut args = ExecuteArgs::new(); - args.set_call(native_code_address); - sandbox.execute(args).unwrap(); - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x01, 0x00, 0x00, 0x00] - ); - } + let mut count = sandbox_cache.available_workers.load(Ordering::Relaxed); + if count >= sandbox_cache.worker_limit { + return; + } - { - let mut args = ExecuteArgs::new(); - args.set_clear_program_after_execution(); - sandbox.execute(args).unwrap(); - assert!(sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).is_err()); - } + loop { + if let Err(new_count) = sandbox_cache.available_workers.compare_exchange(count, count + 1, Ordering::Relaxed, Ordering::Relaxed) { + if new_count >= sandbox_cache.worker_limit { + return; } - #[test] - fn out_of_bounds_memory_access_generates_a_trap() { - let _ = env_logger::try_init(); - - let init = 
GuestProgramInit::new().with_bss(1); - let init = SandboxProgramInit::new(init); - let mem = init.memory_config(get_native_page_size()).unwrap(); - let mut asm = Assembler::new(); - if Sandbox::KIND != SandboxKind::Generic { - asm.push(mov_imm(r15, imm32(0))); - } - - asm - .push(load(LoadKind::U32, rax, reg_indirect(RegSize::R64, r15 + mem.rw_data_address().try_into().unwrap()))) - .push(add((rax, imm64(1)))) - .push(store(Size::U32, reg_indirect(RegSize::R64, r15 + i32::try_from(mem.rw_data_address()).unwrap()), rax)) - .push(load(LoadKind::U32, rax, reg_indirect(RegSize::R64, r15))); - - emit_sysreturn(&mut asm); - let code = asm.finalize(); - let address_space = Sandbox::reserve_address_space().unwrap(); - let native_code_address = address_space.native_code_address(); - let program = Sandbox::prepare_program(init.with_code(&code), address_space, None).unwrap(); - - let mut sandbox = Sandbox::spawn(&Default::default()).unwrap(); - { - let mut args = ExecuteArgs::new(); - args.set_program(&program); - args.set_call(native_code_address); - match sandbox.execute(args) { - Err(ExecutionError::Trap(_)) => {} - _ => panic!(), - } - - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x01, 0x00, 0x00, 0x00] - ); - } - - // The VM still works even though it got hit with a SIGSEGV. 
- { - let mut args = ExecuteArgs::new(); - args.set_call(native_code_address); - match sandbox.execute(args) { - Err(ExecutionError::Trap(_)) => {} - _ => panic!(), - } - - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x02, 0x00, 0x00, 0x00] - ); - } - } + count = new_count; + continue; + } - #[test] - fn divide_by_zero_generates_a_trap() { - if Sandbox::KIND == SandboxKind::Generic { - return; - } + break; + } - let _ = env_logger::try_init(); - - let init = GuestProgramInit::new().with_bss(4); - let init = SandboxProgramInit::new(init); - let mem = init.memory_config(get_native_page_size()).unwrap(); - let mut asm = Assembler::new(); - asm - .push(mov_imm(rdx, imm32(0))) - .push(mov_imm(rax, imm32(1))) - .push(mov_imm(rcx, imm32(0))) - .push(mov_imm(r8, imm32(0x11223344))) - .push(store(Size::U32, abs(RegSize::R32, i32::try_from(mem.rw_data_address()).unwrap()), r8)) - .push(idiv(RegSize::R32, rcx)) - .push(mov_imm(r8, imm32(0x12345678))) - .push(store(Size::U32, abs(RegSize::R32, i32::try_from(mem.rw_data_address()).unwrap()), r8)); - - emit_sysreturn(&mut asm); - let code = asm.finalize(); - let address_space = Sandbox::reserve_address_space().unwrap(); - let native_code_address = address_space.native_code_address(); - let program = Sandbox::prepare_program(init.with_code(&code), address_space, None).unwrap(); - let mut sandbox = Sandbox::spawn(&Default::default()).unwrap(); + if let Some(sandbox) = get_sandbox() { + let mut sandboxes = match sandboxes.lock() { + Ok(sandboxes) => sandboxes, + Err(poison) => poison.into_inner(), + }; - { - let mut args = ExecuteArgs::new(); - args.set_program(&program); - args.set_call(native_code_address); - match sandbox.execute(args) { - Err(ExecutionError::Trap(_)) => {} - _ => panic!(), - } - - assert_eq!( - sandbox.access().read_memory_into_new_vec(mem.rw_data_address(), 4).unwrap(), - [0x44, 0x33, 0x22, 0x11] - ); - } - } - } + sandboxes.push(sandbox); + } else { + 
sandbox_cache.available_workers.fetch_sub(1, Ordering::Relaxed); } } - -#[cfg(test)] -mod tests { - #[cfg(target_os = "linux")] - sandbox_tests!(linux); - sandbox_tests!(generic); -} diff --git a/crates/polkavm/src/sandbox/generic.rs b/crates/polkavm/src/sandbox/generic.rs index f2c45ef3..9bf01f16 100644 --- a/crates/polkavm/src/sandbox/generic.rs +++ b/crates/polkavm/src/sandbox/generic.rs @@ -1,21 +1,23 @@ #![allow(clippy::manual_range_contains)] use polkavm_common::{ + abi::MemoryMap, error::{ExecutionError, Trap}, program::Reg, - utils::{byte_slice_init, Access, AsUninitSliceMut, Gas}, + utils::{align_to_next_page_usize, byte_slice_init, Access, AsUninitSliceMut, Gas}, zygote::{ AddressTable, AddressTableRaw, CacheAligned, SandboxMemoryConfig, - VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION, - VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION, VM_ADDR_JUMP_TABLE, VM_ADDR_JUMP_TABLE_RETURN_TO_HOST, VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE, VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE, }, + VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION, + VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION, + VM_RPC_FLAG_RESET_MEMORY_BEFORE_EXECUTION, }; use super::ExecuteArgs; @@ -25,11 +27,11 @@ use core::cell::UnsafeCell; use core::sync::atomic::{AtomicUsize, Ordering}; use core::mem::MaybeUninit; use std::borrow::Cow; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; -use super::{OnHostcall, SandboxKind, SandboxProgramInit, get_native_page_size}; -use crate::api::{BackendAccess, MemoryAccessError}; -use crate::config::GasMeteringKind; +use super::{SandboxKind, SandboxInit, SandboxVec, get_native_page_size}; +use crate::api::{BackendAccess, CompiledModuleKind, MemoryAccessError, Module, HostcallHandler}; +use crate::compiler::CompiledModule; // On Linux don't depend on the `libc` crate to lower the number of dependencies. #[cfg(target_os = "linux")] @@ -413,13 +415,31 @@ unsafe fn sysreturn(vmctx: &mut VmCtx) -> ! 
{ } } +#[repr(C)] +enum TrapKind { + None, + Trap, + Error, +} + unsafe fn trigger_trap(vmctx: &mut VmCtx) -> ! { - vmctx.trap_triggered = true; + vmctx.trap_kind = TrapKind::Trap; + sysreturn(vmctx); +} + +unsafe fn trigger_error(vmctx: &mut VmCtx) -> ! { + vmctx.trap_kind = TrapKind::Error; sysreturn(vmctx); } const REG_COUNT: usize = polkavm_common::program::Reg::ALL.len(); +#[repr(C)] +struct HeapInfo { + heap_top: u64, + heap_threshold: u64, +} + #[repr(C)] struct VmCtx { // NOTE: These two fields are accessed from inline assembly so they shouldn't be moved! @@ -428,11 +448,14 @@ struct VmCtx { gas: i64, + heap_info: HeapInfo, + memory_map: MemoryMap, + program_range: Range, - trap_triggered: bool, + trap_kind: TrapKind, regs: CacheAligned<[u32; REG_COUNT]>, - on_hostcall: Option>, + hostcall_handler: Option>, sandbox: *mut Sandbox, instruction_number: Option, native_program_counter: Option, @@ -444,12 +467,16 @@ impl VmCtx { VmCtx { return_address: 0, return_stack_pointer: 0, - trap_triggered: false, + trap_kind: TrapKind::None, program_range: 0..0, - + heap_info: HeapInfo { + heap_top: 0, + heap_threshold: 0, + }, + memory_map: MemoryMap::empty(), gas: 0, regs: CacheAligned([0; REG_COUNT]), - on_hostcall: None, + hostcall_handler: None, sandbox: core::ptr::null_mut(), instruction_number: None, native_program_counter: None, @@ -491,8 +518,8 @@ unsafe extern "C" fn syscall_hostcall(hostcall: u32) { // SAFETY: We were called from the inside of the guest program, so vmctx must be valid. 
let vmctx = unsafe { conjure_vmctx() }; - let Some(on_hostcall) = vmctx.on_hostcall.as_mut().take() else { - trigger_trap(vmctx); + let Some(hostcall_handler) = vmctx.hostcall_handler.as_mut().take() else { + trigger_error(vmctx); }; // SAFETY: We were called from the inside of the guest program, so no other @@ -501,7 +528,7 @@ unsafe extern "C" fn syscall_hostcall(hostcall: u32) { &mut *vmctx.sandbox }; - match on_hostcall(hostcall, super::Sandbox::access(sandbox)) { + match hostcall_handler(hostcall, super::Sandbox::access(sandbox).into()) { Ok(()) => {} Err(_) => trigger_trap(vmctx) } @@ -514,7 +541,7 @@ unsafe extern "C" fn syscall_trace(instruction_number: u32, rip: u64) { vmctx.instruction_number = Some(instruction_number); vmctx.native_program_counter = Some(rip); - let Some(on_hostcall) = vmctx.on_hostcall.as_mut().take() else { + let Some(hostcall_handler) = vmctx.hostcall_handler.as_mut().take() else { return; }; @@ -524,7 +551,7 @@ unsafe extern "C" fn syscall_trace(instruction_number: u32, rip: u64) { &mut *vmctx.sandbox }; - match on_hostcall(polkavm_common::HOSTCALL_TRACE, super::Sandbox::access(sandbox)) { + match hostcall_handler(polkavm_common::HOSTCALL_TRACE, super::Sandbox::access(sandbox).into()) { Ok(()) => {} Err(_) => trigger_trap(vmctx) } @@ -546,6 +573,52 @@ unsafe extern "C" fn syscall_return() -> ! 
{ sysreturn(vmctx); } +unsafe fn sbrk(vmctx: &mut VmCtx, pending_heap_top: u64) -> Result, ()> { + if pending_heap_top > u64::from(vmctx.memory_map.heap_base() + vmctx.memory_map.max_heap_size()) { + return Ok(None); + } + + let Some(start) = align_to_next_page_usize(vmctx.memory_map.page_size() as usize, vmctx.heap_info.heap_top as usize) else { return Err(()); }; + let Some(end) = align_to_next_page_usize(vmctx.memory_map.page_size() as usize, pending_heap_top as usize) else { return Err(()); }; + + let size = end - start; + if size > 0 { + let guest_memory_base = (vmctx as *mut VmCtx).cast::().offset(-GUEST_MEMORY_TO_VMCTX_OFFSET); + let pointer = sys::mmap( + guest_memory_base.add(start).cast::(), + end - start, + sys::PROT_READ | sys::PROT_WRITE, + sys::MAP_FIXED | sys::MAP_PRIVATE | sys::MAP_ANONYMOUS, + -1, + 0, + ); + + if pointer == sys::MAP_FAILED { + log::error!("sbrk mmap failed!"); + return Err(()); + } + } + + vmctx.heap_info.heap_top = pending_heap_top; + vmctx.heap_info.heap_threshold = end as u64; + + Ok(Some(pending_heap_top as u32)) +} + +unsafe extern "C" fn syscall_sbrk(pending_heap_top: u64) -> u32 { + // SAFETY: We were called from the inside of the guest program, so vmctx must be valid. + let vmctx = unsafe { conjure_vmctx() }; + + // SAFETY: `vmctx` is valid and was allocated along with the guest memory. 
+ match sbrk(vmctx, pending_heap_top) { + Ok(Some(new_heap_top)) => new_heap_top, + Ok(None) => 0, + Err(()) => { + trigger_error(vmctx); + } + } +} + #[derive(Clone)] pub struct SandboxProgram(Arc); @@ -556,8 +629,6 @@ struct SandboxProgramInner { code_memory: Mmap, code_length: usize, - - gas_metering: Option, } impl super::SandboxProgram for SandboxProgram { @@ -576,8 +647,8 @@ pub struct Sandbox { poison: Poison, program: Option, memory: Mmap, - memory_config: SandboxMemoryConfig, guest_memory_offset: usize, + module: Option, } impl Drop for Sandbox { @@ -603,97 +674,110 @@ impl Sandbox { } fn clear_program(&mut self) -> Result<(), ExecutionError> { - let user_memory_region_size = self.memory_config.user_memory_region_size(); - if user_memory_region_size > 0 { - self.memory.mmap_within( - self.guest_memory_offset + self.memory_config.user_memory_region_address() as usize, - self.memory_config.user_memory_region_size() as usize, - 0 - )?; + let length = self.memory.len() - self.guest_memory_offset; + let program = self.program.take(); - self.memory_config.clear_user_memory_sizes(); - } - - if self.memory_config.stack_size() > 0 { - self.memory.mmap_within( - self.guest_memory_offset + self.memory_config.stack_address_low() as usize, - self.memory_config.stack_size() as usize, - 0 - )?; - - self.memory_config.clear_stack_size(); - } + self.memory.mmap_within( + self.guest_memory_offset, + length, + 0 + )?; - self.memory_config.clear_code_size(); - self.memory_config.clear_jump_table_size(); - if let Some(program) = self.program.take() { + if let Some(program) = program { if let Some(program) = Arc::into_inner(program.0) { program.code_memory.unmap()?; } } + self.vmctx_mut().heap_info.heap_top = 0; + self.vmctx_mut().heap_info.heap_threshold = 0; + self.vmctx_mut().memory_map = MemoryMap::empty(); + Ok(()) } - fn reset_memory(&mut self) -> Result<(), ExecutionError> { - if let Some(ref program) = self.program { - let program = &program.0; - let rw_data_size = 
self.memory_config.rw_data_size() as usize; - if rw_data_size > 0 { - let offset = self.guest_memory_offset + self.memory_config.rw_data_address() as usize; - assert!(program.rw_data.len() <= rw_data_size); + fn force_reset_memory(&mut self) -> Result<(), Error> { + let Some(ref program) = self.program else { return Ok(()) }; + let program = &program.0; + let memory_map = program.memory_config.memory_map.clone(); - self.memory.as_slice_mut()[offset..offset + program.rw_data.len()].copy_from_slice(&program.rw_data); - self.memory.as_slice_mut()[offset + program.rw_data.len()..offset + self.memory_config.rw_data_size() as usize].fill(0); - } + log::trace!("Resetting memory"); + log::trace!(" Read-write data: 0x{:x}..0x{:x}", memory_map.rw_data_address(), memory_map.rw_data_range().end); + + let rw_data_size = memory_map.rw_data_size() as usize; + if rw_data_size > 0 { + let offset = self.guest_memory_offset + memory_map.rw_data_address() as usize; + assert!(program.rw_data.len() <= rw_data_size); + + let copy_range = offset..offset + program.rw_data.len(); + self.memory.as_slice_mut()[copy_range.clone()].copy_from_slice(&program.rw_data); + log::trace!(" ...copy: 0x{:x}..0x{:x}", copy_range.start - self.guest_memory_offset, copy_range.end - self.guest_memory_offset); + + let native_page_size = get_native_page_size(); + let offset_to_next_native_page = align_to_next_page_usize(native_page_size, offset + program.rw_data.len()).unwrap(); + + let fill_range = offset + program.rw_data.len()..offset_to_next_native_page; + self.memory.as_slice_mut()[fill_range.clone()].fill(0); + log::trace!(" ...fill: 0x{:x}..0x{:x}", fill_range.start - self.guest_memory_offset, fill_range.end - self.guest_memory_offset); - let bss_size = self.memory_config.bss_size() as usize; + let bss_size = memory_map.rw_data_size() as usize - (offset_to_next_native_page - offset); if bss_size > 0 { + log::trace!(" ...mmap: 0x{:x}..0x{:x}", offset_to_next_native_page - self.guest_memory_offset, 
offset_to_next_native_page + bss_size - self.guest_memory_offset); self.memory.mmap_within( - self.guest_memory_offset + self.memory_config.bss_address() as usize, + offset_to_next_native_page, bss_size, PROT_READ | PROT_WRITE )?; } + } - let stack_size = self.memory_config.stack_size() as usize; - if stack_size > 0 { - self.memory.mmap_within( - self.guest_memory_offset + self.memory_config.stack_address_low() as usize, - stack_size, - PROT_READ | PROT_WRITE - )?; - } - } else { - assert_eq!(self.memory_config.ro_data_size(), 0); - assert_eq!(self.memory_config.rw_data_size(), 0); - assert_eq!(self.memory_config.stack_size(), 0); + let stack_size = memory_map.stack_size() as usize; + if stack_size > 0 { + self.memory.mmap_within( + self.guest_memory_offset + memory_map.stack_address_low() as usize, + stack_size, + PROT_READ | PROT_WRITE + )?; + } + + let initial_heap_threshold = u64::from(memory_map.rw_data_range().end); + let heap_top = self.vmctx().heap_info.heap_top; + log::trace!(" Heap: 0x{:x}..0x{:x}", memory_map.heap_base(), heap_top); + if heap_top > initial_heap_threshold { + log::trace!(" ..mmap:: 0x{:x}..0x{:x}", initial_heap_threshold, heap_top); + self.memory.mmap_within( + self.guest_memory_offset + initial_heap_threshold as usize, + heap_top as usize - initial_heap_threshold as usize, + 0 + )?; } + self.vmctx_mut().heap_info.heap_top = u64::from(program.memory_config.memory_map.heap_base()); + self.vmctx_mut().heap_info.heap_threshold = initial_heap_threshold; + Ok(()) } fn bound_check_access(&self, address: u32, length: u32) -> Result<(), ()> { - use core::ops::Range; - - #[inline] - fn check(range: Range, access_range: Range) -> Result { - let range = u64::from(range.start)..u64::from(range.end); - if access_range.end <= range.start || access_range.start >= range.end { - // No overlap. 
- Ok(false) + let memory_map = self.vmctx().memory_map.clone(); + + let (start, region_length) = if address >= memory_map.stack_address_low() { + (memory_map.stack_address_low(), memory_map.stack_size()) + } else if address >= memory_map.rw_data_address() { + let heap_threshold = self.vmctx().heap_info.heap_threshold as u32; + if heap_threshold == 0 { + (memory_map.rw_data_address(), memory_map.rw_data_size()) } else { - // There is overlap. - if access_range.start >= range.start && access_range.end <= range.end { - Ok(true) - } else { - Err(()) - } + (memory_map.rw_data_address(), heap_threshold - memory_map.rw_data_address()) } - } + } else if address >= memory_map.ro_data_address() { + (memory_map.ro_data_address(), memory_map.ro_data_size()) + } else { + return Err(()); + }; - let range = u64::from(address)..u64::from(address) + u64::from(length); - if check(self.memory_config.ro_data_range(), range.clone())? || check(self.memory_config.heap_range(), range.clone())? || check(self.memory_config.stack_range(), range)? 
{ + let Some(address_end) = address.checked_add(length) else { return Err(()) }; + if address_end <= (start + region_length) { Ok(()) } else { Err(()) @@ -712,16 +796,18 @@ impl Sandbox { Some(&mut self.memory.as_slice_mut()[range]) } - fn execute_impl(&mut self, mut args: ExecuteArgs) -> Result<(), ExecutionError> { - if let Some(SandboxProgram(program)) = args.program { + fn execute_impl(&mut self, mut args: ExecuteArgs) -> Result<(), ExecutionError> { + if let Some(module) = args.module { + let compiled_module = ::as_compiled_module(module); + let program = &compiled_module.sandbox_program.0; + log::trace!("Reconfiguring sandbox..."); self.clear_program()?; - let current = &mut self.memory_config; - let new = program.memory_config; - if new.ro_data_size() > 0 { - let offset = self.guest_memory_offset + new.ro_data_address() as usize; - let length = new.ro_data_size() as usize; + let new = &program.memory_config; + if new.memory_map.ro_data_size() > 0 { + let offset = self.guest_memory_offset + new.memory_map.ro_data_address() as usize; + let length = new.memory_map.ro_data_size() as usize; assert!(program.ro_data.len() <= length); self.memory.modify_and_protect(offset, length, PROT_READ, |slice| { @@ -733,17 +819,15 @@ impl Sandbox { " New rodata range: 0x{:x}-0x{:x} (0x{:x}-0x{:x}) (0x{:x})", memory_address, memory_address + length, - new.ro_data_address(), - new.ro_data_address() + new.ro_data_size(), - new.ro_data_size() + new.memory_map.ro_data_address(), + new.memory_map.ro_data_address() + new.memory_map.ro_data_size(), + new.memory_map.ro_data_size() ); - - current.set_ro_data_size(new.ro_data_size()).unwrap(); } - if new.rw_data_size() > 0 { - let offset = self.guest_memory_offset + new.rw_data_address() as usize; - let length = new.rw_data_size() as usize; + if new.memory_map.rw_data_size() > 0 { + let offset = self.guest_memory_offset + new.memory_map.rw_data_address() as usize; + let length = new.memory_map.rw_data_size() as usize; 
assert!(program.rw_data.len() <= length); self.memory.modify_and_protect(offset, length, PROT_READ | PROT_WRITE, |slice| { @@ -755,69 +839,65 @@ impl Sandbox { " New rwdata range: 0x{:x}-0x{:x} (0x{:x}-0x{:x}) (0x{:x})", memory_address, memory_address + length, - new.rw_data_address(), - new.rw_data_address() + new.rw_data_size(), - new.rw_data_size() + new.memory_map.rw_data_address(), + new.memory_map.rw_data_address() + new.memory_map.rw_data_size(), + new.memory_map.rw_data_size() ); - - current.set_rw_data_size(new.rw_data_size()).unwrap(); } - if new.bss_size() > 0 { - let offset = self.guest_memory_offset + new.bss_address() as usize; - let length = new.bss_size() as usize; + if new.memory_map.stack_size() > 0 { + let offset = self.guest_memory_offset + new.memory_map.stack_address_low() as usize; + let length = new.memory_map.stack_size() as usize; self.memory.mprotect(offset, length, PROT_READ | PROT_WRITE)?; let memory_address = self.memory.as_ptr() as usize + offset; log::trace!( - " New bss range: 0x{:x}-0x{:x} (0x{:x}-0x{:x}) (0x{:x})", + " New stack range: 0x{:x}-0x{:x} (0x{:x}-0x{:x}) (0x{:x})", memory_address, memory_address + length, - new.bss_address(), - new.bss_address() + new.bss_size(), - new.bss_size() + new.memory_map.stack_address_low(), + new.memory_map.stack_address_low() + new.memory_map.stack_size(), + new.memory_map.stack_size() ); - - current.set_bss_size(new.bss_size()).unwrap(); } - if new.stack_size() > 0 { - let offset = self.guest_memory_offset + new.stack_address_low() as usize; - let length = new.stack_size() as usize; + self.vmctx_mut().heap_info.heap_top = u64::from(program.memory_config.memory_map.heap_base()); + self.vmctx_mut().heap_info.heap_threshold = u64::from(new.memory_map.rw_data_range().end); + self.vmctx_mut().memory_map = new.memory_map.clone(); - self.memory.mprotect(offset, length, PROT_READ | PROT_WRITE)?; + self.program = Some(SandboxProgram(Arc::clone(program))); + self.module = Some(module.clone()); + } - 
let memory_address = self.memory.as_ptr() as usize + offset; - log::trace!( - " New stack range: 0x{:x}-0x{:x} (0x{:x}-0x{:x}) (0x{:x})", - memory_address, - memory_address + length, - new.stack_address_low(), - new.stack_address_low() + new.stack_size(), - new.stack_size() - ); + if let Some(regs) = args.regs { + self.vmctx_mut().regs.copy_from_slice(regs); + } - current.set_stack_size(new.stack_size()).unwrap(); - } + if let Some(gas) = crate::sandbox::get_gas(&args, self.module.as_ref().and_then(|module| module.gas_metering())) { + self.vmctx_mut().gas = gas; + } - let native_page_size = get_native_page_size(); - current.set_code_size(native_page_size, new.code_size()).unwrap(); - current.set_jump_table_size(native_page_size, new.jump_table_size()).unwrap(); - self.program = Some(SandboxProgram(Arc::clone(program))); + if args.flags & VM_RPC_FLAG_RESET_MEMORY_BEFORE_EXECUTION != 0 { + // TODO: Do this only if the memory is dirty. + self.force_reset_memory()?; + } + + if args.sbrk > 0 { + let new_heap_top = self.vmctx().heap_info.heap_top + u64::from(args.sbrk); - if *current != new { - panic!("internal error: failed to fully update memory configuration"); + // SAFETY: `vmctx` is valid and was allocated along with the guest memory. 
+ match unsafe { sbrk(self.vmctx_mut(), new_heap_top) } { + Ok(Some(_)) => {}, + Ok(None) => return Err(ExecutionError::Error("initial sbrk failed: cannot grow the heap over the maximum".into())), + Err(()) => return Err(ExecutionError::Error("initial sbrk failed".into())) } } - self.vmctx_mut().regs.copy_from_slice(args.initial_regs); - if let Some(gas) = args.get_gas(self.program.as_ref().and_then(|program| program.0.gas_metering)) { - self.vmctx_mut().gas = gas; - } + let mut trap_kind = TrapKind::None; + if let Some(entry_point) = args.entry_point { + let entry_point = ::as_compiled_module(self.module.as_ref().unwrap()).export_trampolines[entry_point] as usize; - let mut trap_triggered = false; - if args.rpc_address != 0 { { let Some(program) = self.program.as_ref() else { return Err(ExecutionError::Trap(Trap::default())); @@ -827,15 +907,19 @@ impl Sandbox { let address = code.as_ptr() as u64; self.vmctx_mut().program_range = address..address + code.len() as u64; } - log::trace!("Jumping to: 0x{:x}", args.rpc_address); + log::trace!("Jumping to: 0x{:x}", entry_point); + + let hostcall_handler: Option = match args.hostcall_handler { + Some(ref mut hostcall_handler) => Some(&mut *hostcall_handler), + None => None, + }; - let on_hostcall: Option> = args.on_hostcall.take(); // SAFETY: Transmuting an arbitrary lifetime into a 'static lifetime is safe as long as the invariants // that the shorter lifetime requires are still upheld. 
- let on_hostcall: Option> = unsafe { core::mem::transmute(on_hostcall) }; - self.vmctx_mut().on_hostcall = on_hostcall; + let hostcall_handler: Option> = unsafe { core::mem::transmute(hostcall_handler) }; + self.vmctx_mut().hostcall_handler = hostcall_handler; self.vmctx_mut().sandbox = self; - self.vmctx_mut().trap_triggered = false; + self.vmctx_mut().trap_kind = TrapKind::None; #[allow(clippy::undocumented_unsafe_blocks)] unsafe { @@ -867,7 +951,7 @@ impl Sandbox { pop rbx pop rbp "#, - entry_point = in(reg) args.rpc_address, + entry_point = in(reg) entry_point, // Mark all of the clobbered registers. // // We need to save and restore rbp and rbx manually since @@ -891,25 +975,25 @@ impl Sandbox { THREAD_VMCTX.with(|thread_ctx| core::ptr::write(thread_ctx.get(), core::ptr::null_mut())); } - trap_triggered = core::mem::replace(&mut self.vmctx_mut().trap_triggered, false); + trap_kind = core::mem::replace(&mut self.vmctx_mut().trap_kind, TrapKind::None); self.vmctx_mut().sandbox = core::ptr::null_mut(); - self.vmctx_mut().on_hostcall = None; + self.vmctx_mut().hostcall_handler = None; self.vmctx_mut().return_address = 0; self.vmctx_mut().return_stack_pointer = 0; self.vmctx_mut().program_range = 0..0; }; - if args.rpc_flags & VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION != 0 { + if args.flags & VM_RPC_FLAG_CLEAR_PROGRAM_AFTER_EXECUTION != 0 { self.clear_program()?; - } else if args.rpc_flags & VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION != 0 { - self.reset_memory()?; + } else if args.flags & VM_RPC_FLAG_RESET_MEMORY_AFTER_EXECUTION != 0 { + self.force_reset_memory()?; } - if trap_triggered { - return Err(ExecutionError::Trap(Trap::default())); + match trap_kind { + TrapKind::None => Ok(()), + TrapKind::Trap => Err(ExecutionError::Trap(Trap::default())), + TrapKind::Error => Err(ExecutionError::Error("fatal error".into())), } - - Ok(()) } } @@ -928,25 +1012,37 @@ impl super::Sandbox for Sandbox { type Program = SandboxProgram; type AddressSpace = Mmap; + fn 
as_sandbox_vec(vec: &SandboxVec) -> &Mutex> { + #[allow(clippy::match_wildcard_for_single_variants)] + match vec { + SandboxVec::Generic(ref vec) => vec, + _ => unreachable!(), + } + } + + fn as_compiled_module(module: &Module) -> &CompiledModule { + match module.compiled_module() { + CompiledModuleKind::Generic(ref module) => module, + _ => unreachable!(), + } + } + fn reserve_address_space() -> Result { Mmap::reserve_address_space(VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE as usize + VM_SANDBOX_MAXIMUM_JUMP_TABLE_VIRTUAL_SIZE as usize) } - fn prepare_program(init: SandboxProgramInit, mut map: Self::AddressSpace, gas_metering: Option) -> Result { + fn prepare_program(init: SandboxInit, mut map: Self::AddressSpace) -> Result { let native_page_size = get_native_page_size(); let cfg = init.memory_config(native_page_size)?; - assert_eq!(cfg.code_size() % native_page_size, 0); - assert!(init.code.len() <= cfg.code_size()); - - let jump_table_offset = cfg.code_size(); + let jump_table_offset = cfg.code_size as usize; let sysreturn_offset = jump_table_offset + (VM_ADDR_JUMP_TABLE_RETURN_TO_HOST - VM_ADDR_JUMP_TABLE) as usize; - map.modify_and_protect(0, cfg.code_size(), PROT_EXEC, |slice| { + map.modify_and_protect(0, cfg.code_size as usize, PROT_EXEC, |slice| { slice[..init.code.len()].copy_from_slice(init.code); })?; - map.modify_and_protect(jump_table_offset, cfg.jump_table_size(), PROT_READ, |slice| { + map.modify_and_protect(jump_table_offset, cfg.jump_table_size as usize, PROT_READ, |slice| { slice[..init.jump_table.len()].copy_from_slice(init.jump_table); })?; @@ -957,15 +1053,15 @@ impl super::Sandbox for Sandbox { log::trace!( "New code range: 0x{:x}-0x{:x} (0x{:x})", map.as_ptr() as u64, - map.as_ptr() as u64 + cfg.code_size() as u64, - cfg.code_size() + map.as_ptr() as u64 + u64::from(cfg.code_size), + cfg.code_size ); log::trace!( "New jump table range: 0x{:x}-0x{:x} (0x{:x})", map.as_ptr() as u64 + jump_table_offset as u64, - map.as_ptr() as u64 + jump_table_offset 
as u64 + cfg.jump_table_size() as u64, - cfg.jump_table_size() + map.as_ptr() as u64 + jump_table_offset as u64 + u64::from(cfg.jump_table_size), + cfg.jump_table_size ); log::trace!( @@ -976,11 +1072,10 @@ impl super::Sandbox for Sandbox { Ok(SandboxProgram(Arc::new(SandboxProgramInner { memory_config: cfg, - ro_data: init.ro_data().to_vec(), - rw_data: init.rw_data().to_vec(), + ro_data: init.guest_init.ro_data.to_vec(), + rw_data: init.guest_init.rw_data.to_vec(), code_memory: map, code_length: init.code.len(), - gas_metering, }))) } @@ -1003,12 +1098,12 @@ impl super::Sandbox for Sandbox { poison: Poison::None, program: None, memory, - memory_config: SandboxMemoryConfig::empty(), guest_memory_offset, + module: None, }) } - fn execute(&mut self, args: ExecuteArgs) -> Result<(), ExecutionError> { + fn execute(&mut self, args: ExecuteArgs) -> Result<(), ExecutionError> { if !matches!(self.poison, Poison::None) { return Err(ExecutionError::Error("sandbox has been poisoned".into())); } @@ -1041,6 +1136,7 @@ impl super::Sandbox for Sandbox { syscall_trap, syscall_return, syscall_trace, + syscall_sbrk, }) } @@ -1052,9 +1148,13 @@ impl super::Sandbox for Sandbox { get_field_offset!(VmCtx::new(), |base| &base.gas) } + fn vmctx_heap_info_offset() -> usize { + get_field_offset!(VmCtx::new(), |base| &base.heap_info) + } + fn gas_remaining_impl(&self) -> Result, super::OutOfGas> { - let Some(program) = self.program.as_ref() else { return Ok(None) }; - if program.0.gas_metering.is_none() { return Ok(None) }; + let Some(module) = self.module.as_ref() else { return Ok(None) }; + if module.gas_metering().is_none() { return Ok(None) }; let raw_gas = self.vmctx().gas; Gas::from_i64(raw_gas).ok_or(super::OutOfGas).map(Some) } @@ -1146,6 +1246,23 @@ impl<'a> Access<'a> for SandboxAccess<'a> { Ok(()) } + fn sbrk(&mut self, size: u32) -> Option { + let new_heap_top = self.sandbox.vmctx().heap_info.heap_top + u64::from(size); + + // SAFETY: `vmctx` is valid and was allocated along 
with the guest memory. + match unsafe { sbrk(self.sandbox.vmctx_mut(), new_heap_top) } { + Ok(result) => result, + Err(()) => panic!("sbrk failed") + } + } + + fn heap_size(&self) -> u32 { + let Some(program) = self.sandbox.program.as_ref() else { return 0 }; + let heap_base = program.0.memory_config.memory_map.heap_base(); + let heap_top = self.sandbox.vmctx().heap_info.heap_top; + (heap_top - u64::from(heap_base)) as u32 + } + fn program_counter(&self) -> Option { self.sandbox.vmctx().instruction_number } @@ -1160,7 +1277,7 @@ impl<'a> Access<'a> for SandboxAccess<'a> { } fn consume_gas(&mut self, gas: u64) { - if self.sandbox.program.as_ref().and_then(|program| program.0.gas_metering).is_none() { + if self.sandbox.module.as_ref().and_then(|module| module.gas_metering()).is_none() { return; } diff --git a/crates/polkavm/src/sandbox/linux.rs b/crates/polkavm/src/sandbox/linux.rs index a18f599d..3ec1f191 100644 --- a/crates/polkavm/src/sandbox/linux.rs +++ b/crates/polkavm/src/sandbox/linux.rs @@ -4,7 +4,7 @@ extern crate polkavm_linux_raw as linux_raw; use polkavm_common::{ - abi::VM_PAGE_SIZE, + abi::VM_MAX_PAGE_SIZE, error::{ExecutionError, Trap}, program::Reg, utils::{align_to_next_page_usize, slice_assume_init_mut, Access, AsUninitSliceMut, Gas}, @@ -22,13 +22,15 @@ pub use linux_raw::Error; use core::ffi::{c_int, c_uint}; use core::ops::Range; use core::sync::atomic::Ordering; +use core::time::Duration; use linux_raw::{abort, cstr, syscall_readonly, Fd, Mmap, STDERR_FILENO, STDIN_FILENO}; use std::borrow::Cow; use std::time::Instant; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; -use super::{OnHostcall, SandboxKind, SandboxProgramInit, get_native_page_size}; -use crate::api::{BackendAccess, MemoryAccessError}; +use super::{SandboxKind, SandboxInit, SandboxVec, get_native_page_size}; +use crate::api::{BackendAccess, CompiledModuleKind, MemoryAccessError, Module, HostcallHandler}; +use crate::compiler::CompiledModule; use crate::config::GasMeteringKind; 
pub struct SandboxConfig { @@ -725,9 +727,7 @@ pub struct SandboxProgram(Arc); struct SandboxProgramInner { memfd: Fd, memory_config: SandboxMemoryConfig, - sysreturn_address: u64, code_range: Range, - gas_metering: Option, } impl super::SandboxProgram for SandboxProgram { @@ -890,6 +890,7 @@ pub struct Sandbox { count_wait_loop_start: u64, count_futex_wait: u64, + module: Option, gas_metering: Option, } @@ -928,26 +929,41 @@ impl super::Sandbox for Sandbox { type Program = SandboxProgram; type AddressSpace = (); + fn as_sandbox_vec(vec: &SandboxVec) -> &Mutex> { + #[allow(clippy::match_wildcard_for_single_variants)] + match vec { + SandboxVec::Linux(ref vec) => vec, + _ => unreachable!(), + } + } + + fn as_compiled_module(module: &Module) -> &CompiledModule { + match module.compiled_module() { + CompiledModuleKind::Linux(ref module) => module, + _ => unreachable!(), + } + } + fn reserve_address_space() -> Result { Ok(()) } - fn prepare_program(init: SandboxProgramInit, (): Self::AddressSpace, gas_metering: Option) -> Result { - static PADDING: [u8; VM_PAGE_SIZE as usize] = [0; VM_PAGE_SIZE as usize]; + fn prepare_program(init: SandboxInit, (): Self::AddressSpace) -> Result { + static PADDING: [u8; VM_MAX_PAGE_SIZE as usize] = [0; VM_MAX_PAGE_SIZE as usize]; let native_page_size = get_native_page_size(); let cfg = init.memory_config(native_page_size)?; - let ro_data_padding = &PADDING[..cfg.ro_data_size() as usize - init.ro_data().len()]; - let rw_data_padding = &PADDING[..cfg.rw_data_size() as usize - init.rw_data().len()]; - let code_padding = &PADDING[..cfg.code_size() - init.code.len()]; + let ro_data_padding = &PADDING[..cfg.ro_data_fd_size as usize - init.guest_init.ro_data.len()]; + let rw_data_padding = &PADDING[..cfg.rw_data_fd_size as usize - init.guest_init.rw_data.len()]; + let code_padding = &PADDING[..cfg.code_size as usize - init.code.len()]; let memfd = prepare_sealed_memfd( create_program_memfd()?, - cfg.ro_data_size() as usize + cfg.rw_data_size() 
as usize + cfg.code_size() + cfg.jump_table_size(), + cfg.ro_data_fd_size as usize + cfg.rw_data_fd_size as usize + cfg.code_size as usize + cfg.jump_table_size as usize, [ - init.ro_data(), + init.guest_init.ro_data, ro_data_padding, - init.rw_data(), + init.guest_init.rw_data, rw_data_padding, init.code, code_padding, @@ -955,15 +971,13 @@ impl super::Sandbox for Sandbox { ] )?; - let offset = cfg.ro_data_size() as usize + cfg.rw_data_size() as usize; - let code_range = offset..offset + init.code.len(); + let code_offset = cfg.ro_data_fd_size as usize + cfg.rw_data_fd_size as usize; + let code_range = code_offset..code_offset + init.code.len(); Ok(SandboxProgram(Arc::new(SandboxProgramInner { memfd, memory_config: cfg, - sysreturn_address: init.sysreturn_address, code_range, - gas_metering, }))) } @@ -1271,46 +1285,64 @@ impl super::Sandbox for Sandbox { count_wait_loop_start: 0, count_futex_wait: 0, + module: None, gas_metering: None, }) } - fn execute(&mut self, mut args: ExecuteArgs) -> Result<(), ExecutionError> { - self.wait_if_necessary(match args.on_hostcall { - Some(ref mut on_hostcall) => Some(&mut *on_hostcall), + fn execute(&mut self, mut args: ExecuteArgs) -> Result<(), ExecutionError> { + self.wait_if_necessary(match args.hostcall_handler { + Some(ref mut hostcall_handler) => Some(&mut *hostcall_handler), None => None, }, true)?; - if args.is_async && args.on_hostcall.is_some() { + if args.is_async && args.hostcall_handler.is_some() { return Err(Error::from_str("requested asynchronous execution with a borrowed hostcall handler").into()); } unsafe { - if let Some(program) = args.program { - *self.vmctx().new_memory_config.get() = program.0.memory_config; - *self.vmctx().new_sysreturn_address.get() = program.0.sysreturn_address; - self.gas_metering = program.0.gas_metering; + if let Some(module) = args.module { + args.flags |= polkavm_common::zygote::VM_RPC_FLAG_RECONFIGURE; + + let compiled_module = Self::as_compiled_module(module); + let program = 
&compiled_module.sandbox_program; + *self.vmctx().memory_config.get() = program.0.memory_config.clone(); + *self.vmctx().heap_info.heap_top.get() = u64::from(module.memory_map().heap_base()); + *self.vmctx().heap_info.heap_threshold.get() = u64::from(module.memory_map().rw_data_range().end); + self.gas_metering = module.gas_metering(); + self.module = Some(module.clone()); } - if let Some(gas) = args.get_gas(self.gas_metering) { + if let Some(gas) = crate::sandbox::get_gas(&args, self.gas_metering) { *self.vmctx().gas().get() = gas; } - *self.vmctx().rpc_address.get() = args.rpc_address; - *self.vmctx().rpc_flags.get() = args.rpc_flags; + *self.vmctx().rpc_address.get() = args.entry_point.map_or(0, |entry_point| + Self::as_compiled_module(self.module.as_ref().unwrap()).export_trampolines[entry_point] as usize + ) as u64; + + *self.vmctx().rpc_flags.get() = args.flags; + *self.vmctx().rpc_sbrk.get() = args.sbrk; + + if let Some(regs) = args.regs { + (*self.vmctx().regs().get()).copy_from_slice(regs); + } - (*self.vmctx().regs().get()).copy_from_slice(args.initial_regs); self.vmctx().futex.store(VMCTX_FUTEX_BUSY, Ordering::Release); linux_raw::sys_futex_wake_one(&self.vmctx().futex)?; - if let Some(program) = args.program { + if let Some(module) = args.module { + let compiled_module = Self::as_compiled_module(module); // TODO: This can block forever. 
- linux_raw::sendfd(self.socket.borrow(), program.0.memfd.borrow())?; + linux_raw::sendfd(self.socket.borrow(), compiled_module.sandbox_program.0.memfd.borrow())?; } } if !args.is_async { - self.wait_if_necessary(args.on_hostcall, args.rpc_address == 0)?; + self.wait_if_necessary(match args.hostcall_handler { + Some(ref mut hostcall_handler) => Some(&mut *hostcall_handler), + None => None, + }, args.entry_point.is_none())?; } Ok(()) @@ -1337,6 +1369,10 @@ impl super::Sandbox for Sandbox { get_field_offset!(VmCtx::new(), |base| base.gas().get()) } + fn vmctx_heap_info_offset() -> usize { + get_field_offset!(VmCtx::new(), |base| base.heap_info()) + } + fn gas_remaining_impl(&self) -> Result, super::OutOfGas> { if self.gas_metering.is_none() { return Ok(None) }; let raw_gas = unsafe { *self.vmctx().gas().get() }; @@ -1362,7 +1398,7 @@ impl Sandbox { #[inline(never)] #[cold] - fn wait(&mut self, mut on_hostcall: Option>, low_latency: bool) -> Result<(), ExecutionError> { + fn wait(&mut self, mut hostcall_handler: Option, low_latency: bool) -> Result<(), ExecutionError> { let mut spin_target = 0; let mut yield_target = 0; if low_latency { @@ -1390,8 +1426,8 @@ impl Sandbox { if state == VMCTX_FUTEX_HOSTCALL { core::sync::atomic::fence(Ordering::Acquire); - let on_hostcall = match on_hostcall { - Some(ref mut on_hostcall) => &mut *on_hostcall, + let hostcall_handler = match hostcall_handler { + Some(ref mut hostcall_handler) => &mut *hostcall_handler, None => { unsafe { *self.vmctx().hostcall().get() = polkavm_common::zygote::HOSTCALL_ABORT_EXECUTION; @@ -1409,7 +1445,7 @@ impl Sandbox { spin_target = 512; } - match on_hostcall(hostcall, super::Sandbox::access(self)) { + match hostcall_handler(hostcall, super::Sandbox::access(self).into()) { Ok(()) => { self.vmctx().futex.store(VMCTX_FUTEX_BUSY, Ordering::Release); linux_raw::sys_futex_wake_one(&self.vmctx().futex)?; @@ -1454,26 +1490,32 @@ impl Sandbox { Err(error) if error.errno() == linux_raw::EAGAIN || error.errno() 
== linux_raw::EINTR => continue, Err(error) if error.errno() == linux_raw::ETIMEDOUT => { log::trace!("Timeout expired while waiting for child #{}...", self.child.pid); - let status = self.child.check_status(true)?; - if !status.is_running() { - log::trace!("Child #{} is not running anymore: {status}", self.child.pid); - let message = get_message(self.vmctx()); - if let Some(message) = message { - return Err(Error::from(format!("{status}: {message}")).into()); - } else { - return Err(Error::from(format!("worker process unexpectedly quit: {status}")).into()); - } - } + self.check_child_status()?; } Err(error) => return Err(error.into()), } } } + fn check_child_status(&mut self) -> Result<(), Error> { + let status = self.child.check_status(true)?; + if status.is_running() { + return Ok(()); + } + + log::trace!("Child #{} is not running anymore: {status}", self.child.pid); + let message = get_message(self.vmctx()); + if let Some(message) = message { + Err(Error::from(format!("{status}: {message}"))) + } else { + Err(Error::from(format!("worker process unexpectedly quit: {status}"))) + } + } + #[inline] - fn wait_if_necessary(&mut self, on_hostcall: Option>, low_latency: bool) -> Result<(), ExecutionError> { + fn wait_if_necessary(&mut self, hostcall_handler: Option, low_latency: bool) -> Result<(), ExecutionError> { if self.vmctx().futex.load(Ordering::Relaxed) != VMCTX_FUTEX_IDLE { - self.wait(on_hostcall, low_latency)?; + self.wait(hostcall_handler, low_latency)?; } Ok(()) @@ -1562,6 +1604,8 @@ impl<'a> Access<'a> for SandboxAccess<'a> { }); } + self.sandbox.vmctx().is_memory_dirty.store(true, Ordering::Relaxed); + let length = data.len(); match linux_raw::vm_write_memory(self.sandbox.child.pid, [data], [(address as usize, length)]) { Ok(actual_length) if actual_length == length => { @@ -1584,6 +1628,54 @@ impl<'a> Access<'a> for SandboxAccess<'a> { } } + fn sbrk(&mut self, size: u32) -> Option { + debug_assert_eq!(self.sandbox.vmctx().futex.load(Ordering::Relaxed), 
VMCTX_FUTEX_HOSTCALL); + + unsafe { + *self.sandbox.vmctx().rpc_sbrk.get() = size; + *self.sandbox.vmctx().hostcall().get() = polkavm_common::zygote::HOSTCALL_SBRK; + } + + self.sandbox.vmctx().futex.store(VMCTX_FUTEX_BUSY, Ordering::Release); + if let Err(error) = linux_raw::sys_futex_wake_one(&self.sandbox.vmctx().futex) { + panic!("sbrk failed: {error}"); + } + + let mut timestamp = Instant::now(); + loop { + let _ = linux_raw::sys_sched_yield(); + if self.sandbox.vmctx().futex.load(Ordering::Relaxed) == VMCTX_FUTEX_BUSY { + let new_timestamp = Instant::now(); + let elapsed = new_timestamp - timestamp; + if elapsed >= Duration::from_millis(100) { + timestamp = new_timestamp; + if let Err(error) = self.sandbox.check_child_status() { + panic!("sbrk failed: {error}"); + } + } + continue; + } + + core::sync::atomic::fence(Ordering::Acquire); + break; + } + + debug_assert_eq!(self.sandbox.vmctx().futex.load(Ordering::Relaxed), VMCTX_FUTEX_HOSTCALL); + + let result = unsafe { *self.sandbox.vmctx().rpc_sbrk.get() }; + if result == 0 { + None + } else { + Some(result) + } + } + + fn heap_size(&self) -> u32 { + let heap_base = unsafe { (*self.sandbox.vmctx().memory_config.get()).memory_map.heap_base() }; + let heap_top = unsafe { *self.sandbox.vmctx().heap_info().heap_top.get() }; + (heap_top - u64::from(heap_base)) as u32 + } + fn program_counter(&self) -> Option { let value = unsafe { *self.sandbox.vmctx().nth_instruction().get() }; diff --git a/crates/polkavm/src/sandbox/polkavm-zygote b/crates/polkavm/src/sandbox/polkavm-zygote index 4ffc7121..a99f8bbf 100755 Binary files a/crates/polkavm/src/sandbox/polkavm-zygote and b/crates/polkavm/src/sandbox/polkavm-zygote differ diff --git a/crates/polkavm/src/tests.rs b/crates/polkavm/src/tests.rs index 3f06ef75..e111b74c 100644 --- a/crates/polkavm/src/tests.rs +++ b/crates/polkavm/src/tests.rs @@ -1,16 +1,16 @@ use crate::{ - Caller, CallerRef, Config, Engine, ExecutionConfig, ExecutionError, Gas, GasMeteringKind, Linker, 
Module, ModuleConfig, ProgramBlob, - Reg, Trap, + CallArgs, Caller, CallerRef, Config, Engine, ExecutionError, Gas, GasMeteringKind, Linker, MemoryMap, Module, ModuleConfig, + ProgramBlob, Reg, StateArgs, Trap, }; use core::cell::RefCell; use std::collections::HashMap; use std::rc::Rc; use std::sync::Mutex; -use polkavm_common::abi::{VM_ADDR_USER_MEMORY, VM_PAGE_SIZE}; use polkavm_common::program::asm; use polkavm_common::program::Reg::*; use polkavm_common::program::{ProgramExport, ProgramImport}; +use polkavm_common::utils::align_to_next_page_u32; use polkavm_common::writer::ProgramBlobBuilder; macro_rules! run_tests { @@ -88,12 +88,13 @@ macro_rules! run_tests { } fn basic_test_blob() -> ProgramBlob<'static> { + let memory_map = MemoryMap::new(0x4000, 0, 0x4000, 0).unwrap(); let mut builder = ProgramBlobBuilder::new(); - builder.set_bss_size(VM_PAGE_SIZE); + builder.set_rw_data_size(0x4000); builder.add_export(ProgramExport::new(0, "main".into())); builder.add_import(ProgramImport::new("hostcall".into())); builder.set_code(&[ - asm::store_imm_u32(0x12345678, VM_ADDR_USER_MEMORY), + asm::store_imm_u32(0x12345678, memory_map.rw_data_address()), asm::add(S0, A0, A1), asm::ecalli(0), asm::add(A0, A0, S0), @@ -114,15 +115,16 @@ fn caller_and_caller_ref_work(config: Config) { illegal_contraband: Rc>>>, } + let address = module.memory_map().rw_data_address(); linker .func_wrap("hostcall", move |caller: Caller| -> Result { { - let value = caller.read_u32(VM_ADDR_USER_MEMORY)?; + let value = caller.read_u32(address)?; assert_eq!(value, 0x12345678); } { let caller = caller.into_ref(); - let value = caller.read_u32(VM_ADDR_USER_MEMORY)?; + let value = caller.read_u32(address)?; assert_eq!(value, 0x12345678); let illegal_contraband = Rc::clone(&caller.data().illegal_contraband); @@ -136,11 +138,7 @@ fn caller_and_caller_ref_work(config: Config) { let instance_pre = linker.instantiate_pre(&module).unwrap(); let instance = instance_pre.instantiate().unwrap(); let mut state = 
State::default(); - let result = instance - .get_typed_func::<(u32, u32), u32>("main") - .unwrap() - .call(&mut state, (1, 10)) - .unwrap(); + let result = instance.call_typed::<(u32, u32), u32>(&mut state, "main", (1, 10)).unwrap(); assert_eq!(result, 111); @@ -161,15 +159,16 @@ fn caller_split_works(config: Config) { value: u32, } + let address = module.memory_map().rw_data_address(); linker .func_wrap("hostcall", move |caller: Caller| -> Result { { - let value = caller.read_u32(VM_ADDR_USER_MEMORY)?; + let value = caller.read_u32(address)?; assert_eq!(value, 0x12345678); } { let (caller, state) = caller.split(); - state.value = caller.read_u32(VM_ADDR_USER_MEMORY)?; + state.value = caller.read_u32(address)?; } Ok(100) @@ -179,11 +178,7 @@ fn caller_split_works(config: Config) { let instance_pre = linker.instantiate_pre(&module).unwrap(); let instance = instance_pre.instantiate().unwrap(); let mut state = State::default(); - let result = instance - .get_typed_func::<(u32, u32), u32>("main") - .unwrap() - .call(&mut state, (1, 10)) - .unwrap(); + let result = instance.call_typed::<(u32, u32), u32>(&mut state, "main", (1, 10)).unwrap(); assert_eq!(result, 111); assert_eq!(state.value, 0x12345678); @@ -213,30 +208,11 @@ fn trapping_from_hostcall_handler_works(config: Config) { let instance_pre = linker.instantiate_pre(&module).unwrap(); let instance = instance_pre.instantiate().unwrap(); - let result = instance - .get_typed_func::<(u32, u32), u32>("main") - .unwrap() - .call(&mut Kind::Ok, (1, 10)); + let result = instance.call_typed::<(u32, u32), u32>(&mut Kind::Ok, "main", (1, 10)); assert!(matches!(result, Ok(111))); - let result = instance - .get_typed_func::<(u32, u32), u32>("main") - .unwrap() - .call(&mut Kind::Trap, (1, 10)); - assert!(matches!(result, Err(ExecutionError::Trap(..)))); - - let mut return_value = [0]; - let result = instance.get_func("main").unwrap().call(&mut Kind::Ok, &[1, 10], &mut return_value); - assert!(matches!(result, Ok(()))); - 
assert_eq!(return_value, [111]); - - return_value = [999]; - let result = instance - .get_func("main") - .unwrap() - .call(&mut Kind::Trap, &[1, 10], &mut return_value); + let result = instance.call_typed::<(u32, u32), u32>(&mut Kind::Trap, "main", (1, 10)); assert!(matches!(result, Err(ExecutionError::Trap(..)))); - assert_eq!(return_value, [999]); // Is unchanged. } fn fallback_hostcall_handler_works(config: Config) { @@ -254,11 +230,7 @@ fn fallback_hostcall_handler_works(config: Config) { let instance_pre = linker.instantiate_pre(&module).unwrap(); let instance = instance_pre.instantiate().unwrap(); - let result = instance - .get_typed_func::<(u32, u32), u32>("main") - .unwrap() - .call(&mut (), (1, 10)) - .unwrap(); + let result = instance.call_typed::<(u32, u32), u32>(&mut (), "main", (1, 10)).unwrap(); assert_eq!(result, 111); } @@ -371,8 +343,8 @@ fn doom(config: Config, elf: &'static [u8]) { let instance_pre = linker.instantiate_pre(&module).unwrap(); let instance = instance_pre.instantiate().unwrap(); - let ext_initialize = instance.get_typed_func::<(), ()>("ext_initialize").unwrap(); - let ext_tick = instance.get_typed_func::<(), ()>("ext_tick").unwrap(); + let ext_initialize = instance.module().lookup_export("ext_initialize").unwrap(); + let ext_tick = instance.module().lookup_export("ext_tick").unwrap(); let mut state = State { frame: Vec::new(), @@ -380,9 +352,11 @@ fn doom(config: Config, elf: &'static [u8]) { frame_height: 0, }; - ext_initialize.call(&mut state, ()).unwrap(); + instance + .call(Default::default(), CallArgs::new(&mut state, ext_initialize)) + .unwrap(); for nth_frame in 0..=10440 { - ext_tick.call(&mut state, ()).unwrap(); + instance.call(Default::default(), CallArgs::new(&mut state, ext_tick)).unwrap(); let expected_frame_raw = match nth_frame { 120 => decompress_zstd(include_bytes!("../../../test-data/doom_00120.tga.zst")), @@ -446,16 +420,19 @@ fn pinky(config: Config) { let linker = Linker::new(&engine); let instance_pre = 
linker.instantiate_pre(&module).unwrap(); let instance = instance_pre.instantiate().unwrap(); - let ext_initialize = instance.get_typed_func::<(), ()>("initialize").unwrap(); - let ext_run = instance.get_typed_func::<(), ()>("run").unwrap(); - let ext_get_framebuffer = instance.get_typed_func::<(), u32>("get_framebuffer").unwrap(); + let ext_initialize = instance.module().lookup_export("initialize").unwrap(); + let ext_run = instance.module().lookup_export("run").unwrap(); + let ext_get_framebuffer = instance.module().lookup_export("get_framebuffer").unwrap(); - ext_initialize.call(&mut (), ()).unwrap(); + instance.call(Default::default(), CallArgs::new(&mut (), ext_initialize)).unwrap(); for _ in 0..256 { - ext_run.call(&mut (), ()).unwrap(); + instance.call(Default::default(), CallArgs::new(&mut (), ext_run)).unwrap(); } - let address = ext_get_framebuffer.call(&mut (), ()).unwrap(); + instance + .call(Default::default(), CallArgs::new(&mut (), ext_get_framebuffer)) + .unwrap(); + let address = instance.get_result_typed::(); let framebuffer = instance.read_memory_into_new_vec(address, 256 * 240 * 4).unwrap(); let expected_frame_raw = decompress_zstd(include_bytes!("../../../test-data/pinky_00256.tga.zst")); @@ -469,6 +446,7 @@ fn pinky(config: Config) { } struct TestInstance { + module: crate::Module, instance: crate::Instance<()>, } @@ -504,10 +482,16 @@ impl TestInstance { }) .unwrap(); + linker + .func_wrap("call_sbrk_indirectly_impl", |mut caller: Caller<()>, size: u32| -> u32 { + caller.sbrk(size).unwrap_or(0) + }) + .unwrap(); + let instance_pre = linker.instantiate_pre(&module).unwrap(); let instance = instance_pre.instantiate().unwrap(); - TestInstance { instance } + TestInstance { module, instance } } pub fn call(&self, name: &str, args: FnArgs) -> Result> @@ -515,11 +499,8 @@ impl TestInstance { FnArgs: crate::api::FuncArgs, FnResult: crate::api::FuncResult, { - let function = self - .instance - .get_typed_func::(name) - .expect("function doesn't 
exist"); - function.call(&mut (), args) + self.instance.call_typed::(&mut (), name, args)?; + Ok(self.instance.get_result_typed::()) } } @@ -572,9 +553,9 @@ fn test_blob_atomic_fetch_minmax(config: Config) { for a in [-10, 0, 10] { for b in [-10, 0, 10] { let new_value = cb(a, b); - i.call::<(i32,), ()>("set_atomic_global", (a,)).unwrap(); + i.call::<(i32,), ()>("set_global", (a,)).unwrap(); assert_eq!(i.call::<(i32,), i32>(name, (b,)).unwrap(), a); - assert_eq!(i.call::<(), i32>("get_atomic_global", ()).unwrap(), new_value); + assert_eq!(i.call::<(), i32>("get_global", ()).unwrap(), new_value); } } } @@ -595,6 +576,129 @@ fn test_blob_input_registers(config: Config) { assert!(i.call::<(), ()>("test_input_registers", ()).is_ok()); } +fn test_blob_call_sbrk_from_guest(config: Config) { + test_blob_call_sbrk_impl(config, |i, size| i.call::<(u32,), u32>("call_sbrk", (size,)).unwrap()) +} + +fn test_blob_call_sbrk_from_host_instance(config: Config) { + test_blob_call_sbrk_impl(config, |i, size| i.instance.sbrk(size).unwrap().unwrap_or(0)) +} + +fn test_blob_call_sbrk_from_host_function(config: Config) { + test_blob_call_sbrk_impl(config, |i, size| i.call::<(u32,), u32>("call_sbrk_indirectly", (size,)).unwrap()) +} + +fn test_blob_program_memory_can_be_reused_and_cleared(config: Config) { + let i = TestInstance::new(&config); + let address = i.call::<(), u32>("get_global_address", ()).unwrap(); + + assert_eq!(i.instance.read_memory_into_new_vec(address, 4).unwrap(), [0x00, 0x00, 0x00, 0x00]); + + i.call::<(), ()>("increment_global", ()).unwrap(); + assert_eq!(i.instance.read_memory_into_new_vec(address, 4).unwrap(), [0x01, 0x00, 0x00, 0x00]); + + i.call::<(), ()>("increment_global", ()).unwrap(); + assert_eq!(i.instance.read_memory_into_new_vec(address, 4).unwrap(), [0x02, 0x00, 0x00, 0x00]); + + let ext_increment_global = i.instance.module().lookup_export("increment_global").unwrap(); + { + let mut state = (); + let mut call_args = CallArgs::new(&mut state, 
ext_increment_global); + call_args.reset_memory_after_call(true); + i.instance.call(Default::default(), call_args).unwrap(); + } + assert_eq!(i.instance.read_memory_into_new_vec(address, 4).unwrap(), [0x00, 0x00, 0x00, 0x00]); + + i.call::<(), ()>("increment_global", ()).unwrap(); + assert_eq!(i.instance.read_memory_into_new_vec(address, 4).unwrap(), [0x01, 0x00, 0x00, 0x00]); + + i.call::<(), ()>("increment_global", ()).unwrap(); + assert_eq!(i.instance.read_memory_into_new_vec(address, 4).unwrap(), [0x02, 0x00, 0x00, 0x00]); + + { + let mut state_args = StateArgs::new(); + state_args.reset_memory(true); + i.instance.call(state_args, CallArgs::new(&mut (), ext_increment_global)).unwrap(); + } + assert_eq!(i.instance.read_memory_into_new_vec(address, 4).unwrap(), [0x01, 0x00, 0x00, 0x00]); +} + +fn test_blob_out_of_bounds_memory_access_generates_a_trap(config: Config) { + let i = TestInstance::new(&config); + let address = i.call::<(), u32>("get_global_address", ()).unwrap(); + assert_eq!(i.call::<(u32,), u32>("read_u32", (address,)).unwrap(), 0); + i.call::<(), ()>("increment_global", ()).unwrap(); + assert_eq!(i.call::<(u32,), u32>("read_u32", (address,)).unwrap(), 1); + assert!(matches!(i.call::<(u32,), u32>("read_u32", (4,)), Err(ExecutionError::Trap(..)))); + + assert_eq!(i.call::<(u32,), u32>("read_u32", (address,)).unwrap(), 1); + i.call::<(), ()>("increment_global", ()).unwrap(); + assert_eq!(i.call::<(u32,), u32>("read_u32", (address,)).unwrap(), 2); +} + +fn test_blob_call_sbrk_impl(config: Config, mut call_sbrk: impl FnMut(&mut TestInstance, u32) -> u32) { + let mut i = TestInstance::new(&config); + let memory_map = i.module.memory_map().clone(); + let heap_base = memory_map.heap_base(); + let page_size = memory_map.page_size(); + + assert_eq!( + i.instance.read_memory_into_new_vec(memory_map.rw_data_range().end - 1, 1).unwrap(), + vec![0] + ); + assert!(i.instance.read_memory_into_new_vec(memory_map.rw_data_range().end, 1).is_err()); + assert!(i + 
.instance + .read_memory_into_new_vec(heap_base, memory_map.rw_data_range().end - heap_base) + .unwrap() + .iter() + .all(|&byte| byte == 0)); + assert_eq!(i.instance.heap_size(), 0); + + log::error!("AAA"); + assert_eq!(call_sbrk(&mut i, 0), heap_base); + log::error!("BBB"); + assert_eq!(i.instance.heap_size(), 0); + assert_eq!(call_sbrk(&mut i, 0), heap_base); + assert_eq!(call_sbrk(&mut i, 1), heap_base + 1); + assert_eq!(i.instance.heap_size(), 1); + assert_eq!(call_sbrk(&mut i, 0), heap_base + 1); + assert_eq!(call_sbrk(&mut i, 0xffffffff), 0); + assert_eq!(call_sbrk(&mut i, 0), heap_base + 1); + + i.instance.write_memory(heap_base, &[0x33]).unwrap(); + assert_eq!(i.instance.read_memory_into_new_vec(heap_base, 1).unwrap(), vec![0x33]); + + let new_origin = align_to_next_page_u32(memory_map.page_size(), heap_base + i.instance.heap_size()).unwrap(); + { + let until_next_page = new_origin - (heap_base + i.instance.heap_size()); + assert_eq!(call_sbrk(&mut i, until_next_page), new_origin); + } + + assert_eq!(i.instance.read_memory_into_new_vec(new_origin - 1, 1).unwrap(), vec![0]); + assert!(i.instance.read_memory_into_new_vec(new_origin, 1).is_err()); + assert!(i.instance.write_memory(new_origin, &[0x34]).is_err()); + + assert_eq!(call_sbrk(&mut i, 1), new_origin + 1); + assert_eq!( + i.instance.read_memory_into_new_vec(new_origin, page_size).unwrap().len(), + page_size as usize + ); + assert!(i.instance.read_memory_into_new_vec(new_origin, page_size + 1).is_err()); + assert!(i.instance.write_memory(new_origin, &[0x35]).is_ok()); + + assert_eq!(call_sbrk(&mut i, page_size - 1), new_origin + page_size); + assert!(i.instance.read_memory_into_new_vec(new_origin, page_size + 1).is_err()); + + i.instance.reset_memory().unwrap(); + assert_eq!(call_sbrk(&mut i, 0), heap_base); + assert_eq!(i.instance.heap_size(), 0); + assert!(i.instance.read_memory_into_new_vec(memory_map.rw_data_range().end, 1).is_err()); + + assert_eq!(call_sbrk(&mut i, 1), heap_base + 1); + 
assert_eq!(i.instance.read_memory_into_new_vec(heap_base, 1).unwrap(), vec![0]); +} + fn basic_gas_metering(config: Config, gas_metering_kind: GasMeteringKind) { let _ = env_logger::try_init(); @@ -611,38 +715,39 @@ fn basic_gas_metering(config: Config, gas_metering_kind: GasMeteringKind) { let linker = Linker::new(&engine); let instance_pre = linker.instantiate_pre(&module).unwrap(); let instance = instance_pre.instantiate().unwrap(); + let ext_main = instance.module().lookup_export("main").unwrap(); { - let mut config = ExecutionConfig::default(); - config.set_gas(Gas::new(2).unwrap()); + let mut state_args = StateArgs::default(); + state_args.set_gas(Gas::new(2).unwrap()); - let result = instance.get_typed_func::<(), i32>("main").unwrap().call_ex(&mut (), (), config); - assert!(matches!(result, Ok(666)), "unexpected result: {result:?}"); + instance.call(state_args, CallArgs::new(&mut (), ext_main)).unwrap(); + assert_eq!(instance.get_result_typed::(), 666); assert_eq!(instance.gas_remaining().unwrap(), Gas::new(0).unwrap()); } { - let mut config = ExecutionConfig::default(); - config.set_gas(Gas::new(1).unwrap()); + let mut state_args = StateArgs::default(); + state_args.set_gas(Gas::new(1).unwrap()); - let result = instance.get_typed_func::<(), i32>("main").unwrap().call_ex(&mut (), (), config); + let result = instance.call(state_args, CallArgs::new(&mut (), ext_main)); assert!(matches!(result, Err(ExecutionError::OutOfGas)), "unexpected result: {result:?}"); assert_eq!(instance.gas_remaining().unwrap(), Gas::new(0).unwrap()); } { - let mut config = ExecutionConfig::default(); - config.set_gas(Gas::new(4).unwrap()); + let mut state_args = StateArgs::default(); + state_args.set_gas(Gas::new(4).unwrap()); - let result = instance.get_typed_func::<(), i32>("main").unwrap().call_ex(&mut (), (), config); - assert!(matches!(result, Ok(666)), "unexpected result: {result:?}"); + instance.call(state_args, CallArgs::new(&mut (), ext_main)).unwrap(); + 
assert_eq!(instance.get_result_typed::(), 666); assert_eq!(instance.gas_remaining().unwrap(), Gas::new(2).unwrap()); - let result = instance.get_typed_func::<(), i32>("main").unwrap().call(&mut (), ()); - assert!(matches!(result, Ok(666)), "unexpected result: {result:?}"); + instance.call(StateArgs::default(), CallArgs::new(&mut (), ext_main)).unwrap(); + assert_eq!(instance.get_result_typed::(), 666); assert_eq!(instance.gas_remaining().unwrap(), Gas::new(0).unwrap()); - let result = instance.get_typed_func::<(), i32>("main").unwrap().call(&mut (), ()); + let result = instance.call(StateArgs::default(), CallArgs::new(&mut (), ext_main)); assert_eq!(instance.gas_remaining().unwrap(), Gas::new(0).unwrap()); assert!(matches!(result, Err(ExecutionError::OutOfGas)), "unexpected result: {result:?}"); } @@ -652,7 +757,7 @@ fn basic_gas_metering(config: Config, gas_metering_kind: GasMeteringKind) { let instance = instance_pre.instantiate().unwrap(); assert_eq!(instance.gas_remaining().unwrap(), Gas::new(0).unwrap()); - let result = instance.get_typed_func::<(), i32>("main").unwrap().call(&mut (), ()); + let result = instance.call(StateArgs::default(), CallArgs::new(&mut (), ext_main)); assert!(matches!(result, Err(ExecutionError::OutOfGas)), "unexpected result: {result:?}"); assert_eq!(instance.gas_remaining().unwrap(), Gas::new(0).unwrap()); } @@ -691,30 +796,31 @@ fn consume_gas_in_host_function(config: Config, gas_metering_kind: GasMeteringKi let instance_pre = linker.instantiate_pre(&module).unwrap(); let instance = instance_pre.instantiate().unwrap(); + let ext_main = instance.module().lookup_export("main").unwrap(); { - let mut config = ExecutionConfig::default(); - config.set_gas(Gas::new(3).unwrap()); + let mut state_args = StateArgs::default(); + state_args.set_gas(Gas::new(3).unwrap()); - let result = instance.get_typed_func::<(), i32>("main").unwrap().call_ex(&mut 0, (), config); - assert!(matches!(result, Ok(666)), "unexpected result: {result:?}"); + 
instance.call(state_args, CallArgs::new(&mut 0, ext_main)).unwrap(); + assert_eq!(instance.get_result_typed::(), 666); assert_eq!(instance.gas_remaining().unwrap(), Gas::new(1).unwrap()); } { - let mut config = ExecutionConfig::default(); - config.set_gas(Gas::new(3).unwrap()); + let mut state_args = StateArgs::default(); + state_args.set_gas(Gas::new(3).unwrap()); - let result = instance.get_typed_func::<(), i32>("main").unwrap().call_ex(&mut 1, (), config); - assert!(matches!(result, Ok(666)), "unexpected result: {result:?}"); + instance.call(state_args, CallArgs::new(&mut 1, ext_main)).unwrap(); + assert_eq!(instance.get_result_typed::(), 666); assert_eq!(instance.gas_remaining().unwrap(), Gas::new(0).unwrap()); } { - let mut config = ExecutionConfig::default(); - config.set_gas(Gas::new(3).unwrap()); + let mut state_args = StateArgs::default(); + state_args.set_gas(Gas::new(3).unwrap()); - let result = instance.get_typed_func::<(), i32>("main").unwrap().call_ex(&mut 2, (), config); + let result = instance.call(state_args, CallArgs::new(&mut 2, ext_main)); assert_eq!(instance.gas_remaining().unwrap(), Gas::new(0).unwrap()); assert!(matches!(result, Err(ExecutionError::OutOfGas)), "unexpected result: {result:?}"); } @@ -751,26 +857,77 @@ fn gas_metering_with_more_than_one_basic_block(config: Config) { let linker = Linker::new(&engine); let instance_pre = linker.instantiate_pre(&module).unwrap(); let instance = instance_pre.instantiate().unwrap(); + let ext_1 = instance.module().lookup_export("export_1").unwrap(); + let ext_2 = instance.module().lookup_export("export_2").unwrap(); { - let mut config = ExecutionConfig::default(); - config.set_gas(Gas::new(10).unwrap()); + let mut state_args = StateArgs::default(); + state_args.set_gas(Gas::new(10).unwrap()); - let result = instance.get_typed_func::<(), i32>("export_1").unwrap().call_ex(&mut (), (), config); - assert!(matches!(result, Ok(666)), "unexpected result: {result:?}"); + instance.call(state_args, 
CallArgs::new(&mut (), ext_1)).unwrap(); + assert_eq!(instance.get_result_typed::(), 666); assert_eq!(instance.gas_remaining().unwrap(), Gas::new(8).unwrap()); } { - let mut config = ExecutionConfig::default(); - config.set_gas(Gas::new(10).unwrap()); + let mut state_args = StateArgs::default(); + state_args.set_gas(Gas::new(10).unwrap()); - let result = instance.get_typed_func::<(), i32>("export_2").unwrap().call_ex(&mut (), (), config); - assert!(matches!(result, Ok(766)), "unexpected result: {result:?}"); + instance.call(state_args, CallArgs::new(&mut (), ext_2)).unwrap(); + assert_eq!(instance.get_result_typed::(), 766); assert_eq!(instance.gas_remaining().unwrap(), Gas::new(7).unwrap()); } } +fn spawn_stress_test(mut config: Config) { + let _ = env_logger::try_init(); + + let mut builder = ProgramBlobBuilder::new(); + builder.add_export(ProgramExport::new(0, "main".into())); + builder.set_ro_data_size(1); + builder.set_rw_data_size(1); + builder.set_ro_data(vec![0x00]); + builder.set_code(&[asm::ret()]); + + let blob = ProgramBlob::parse(builder.into_vec()).unwrap(); + + for worker_count in [0, 1] { + config.set_worker_count(worker_count); + let engine = Engine::new(&config).unwrap(); + + let module = Module::from_blob(&engine, &ModuleConfig::default(), &blob).unwrap(); + let ext_main = module.lookup_export("main").unwrap(); + let linker = Linker::new(&engine); + let instance_pre = linker.instantiate_pre(&module).unwrap(); + + const THREAD_COUNT: usize = 32; + let barrier = std::sync::Arc::new(std::sync::Barrier::new(THREAD_COUNT)); + + let mut threads = Vec::new(); + for _ in 0..THREAD_COUNT { + let instance_pre = instance_pre.clone(); + let barrier = std::sync::Arc::clone(&barrier); + let thread = std::thread::spawn(move || { + barrier.wait(); + for _ in 0..64 { + let instance = instance_pre.instantiate().unwrap(); + instance.call(Default::default(), CallArgs::new(&mut (), ext_main)).unwrap(); + } + }); + threads.push(thread); + } + + let mut results = 
Vec::new(); + for thread in threads { + results.push(thread.join()); + } + + for result in results { + result.unwrap(); + } + } +} + run_tests! { caller_and_caller_ref_work caller_split_works @@ -788,12 +945,19 @@ run_tests! { test_blob_hostcall test_blob_define_abi test_blob_input_registers + test_blob_call_sbrk_from_guest + test_blob_call_sbrk_from_host_instance + test_blob_call_sbrk_from_host_function + test_blob_program_memory_can_be_reused_and_cleared + test_blob_out_of_bounds_memory_access_generates_a_trap basic_gas_metering_sync basic_gas_metering_async consume_gas_in_host_function_sync consume_gas_in_host_function_async gas_metering_with_more_than_one_basic_block + + spawn_stress_test } // Source: https://users.rust-lang.org/t/a-macro-to-assert-that-a-type-does-not-implement-trait-bounds/31179 @@ -829,11 +993,10 @@ macro_rules! assert_send_sync { } assert_send_sync! { + crate::CallArgs<'static, ()>, crate::Config, crate::Engine, crate::Error, - crate::ExecutionConfig, - crate::Func<()>, crate::Gas, crate::Instance<()>, crate::InstancePre<()>, @@ -841,8 +1004,8 @@ assert_send_sync! 
{ crate::Module, crate::ModuleConfig, crate::ProgramBlob<'static>, + crate::StateArgs, crate::Trap, - crate::TypedFunc<(), (), ()>, } assert_not_impl!(crate::Caller<'static, ()>, Send); diff --git a/crates/polkavm/src/tracer.rs b/crates/polkavm/src/tracer.rs index f2de9794..a1d1aba8 100644 --- a/crates/polkavm/src/tracer.rs +++ b/crates/polkavm/src/tracer.rs @@ -1,11 +1,11 @@ use crate::api::BackendAccess; -use crate::api::ExecutionConfig; +use crate::api::ExecuteArgs; use crate::api::Module; use crate::interpreter::{InterpretedInstance, InterpreterContext}; use crate::source_cache::SourceCache; use core::mem::MaybeUninit; use polkavm_common::error::Trap; -use polkavm_common::program::{FrameKind, Opcode, ProgramExport, Reg}; +use polkavm_common::program::{FrameKind, Opcode, Reg}; use polkavm_common::utils::Access; pub(crate) struct Tracer { @@ -17,7 +17,7 @@ pub(crate) struct Tracer { crosscheck_reg: Option<(Reg, u32)>, crosscheck_store: Option<(u32, u32)>, crosscheck_store_bytes: [u8; 8], - crosscheck_reset_memory_after_execution: bool, + crosscheck_execution_flags: u32, current_line_program_position: Option<(usize, usize)>, current_source_location: Option<(u32, u32)>, @@ -25,21 +25,21 @@ pub(crate) struct Tracer { } impl Tracer { - pub fn new(module: Module) -> Self { + pub fn new(module: &Module) -> Self { Tracer { program_counter_history: [!0; 8], program_counter_history_position: 0, crosscheck_interpreter: if module.compiled_module().is_some() { - InterpretedInstance::new(module.clone()).ok() + InterpretedInstance::new_from_module(module).ok() } else { None }, - module, + module: module.clone(), source_cache: SourceCache::default(), crosscheck_reg: None, crosscheck_store: None, crosscheck_store_bytes: Default::default(), - crosscheck_reset_memory_after_execution: false, + crosscheck_execution_flags: 0, current_line_program_position: None, current_source_location: None, @@ -48,24 +48,16 @@ impl Tracer { } } - pub fn on_before_call(&mut self, export_index: 
usize, export: &ProgramExport, config: &ExecutionConfig) { - let target = self - .module - .instruction_by_basic_block(export.jump_target()) - .expect("internal error: invalid export address"); - log::trace!("Calling export: {} (at #{})", export.symbol(), target); - + pub fn on_before_execute(&mut self, args: &ExecuteArgs) { if let Some(ref mut interpreter) = self.crosscheck_interpreter { - self.crosscheck_reset_memory_after_execution = config.reset_memory_after_execution; - interpreter.prepare_for_call(export_index, config); + self.crosscheck_execution_flags = args.flags; + interpreter.prepare_for_execution(args); } } - pub fn on_after_call(&mut self) { + pub fn on_after_execute(&mut self) { if let Some(ref mut interpreter) = self.crosscheck_interpreter { - if self.crosscheck_reset_memory_after_execution { - interpreter.reset_memory(); - } + interpreter.finish_execution(self.crosscheck_execution_flags); } } diff --git a/crates/polkavm/src/utils.rs b/crates/polkavm/src/utils.rs index 0acb4e5e..2b8caf87 100644 --- a/crates/polkavm/src/utils.rs +++ b/crates/polkavm/src/utils.rs @@ -19,3 +19,19 @@ impl From for RegImm { RegImm::Imm(value) } } + +#[derive(Copy, Clone, Default)] +pub struct GuestInit<'a> { + pub page_size: u32, + pub ro_data: &'a [u8], + pub rw_data: &'a [u8], + pub ro_data_size: u32, + pub rw_data_size: u32, + pub stack_size: u32, +} + +impl<'a> GuestInit<'a> { + pub fn memory_map(&self) -> Result { + polkavm_common::abi::MemoryMap::new(self.page_size, self.ro_data_size, self.rw_data_size, self.stack_size) + } +} diff --git a/examples/doom/roms/doom.polkavm b/examples/doom/roms/doom.polkavm index dd30c6cb..026ef38a 100644 Binary files a/examples/doom/roms/doom.polkavm and b/examples/doom/roms/doom.polkavm differ diff --git a/examples/doom/src/vm.rs b/examples/doom/src/vm.rs index 4b377282..85d2ba94 100644 --- a/examples/doom/src/vm.rs +++ b/examples/doom/src/vm.rs @@ -1,5 +1,5 @@ use core::mem::MaybeUninit; -use polkavm::{Caller, Config, Engine, 
ExecutionError, Linker, Module, ProgramBlob, Trap, TypedFunc}; +use polkavm::{Caller, Config, Engine, ExecutionError, Instance, Linker, Module, ProgramBlob, Trap}; struct State { rom: Vec, @@ -13,9 +13,7 @@ struct State { pub struct Vm { state: State, - ext_initialize: TypedFunc, - ext_tick: TypedFunc, - ext_on_keychange: TypedFunc, + instance: Instance, } impl Vm { @@ -108,9 +106,6 @@ impl Vm { let instance_pre = linker.instantiate_pre(&module)?; let instance = instance_pre.instantiate()?; - let ext_initialize = instance.get_typed_func::<(), ()>("ext_initialize")?; - let ext_tick = instance.get_typed_func::<(), ()>("ext_tick")?; - let ext_on_keychange = instance.get_typed_func::<(u32, u32), ()>("ext_on_keychange")?; Ok(Self { state: State { @@ -121,9 +116,7 @@ impl Vm { audio_buffer: Default::default(), on_audio_frame: None, }, - ext_initialize, - ext_tick, - ext_on_keychange, + instance, }) } @@ -133,15 +126,16 @@ impl Vm { pub fn initialize(&mut self, rom: impl Into>) -> Result<(), ExecutionError> { self.state.rom = rom.into(); - self.ext_initialize.call(&mut self.state, ()) + self.instance.call_typed(&mut self.state, "ext_initialize", ()) } pub fn run_for_a_frame(&mut self) -> Result<(u32, u32, &[u8]), ExecutionError> { - self.ext_tick.call(&mut self.state, ())?; + self.instance.call_typed(&mut self.state, "ext_tick", ())?; Ok((self.state.frame_width, self.state.frame_height, &self.state.frame)) } pub fn on_keychange(&mut self, key: u8, is_pressed: bool) -> Result<(), ExecutionError> { - self.ext_on_keychange.call(&mut self.state, (key as u32, is_pressed as u32)) + self.instance + .call_typed(&mut self.state, "ext_on_keychange", (key as u32, is_pressed as u32)) } } diff --git a/examples/hello-world/src/main.rs b/examples/hello-world/src/main.rs index f48408a4..5c62b634 100644 --- a/examples/hello-world/src/main.rs +++ b/examples/hello-world/src/main.rs @@ -1,4 +1,4 @@ -use polkavm::{Config, Engine, Linker, Module, ProgramBlob}; +use polkavm::{CallArgs, Config, 
Engine, Linker, Module, ProgramBlob, Reg, StateArgs}; fn main() { env_logger::init(); @@ -21,14 +21,19 @@ fn main() { let instance = instance_pre.instantiate().unwrap(); // Grab the function and call it. - println!("Calling into the guest program (through typed function):"); - let fn_typed = instance.get_typed_func::<(u32, u32), u32>("add_numbers").unwrap(); - let result = fn_typed.call(&mut (), (1, 10)).unwrap(); + println!("Calling into the guest program (simple):"); + let result = instance.call_typed::<(u32, u32), u32>(&mut (), "add_numbers", (1, 10)).unwrap(); println!(" 1 + 10 + 100 = {}", result); - println!("Calling into the guest program (through untyped function):"); - let fn_untyped = instance.get_func("add_numbers").unwrap(); - let mut return_value = [0]; - fn_untyped.call(&mut (), &[1, 10], &mut return_value).unwrap(); - println!(" 1 + 10 + 100 = {}", return_value[0]); + println!("Calling into the guest program (full):"); + let export_index = instance.module().lookup_export("add_numbers").unwrap(); + + #[allow(clippy::let_unit_value)] + let mut user_data = (); + let mut call_args = CallArgs::new(&mut user_data, export_index); + call_args.args_untyped(&[1, 10]); + + instance.call(StateArgs::new(), call_args).unwrap(); + let return_value = instance.get_reg(Reg::A0); + println!(" 1 + 10 + 100 = {}", return_value); } diff --git a/guest-programs/Cargo.lock b/guest-programs/Cargo.lock index 17429cb1..792f9baf 100644 --- a/guest-programs/Cargo.lock +++ b/guest-programs/Cargo.lock @@ -81,18 +81,18 @@ dependencies = [ [[package]] name = "polkavm-common" -version = "0.8.0" +version = "0.9.0" [[package]] name = "polkavm-derive" -version = "0.8.0" +version = "0.9.0" dependencies = [ "polkavm-derive-impl-macro", ] [[package]] name = "polkavm-derive-impl" -version = "0.8.0" +version = "0.9.0" dependencies = [ "polkavm-common", "proc-macro2", @@ -102,7 +102,7 @@ dependencies = [ [[package]] name = "polkavm-derive-impl-macro" -version = "0.8.0" +version = "0.9.0" 
dependencies = [ "polkavm-derive-impl", "syn 2.0.38", @@ -128,7 +128,7 @@ dependencies = [ [[package]] name = "simplealloc" -version = "0.8.0" +version = "0.9.0" [[package]] name = "softfloat" diff --git a/guest-programs/build-benchmarks.sh b/guest-programs/build-benchmarks.sh index 3c13bc0e..a5b587e0 100755 --- a/guest-programs/build-benchmarks.sh +++ b/guest-programs/build-benchmarks.sh @@ -64,7 +64,7 @@ function build_benchmark() { if [ "${RV32E_TOOLCHAIN:-}" != "" ]; then echo "> Building: '$1' (polkavm)" - RUSTFLAGS="-C target-feature=+lui-addi-fusion -C relocation-model=pie -C link-arg=--emit-relocs -C link-arg=--unique $extra_flags" rustup run $RV32E_TOOLCHAIN cargo build -q --release --bin $1 -p $1 + RUSTFLAGS="-C target-feature=+lui-addi-fusion,+fast-unaligned-access,+xtheadcondmov -C relocation-model=pie -C link-arg=--emit-relocs -C link-arg=--unique $extra_flags" rustup run $RV32E_TOOLCHAIN cargo build -q --release --bin $1 -p $1 cd .. cargo run -q -p polkatool link --run-only-if-newer guest-programs/target/riscv32ema-unknown-none-elf/release/$1 -o guest-programs/target/riscv32ema-unknown-none-elf/release/$1.polkavm cd $current_dir diff --git a/guest-programs/build-test-data.sh b/guest-programs/build-test-data.sh index 84f0fd51..41def216 100755 --- a/guest-programs/build-test-data.sh +++ b/guest-programs/build-test-data.sh @@ -6,7 +6,7 @@ function build_test_data() { output_path="../test-data/$1.elf.zst" echo "> Building: '$1' (-> $output_path)" - RUSTFLAGS="-C target-feature=+lui-addi-fusion -C relocation-model=pie -C link-arg=--emit-relocs -C link-arg=--unique --remap-path-prefix=$(pwd)= --remap-path-prefix=$HOME=~" cargo build -q --profile $2 --bin $1 -p $1 + RUSTFLAGS="-C target-feature=+lui-addi-fusion,+fast-unaligned-access,+xtheadcondmov -C relocation-model=pie -C link-arg=--emit-relocs -C link-arg=--unique --remap-path-prefix=$(pwd)= --remap-path-prefix=$HOME=~" cargo build -q --profile $2 --bin $1 -p $1 zstd -f -q -19 -o $output_path 
target/riscv32ema-unknown-none-elf/$2/$1 chmod -x $output_path } diff --git a/guest-programs/output/example-hello-world.polkavm b/guest-programs/output/example-hello-world.polkavm index 4d80c214..a97295d4 100644 Binary files a/guest-programs/output/example-hello-world.polkavm and b/guest-programs/output/example-hello-world.polkavm differ diff --git a/guest-programs/test-blob/src/main.rs b/guest-programs/test-blob/src/main.rs index 53caddbe..10e08c88 100644 --- a/guest-programs/test-blob/src/main.rs +++ b/guest-programs/test-blob/src/main.rs @@ -24,27 +24,44 @@ extern "C" fn push_one_to_global_vec() -> u32 { } } -static mut ATOMIC_GLOBAL: u32 = 0; +static mut GLOBAL: u32 = 0; #[polkavm_derive::polkavm_export] -extern "C" fn get_atomic_global() -> u32 { - unsafe { ATOMIC_GLOBAL } +extern "C" fn get_global() -> u32 { + unsafe { GLOBAL } } #[polkavm_derive::polkavm_export] -extern "C" fn set_atomic_global(value: u32) { +extern "C" fn set_global(value: u32) { unsafe { - ATOMIC_GLOBAL = value; + GLOBAL = value; } } +#[polkavm_derive::polkavm_export] +extern "C" fn increment_global() { + unsafe { + GLOBAL += 1; + } +} + +#[polkavm_derive::polkavm_export] +extern "C" fn get_global_address() -> *mut u32 { + unsafe { core::ptr::addr_of_mut!(GLOBAL) } +} + +#[polkavm_derive::polkavm_export] +extern "C" fn read_u32(address: u32) -> u32 { + unsafe { *(address as *const u32) } +} + #[polkavm_derive::polkavm_export] extern "C" fn atomic_fetch_add(value: u32) -> u32 { unsafe { let output; core::arch::asm!( "amoadd.w a0, a1, (a0)", - inout("a0") &mut ATOMIC_GLOBAL => output, + inout("a0") &mut GLOBAL => output, in("a1") value, ); output @@ -57,7 +74,7 @@ extern "C" fn atomic_fetch_swap(value: u32) -> u32 { let output; core::arch::asm!( "amoswap.w a0, a1, (a0)", - inout("a0") &mut ATOMIC_GLOBAL => output, + inout("a0") &mut GLOBAL => output, in("a1") value, ); output @@ -70,7 +87,7 @@ extern "C" fn atomic_fetch_max_signed(value: i32) -> i32 { let output; core::arch::asm!( "amomax.w 
a0, a1, (a0)", - inout("a0") &mut ATOMIC_GLOBAL => output, + inout("a0") &mut GLOBAL => output, in("a1") value, ); output @@ -83,7 +100,7 @@ extern "C" fn atomic_fetch_min_signed(value: i32) -> i32 { let output; core::arch::asm!( "amomin.w a0, a1, (a0)", - inout("a0") &mut ATOMIC_GLOBAL => output, + inout("a0") &mut GLOBAL => output, in("a1") value, ); output @@ -96,7 +113,7 @@ extern "C" fn atomic_fetch_max_unsigned(value: u32) -> u32 { let output; core::arch::asm!( "amomaxu.w a0, a1, (a0)", - inout("a0") &mut ATOMIC_GLOBAL => output, + inout("a0") &mut GLOBAL => output, in("a1") value, ); output @@ -109,13 +126,28 @@ extern "C" fn atomic_fetch_min_unsigned(value: u32) -> u32 { let output; core::arch::asm!( "amominu.w a0, a1, (a0)", - inout("a0") &mut ATOMIC_GLOBAL => output, + inout("a0") &mut GLOBAL => output, in("a1") value, ); output } } +#[polkavm_derive::polkavm_export] +extern "C" fn call_sbrk(size: usize) -> *mut u8 { + polkavm_derive::sbrk(size) +} + +#[polkavm_derive::polkavm_import] +extern "C" { + fn call_sbrk_indirectly_impl(size: usize) -> usize; +} + +#[polkavm_derive::polkavm_export] +extern "C" fn call_sbrk_indirectly(size: usize) -> *mut u8 { + unsafe { call_sbrk_indirectly_impl(size) as *mut u8 } +} + // Test that an unused import will be stripped. 
#[polkavm_derive::polkavm_import] extern "C" { diff --git a/test-data/bench-pinky.elf.zst b/test-data/bench-pinky.elf.zst index 24ed1179..70d25869 100644 Binary files a/test-data/bench-pinky.elf.zst and b/test-data/bench-pinky.elf.zst differ diff --git a/test-data/test-blob.elf.zst b/test-data/test-blob.elf.zst index 43f350ab..cf3dba6d 100644 Binary files a/test-data/test-blob.elf.zst and b/test-data/test-blob.elf.zst differ diff --git a/tools/benchtool/src/backend/backend_polkavm.rs b/tools/benchtool/src/backend/backend_polkavm.rs index 9cced945..3ba8372b 100644 --- a/tools/benchtool/src/backend/backend_polkavm.rs +++ b/tools/benchtool/src/backend/backend_polkavm.rs @@ -3,12 +3,18 @@ use super::backend_prelude::*; #[derive(Copy, Clone)] pub struct PolkaVM(pub Option); +pub struct Instance { + ext_initialize: polkavm::ExportIndex, + ext_run: polkavm::ExportIndex, + instance: polkavm::Instance<()>, +} + #[cfg(target_arch = "x86_64")] impl Backend for PolkaVM { type Engine = polkavm::Engine; type Blob = Vec; type Module = polkavm::Module; - type Instance = (polkavm::TypedFunc<(), (), ()>, polkavm::TypedFunc<(), (), ()>, Option); + type Instance = Instance; fn name(&self) -> &'static str { match self.0 { @@ -38,26 +44,36 @@ impl Backend for PolkaVM { let linker = polkavm::Linker::<()>::new(engine); let instance_pre = linker.instantiate_pre(module).unwrap(); let instance = instance_pre.instantiate().unwrap(); - let ext_initialize = instance.get_typed_func::<(), ()>("initialize").unwrap(); - let ext_run = instance.get_typed_func::<(), ()>("run").unwrap(); - (ext_initialize, ext_run, instance.pid()) + let ext_initialize = module.lookup_export("initialize").unwrap(); + let ext_run = module.lookup_export("run").unwrap(); + Instance { + ext_initialize, + ext_run, + instance, + } } fn initialize(&self, instance: &mut Self::Instance) { - let mut config = polkavm::ExecutionConfig::default(); + let mut state_args = polkavm::StateArgs::default(); if self.0.is_some() { - 
config.set_gas(polkavm::Gas::MAX); + state_args.set_gas(polkavm::Gas::MAX); } - instance.0.call_ex(&mut (), (), config).unwrap(); + instance + .instance + .call(state_args, polkavm::CallArgs::new(&mut (), instance.ext_initialize)) + .unwrap(); } fn run(&self, instance: &mut Self::Instance) { - instance.1.call(&mut (), ()).unwrap(); + instance + .instance + .call(Default::default(), polkavm::CallArgs::new(&mut (), instance.ext_run)) + .unwrap(); } fn pid(&self, instance: &Self::Instance) -> Option { - instance.2 + instance.instance.pid() } fn is_compiled(&self) -> bool {