From d0a56e7808c8265b2800805def97a8a25ae9ac0c Mon Sep 17 00:00:00 2001 From: Andrey Khmuro Date: Fri, 29 Nov 2024 10:57:52 +0300 Subject: [PATCH 1/2] feat: add MASM format guidebook (#987) --- CHANGELOG.md | 1 + .../asm/kernels/transaction/lib/epilogue.masm | 12 +- miden-lib/masm_doc_comment_fmt.md | 260 ++++++++++++++++++ 3 files changed, 271 insertions(+), 2 deletions(-) create mode 100644 miden-lib/masm_doc_comment_fmt.md diff --git a/CHANGELOG.md b/CHANGELOG.md index ba287e280..7c6b21ba1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - [BREAKING] Remove `AccountBuilder::build_testing` and make `Account::initialize_from_components` private (#969). - [BREAKING] Add error messages to errors and implement `core::error::Error` (#974). - Implemented new `digest_from_hex!` macro (#984). +- Added Format Guidebook to the `miden-lib` crate (#987). ## 0.6.2 (2024-11-20) diff --git a/miden-lib/asm/kernels/transaction/lib/epilogue.masm b/miden-lib/asm/kernels/transaction/lib/epilogue.masm index f0555e1f6..409b53698 100644 --- a/miden-lib/asm/kernels/transaction/lib/epilogue.masm +++ b/miden-lib/asm/kernels/transaction/lib/epilogue.masm @@ -25,11 +25,19 @@ const.ERR_EPILOGUE_TOTAL_NUMBER_OF_ASSETS_MUST_STAY_THE_SAME=0x00020029 #! The output note data includes the note id, metadata, recipient, assets hash, number of assets and #! a set of assets for each note. #! -#! Inputs: [OUTPUT_NOTES_COMMITMENT] -#! Outputs: [OUTPUT_NOTES_COMMITMENT] +#! Inputs: +#! Operand stack: [OUTPUT_NOTES_COMMITMENT] +#! Outputs: +#! Operand stack: [OUTPUT_NOTES_COMMITMENT] +#! Advice map: { +#! OUTPUT_NOTES_COMMITMENT: [mem[output_note_ptr]...mem[output_notes_end_ptr]], +#! } #! #! Where: #! - OUTPUT_NOTES_COMMITMENT is the note commitment computed from note's id and metadata. +#! - output_note_ptr is the start boundary of the output notes section. +#! - output_notes_end_ptr is the end boundary of the output notes section. +#! - mem[i] is the memory value stored at some address i. proc.copy_output_notes_to_advice_map # get the number of notes created by the transaction exec.memory::get_num_output_notes diff --git a/miden-lib/masm_doc_comment_fmt.md b/miden-lib/masm_doc_comment_fmt.md new file mode 100644 index 000000000..bc8cac122 --- /dev/null +++ b/miden-lib/masm_doc_comment_fmt.md @@ -0,0 +1,260 @@ +# Format guidebook + +A guidebook defining the format of documentation comments and regular comments for `masm` procedures. + +## General + +Entire procedure doc comment should be created using the `#!` pair of symbols as the commenting sign. + +Doc comment for a procedure should have these blocks: + +- Procedure description. +- Inputs and outputs. +- Description of the values used in the "Inputs and outputs" block (optional). +- Panic block (optional). +- Invocation hint (optional and will become redundant after the procedure annotations will be implemented). + +Each block should be separated from the others with a blank line. + +Example: + +```masm +#! This procedure is executed somewhere in the execution pipeline. Its responsibility is: +#! 1. Do the first point of this list. +#! 2. Compute some extremely important values which will be used in future. +#! 3. Finally do the actions specified in the third point of this list. +#! +#! Inputs: +#! Operand stack: [ +#! single_felt, +#! [felt_1, 0, 0, felt_2], +#! SOME_WORD, +#! memory_value_ptr, +#! ] +#! Advice stack: [HASH_A, HASH_B, [ARRAY_OF_HASHES]] +#! Advice map: { +#! KEY_1: [VALUE_1], +#! KEY_2: [VALUE_2], +#! } +#! Outputs: +#! Operand stack: [] +#! Advice stack: [] +#! +#! Where: +#! - single_felt is the ordinary value placed on top of the stack. +#! - SOME_WORD is the word specifying maybe some hash. +#! +#! Panics if: +#! - something went wrong. +#! - some check is failed. +#! +#! Invocation: call +``` + +## Procedure description + +Contains the general information about the purpose of the procedure and the way it works. May contain any other valuable information. + +If some list is used for description, it should be formatted like so: + +- The description of the list should not have a blank like between it and the list. +- The description of the list should have a colon at the end. +- Depending on what kind of sentences form a list, they should start with a capital letter and end with a period, or start with a lowercase letter and without period at the end. +- List should use a `-` symbol in case of unordered list or arabic numerals for ordered ones (for example, for the description of the execution steps). +- Nested list should follow the same format. + +Some data could be formatted as a subparagraph, in that case a blank line should be used to separate them. + +Example: + +```masm +#! Transaction kernel program. +#! +#! This is the entry point of the transaction kernel, the program will perform the following +#! operations: +#! 1. Run the prologue to prepare the transaction's root context. +#! 2. Run all the notes' scripts. +#! 3. Run the transaction script. +#! 4. Run the epilogue to compute and validate the final state. +#! +#! See `prologue::prepare_transaction` for additional details on the VM's initial state, including +#! the advice provider. +``` + +## Inputs and outputs + +Each variable could represent a single value or a sequence of four values (a Word). Variable representing a single value should be written in lowercase, and a variable for the word should be written in uppercase. + +Example: + +```masm +#! Inputs: [single_value, SOME_WORD] +``` + +Variable, which represents a memory address, should have a `_ptr` suffix in its name. For example, `note_script_root_ptr`. + +It is strongly not recommended to use a single-letter names for variables, with just an exception for the loop indexes (i.e. `i`). So, for example, instead of `H` a proper `HASH` or even more expanded version like `INIT_HASH` should be used. + +### Inputs + +Inputs block could contain three components: operand stack, advice stack and advice map. Description of the each container should be offseted with two spaces relative to the start of the `Inputs` word. Each name of the container should be separated from its value by the colon (e.g. `Operand stack: [value_1]`). + +Operand stack and advice stack should be presented as an array containing some data. + +The lines which exceed 100 symbols should be formatted differently, it could be done in two different ways: + +1. The line should be broken, and the end of the line should be moved to the new line with an offset such that the first symbol of the first element on the second line should be directly above the first symbol of the first element on the first line (see the value of the `FOREIGN_ACCOUNT_ID` in the example in `Formats` section). +2. The exceeded array should be formatted in a column, forming a Word or some other number of related elements on each line. Each new line should be offseted with two spaces relative to the name of the container (see example below). + +Example: + +```masm +#! Inputs: +#! Operand stack: [] +#! Advice stack: [ +#! account_id, 0, 0, account_nonce, +#! ACCOUNT_VAULT_ROOT, +#! ACCOUNT_STORAGE_COMMITMENT, +#! ACCOUNT_CODE_COMMITMENT +#! ] +``` + +To show that some internal value array could have dynamic length, additional brackets should be used (see the `[VALUE_B]` in the advice stack in the example in `Formats` section). + +In case some inputs are presented on the stack only if some condition is satisfied, such inputs should be placed in the "optional" box: inside the parentheses with a question mark at the end. Opening and closing brackets should be placed on a new line with the same offset as the other inputs, and values inside the brackets should be offseted by two spaces. + +Example: + +```masm +#! ... +#! Advice stack: [ +#! NOTE_METADATA, +#! assets_count, +#! ( +#! block_num, +#! BLOCK_SUB_HASH, +#! NOTE_ROOT, +#! )? +#! ] +#! ... +``` + +Advice map should be presented as a sequence of the key-value pairs in the curly brackets. Opening bracket should stay on the same line, and the closing bracket should be placed on the next line after the last key-value pair with the same offset as the `Advice map` phrase. + +Each pair should start at the new line with additional two spaces offset relative to the start of the `Advice map` phrase. Pairs should be separated with comma. The same formatting rules as to the operand and advice stacks should be applied for the each key-value pair. + +### Outputs + +Outputs should show the final state of each container, used in the inputs, except for the advice map. Advice map should be specified in the outputs section only if it was modified. + +Example: + +```masm +#! Inputs: +#! Operand stack: [OUTPUT_NOTES_COMMITMENT] +#! Outputs: +#! Operand stack: [OUTPUT_NOTES_COMMITMENT] +#! Advice map: { +#! OUTPUT_NOTES_COMMITMENT: [mem[output_note_ptr]...mem[output_notes_end_ptr]], +#! } +#! +#! Where: +#! - OUTPUT_NOTES_COMMITMENT is the note commitment computed from note's id and metadata. +#! - output_note_ptr is the start boundary of the output notes section. +#! - output_notes_end_ptr is the end boundary of the output notes section. +#! - mem[i] is the memory value stored at some address i. +``` + +### Formats + +#### Full version + +In case the values are provided not only through the operand stack, but also through any other container, the full version if the inputs should be used. + +Notice that operand stack should be presented in any case, even if it is empty. Other components should be presented only if they have some values used in the describing function. + +Example: + +```masm +#! Inputs: +#! Operand stack: [] +#! Advice stack: [VALUE_A, [VALUE_B]] +#! Advice map: { +#! FOREIGN_ACCOUNT_ID: [[foreign_account_id, 0, 0, account_nonce], VAULT_ROOT, STORAGE_ROOT, +#! CODE_ROOT], +#! STORAGE_ROOT: [[STORAGE_SLOT_DATA]], +#! CODE_ROOT: [num_procs, [ACCOUNT_PROCEDURE_DATA]] +#! } +#! Outputs: +#! Operand stack: [value] +#! Advice stack: [] +``` + +#### Short version + +In case the values are provided only through the operand stack, a short version of the inputs and outputs should be used. In that case only `Inputs` and `Outputs` components are used, representing the values on the operand stack. + +Input values array should be offseted by one space to be inline with the output values array (see the example). + +Example: + +```masm +#! Inputs: [single_value, WORD_1] +#! Outputs: [WORD_2] +``` + +## Description of the used values + +If some value was used in the inputs and outputs block (and its meaning is not obvious) this value should be described. + +Values description block should start with `Where` word with a colon at the end. Definitions should be represented as an unordered list constructed with `-` symbols, without any space offset. Each definition should start with the name of the variable followed by the `is/are the` phrase, after which the definition should be placed. At the end of each definition should be a period. + +Example: + +```masm +#! Where: +#! - tag is the tag to be included in the note. +#! - aux is the auxiliary metadata to be included in the note. +#! - note_type is the storage type of the note. +#! - execution_hint is the note's execution hint. +#! - RECIPIENT is the recipient of the note. +#! - note_idx is the index of the crated note. +``` + +## Panic block + +If the describing procedure could potentially panic, a panic block should be specified. + +Panic block should start with `Panics if` phrase with a colon at the end. Panic cases should be represented as an unordered list constructed with `-` symbols, without any space offset. Definitions should start with lowercase letter, except for the cases which form the nested list (see example). Each case should end with a period. + +Example: + +```masm +#! Panics if: +#! - the transaction is not being executed against a faucet. +#! - the invocation of this procedure does not originate from the native account. +#! - the asset being burned is not associated with the faucet the transaction is being executed +#! against. +#! - the asset is not well formed. +#! - For fungible faucets: +#! - the amount being burned is greater than the total input to the transaction. +#! - For non-fungible faucets: +#! - the non-fungible asset being burned does not exist or was not provided as input to the +#! transaction via a note or the accounts vault. +``` + +## Invocation hint + +Invocation hint is the temporary comment showing how the procedure is meant to be used. It will help to implement the procedure annotations in future. + +The hint could show how this procedures is invoked: + +- with `exec` +- with `call`/`syscall` +- is not used anywhere + +Example: + +```masm +#! Invocation: call +``` From e7b80308b71191bf6bf334da703ebc260ceb376f Mon Sep 17 00:00:00 2001 From: Mirko <48352201+Mirko-von-Leipzig@users.noreply.github.com> Date: Fri, 29 Nov 2024 11:24:46 +0200 Subject: [PATCH 2/2] feat: support varlen hex strings in digest macro (#988) Improves the new `digest!` macro by allowing any length string literals. The previous implementation only allowed for Felt-multiple lengths. --- miden-lib/build.rs | 2 +- .../src/transaction/procedures/kernel_v0.rs | 2 +- objects/src/lib.rs | 185 +++++++++--------- 3 files changed, 95 insertions(+), 94 deletions(-) diff --git a/miden-lib/build.rs b/miden-lib/build.rs index bcc399b5e..d04b07159 100644 --- a/miden-lib/build.rs +++ b/miden-lib/build.rs @@ -229,7 +229,7 @@ fn generate_kernel_proc_hash_file(kernel: KernelLibrary) -> Result<()> { format!( r#"/// This file is generated by build.rs, do not modify -use miden_objects::{{digest, Digest, Felt}}; +use miden_objects::{{digest, Digest}}; // KERNEL V0 PROCEDURES // ================================================================================================ diff --git a/miden-lib/src/transaction/procedures/kernel_v0.rs b/miden-lib/src/transaction/procedures/kernel_v0.rs index e6208fa99..7fcdb4d43 100644 --- a/miden-lib/src/transaction/procedures/kernel_v0.rs +++ b/miden-lib/src/transaction/procedures/kernel_v0.rs @@ -1,6 +1,6 @@ /// This file is generated by build.rs, do not modify -use miden_objects::{digest, Digest, Felt}; +use miden_objects::{digest, Digest}; // KERNEL V0 PROCEDURES // ================================================================================================ diff --git a/objects/src/lib.rs b/objects/src/lib.rs index 676c4047f..176baa98c 100644 --- a/objects/src/lib.rs +++ b/objects/src/lib.rs @@ -55,117 +55,118 @@ pub mod utils { /// Construct a new `Digest` from a hex value. /// - /// Marco supports hex strings of length 18, 34, 50 and 66 (with prefix). + /// Expects a '0x' prefixed hex string followed by up to 64 hex digits. #[macro_export] macro_rules! digest { ($hex:expr) => {{ - // hex prefix offset - const START: usize = 2; - const fn parse_hex_digit(digit: u8) -> u8 { match digit { b'0'..=b'9' => digit - b'0', - b'A'..=b'F' => digit - b'A' + 10, - b'a'..=b'f' => digit - b'a' + 10, + b'A'..=b'F' => digit - b'A' + 0x0a, + b'a'..=b'f' => digit - b'a' + 0x0a, _ => panic!("Invalid hex character"), } } - // Returns a byte array of the u64 value decoded from the hex byte array. - // - // Where: - // - global_index is the index of the hex digit byte in the hex_bytes array - // - upper_limit is the index at which the bytes of the current u64 value end - const fn decode_u64_bytes( - mut global_index: usize, - hex_bytes: &[u8], - upper_limit: usize, - ) -> [u8; 8] { - let mut u64_bytes = [0u8; 8]; - - let mut local_index = 0; - while global_index < upper_limit { - let upper = parse_hex_digit(hex_bytes[START + global_index]); - let lower = parse_hex_digit(hex_bytes[START + global_index + 1]); - u64_bytes[local_index] = upper << 4 | lower; - - local_index += 1; - global_index += 2; - } + // Enforce and skip the '0x' prefix. + let hex_bytes = match $hex.as_bytes() { + [b'0', b'x', rest @ ..] => rest, + _ => panic!(r#"Hex string must have a "0x" prefix"#), + }; - u64_bytes + if hex_bytes.len() > 64 { + panic!("Hex string has more than 64 characters"); } - let hex_bytes = $hex.as_bytes(); - - if hex_bytes[0] != b'0' || hex_bytes[1] != b'x' { - panic!("Hex string should start with \"0x\" prefix"); + // Aggregate each byte into the appropriate felt value. Doing it this way also allows + // for variable string lengths. + let mut felts = [0u64; 4]; + let mut i = 0; + // We are forced to use a while loop because the others aren't supported in const + // context. + while i < hex_bytes.len() { + // This digit's nibble offset within the felt. We need to invert the nibbles per + // byte for endianess reasons i.e. ABCD -> BADC. + let inibble = if i % 2 == 0 { (i + 1) % 16 } else { (i - 1) % 16 }; + + // SAFETY: u8 cast to u64 is safe. We cannot use u64::from in const context so we + // are forced to cast. + let value = (parse_hex_digit(hex_bytes[i]) as u64) << (inibble * 4); + felts[i / 2 / 8] += value; + + i += 1; } - match hex_bytes.len() { - 18 => { - let v1 = u64::from_le_bytes(decode_u64_bytes(0, hex_bytes, 16)); - - Digest::new([Felt::new(v1), Felt::new(0u64), Felt::new(0u64), Felt::new(0u64)]) - }, - 34 => { - let v1 = u64::from_le_bytes(decode_u64_bytes(0, hex_bytes, 16)); - let v2 = u64::from_le_bytes(decode_u64_bytes(16, hex_bytes, 32)); - - Digest::new([Felt::new(v1), Felt::new(v2), Felt::new(0u64), Felt::new(0u64)]) - }, - 50 => { - let v1 = u64::from_le_bytes(decode_u64_bytes(0, hex_bytes, 16)); - let v2 = u64::from_le_bytes(decode_u64_bytes(16, hex_bytes, 32)); - let v3 = u64::from_le_bytes(decode_u64_bytes(32, hex_bytes, 48)); - - Digest::new([Felt::new(v1), Felt::new(v2), Felt::new(v3), Felt::new(0u64)]) - }, - 66 => { - let v1 = u64::from_le_bytes(decode_u64_bytes(0, hex_bytes, 16)); - let v2 = u64::from_le_bytes(decode_u64_bytes(16, hex_bytes, 32)); - let v3 = u64::from_le_bytes(decode_u64_bytes(32, hex_bytes, 48)); - let v4 = u64::from_le_bytes(decode_u64_bytes(48, hex_bytes, 64)); - - Digest::new([Felt::new(v1), Felt::new(v2), Felt::new(v3), Felt::new(v4)]) - }, - _ => panic!("Hex string has invalid length"), + // Ensure each felt is within bounds as `Felt::new` silently wraps around. + // This matches the behaviour of `Digest::try_from(String)`. + let mut i = 0; + while i < felts.len() { + use $crate::StarkField; + if felts[i] > $crate::Felt::MODULUS { + panic!("Felt overflow"); + } + i += 1; } + + $crate::Digest::new([ + $crate::Felt::new(felts[0]), + $crate::Felt::new(felts[1]), + $crate::Felt::new(felts[2]), + $crate::Felt::new(felts[3]), + ]) }}; } - /// Test the correctness of the `digest!` macro for every supported hex string length. - #[test] - fn test_digest_macro() { - use crate::{Digest, Felt}; - - let digest_18_hex = "0x8B5563E13FE8135D"; - let expected = - Digest::try_from("0x8B5563E13FE8135D000000000000000000000000000000000000000000000000") - .unwrap(); - let result_18 = digest!(digest_18_hex); - assert_eq!(expected, result_18); - - let digest_34_hex = "0x64FA911355C7818D58E3EE5BC6D1452D"; - let expected = - Digest::try_from("0x64FA911355C7818D58E3EE5BC6D1452D00000000000000000000000000000000") - .unwrap(); - let result_34 = digest!(digest_34_hex); - assert_eq!(expected, result_34); - - let digest_50_hex = "0x12613C3D43EE1A4C7B528B8AB68A23A31A45336DAFDEDAC9"; - let expected = - Digest::try_from("0x12613C3D43EE1A4C7B528B8AB68A23A31A45336DAFDEDAC90000000000000000") - .unwrap(); - let result_50 = digest!(digest_50_hex); - assert_eq!(expected, result_50); - - let digest_66_hex = "0x7F75ED9C5826FCAF77A5B227D66D6A874003027009783494C55FE741E87D89BC"; - let expected = - Digest::try_from("0x7F75ED9C5826FCAF77A5B227D66D6A874003027009783494C55FE741E87D89BC") - .unwrap(); - let result_66 = digest!(digest_66_hex); - assert_eq!(expected, result_66); + #[cfg(test)] + mod tests { + #[rstest::rstest] + #[case::missing_prefix("1234")] + #[case::invalid_character("1234567890abcdefg")] + #[case::too_long("0xx00000000000000000000000000000000000000000000000000000000000000001")] + #[case::overflow_felt0( + "0xffffffffffffffff000000000000000000000000000000000000000000000000" + )] + #[case::overflow_felt1( + "0x0000000000000000ffffffffffffffff00000000000000000000000000000000" + )] + #[case::overflow_felt2( + "0x00000000000000000000000000000000ffffffffffffffff0000000000000000" + )] + #[case::overflow_felt3( + "0x000000000000000000000000000000000000000000000000ffffffffffffffff" + )] + #[should_panic] + fn digest_macro_invalid(#[case] bad_input: &str) { + digest!(bad_input); + } + + #[rstest::rstest] + #[case::each_digit("0x1234567890abcdef")] + #[case::empty("0x")] + #[case::zero("0x0")] + #[case::zero_full("0x0000000000000000000000000000000000000000000000000000000000000000")] + #[case::one_lsb("0x1")] + #[case::one_msb("0x0000000000000000000000000000000000000000000000000000000000000001")] + #[case::one_partial("0x0001")] + #[case::odd("0x123")] + #[case::even("0x1234")] + #[case::touch_each_felt( + "0x00000000000123450000000000067890000000000000abcd00000000000000ef" + )] + #[case::unique_felt("0x111111111111111155555555555555559999999999999999cccccccccccccccc")] + #[case::digits_on_repeat( + "0x1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef" + )] + fn digest_macro(#[case] input: &str) { + let uut = digest!(input); + + // Right pad to 64 hex digits (66 including prefix). This is required by the + // Digest::try_from(String) implementation. + let padded_input = format!("{input:<66}").replace(" ", "0"); + let expected = crate::Digest::try_from(std::dbg!(padded_input)).unwrap(); + + assert_eq!(uut, expected); + } } }