From d2b8cd4924ca9ed23c90175ce8c2ff391f331164 Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Mon, 5 Jun 2023 23:40:44 +0400 Subject: [PATCH 1/3] implement try_run and error propagation --- Cargo.toml | 1 + examples/error_handling/README.md | 1 + examples/error_handling/main.rs | 103 +++++++++++++++ src/common/ruler.rs | 33 ++--- src/common/typekey.rs | 5 + src/lib.rs | 3 + src/parser/block/builtin/block_parser.rs | 34 ++++- src/parser/block/mod.rs | 122 +++++++++++++++--- src/parser/block/rule.rs | 36 +++++- src/parser/block/state.rs | 5 +- src/parser/core/rule.rs | 25 +++- src/parser/inline/builtin/inline_parser.rs | 49 +++++-- src/parser/inline/mod.rs | 142 ++++++++++++++++++--- src/parser/inline/rule.rs | 42 +++++- src/parser/main.rs | 121 ++++++++++++++++-- src/plugins/extra/linkify.rs | 14 +- src/plugins/sourcepos.rs | 15 ++- tests/extras.rs | 19 ++- 18 files changed, 671 insertions(+), 99 deletions(-) create mode 100644 examples/error_handling/README.md create mode 100644 examples/error_handling/main.rs diff --git a/Cargo.toml b/Cargo.toml index 5eda31c..9313595 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,6 +28,7 @@ default = ["linkify", "syntect"] # Lower range limit of these dependencies was manually checked to work with # minimal versions possible, higher range limit is best guess based on semver. # So older versions will not work, but newer versions might. +anyhow = ">= 1.0.18, < 2" argparse = ">= 0.2.1, < 0.3" const_format = ">= 0.1.0, < 0.3" derivative = ">= 1.0.2, < 3" diff --git a/examples/error_handling/README.md b/examples/error_handling/README.md new file mode 100644 index 0000000..939a298 --- /dev/null +++ b/examples/error_handling/README.md @@ -0,0 +1 @@ +This is an example of error handling and error propagation in custom markdown-it rules. 
diff --git a/examples/error_handling/main.rs b/examples/error_handling/main.rs new file mode 100644 index 0000000..3f6ef81 --- /dev/null +++ b/examples/error_handling/main.rs @@ -0,0 +1,103 @@ +use markdown_it::parser::block::{BlockRule, BlockState}; +use markdown_it::parser::core::CoreRule; +use markdown_it::parser::inline::{InlineRule, InlineState}; +use markdown_it::{MarkdownIt, Node, Result}; +use std::error::Error; +use std::fmt::Display; + +#[derive(Debug)] +struct MyError(&'static str); + +impl Display for MyError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(self.0) + } +} + +impl Error for MyError {} + +struct FallibleInlineRule; + +impl InlineRule for FallibleInlineRule { + const MARKER: char = '@'; + + // This is implementation of a rule that always fails on `@` character. + fn try_run(state: &mut InlineState) -> Result> { + // skip other characters + if !state.src[state.pos..].starts_with(Self::MARKER) { return Ok(None); }; + + Err(MyError("AAA").into()) + } + + fn run(state: &mut InlineState) -> Option<(Node, usize)> { + Self::try_run(state).unwrap_or_default() + } +} + +struct FallibleBlockRule; + +impl BlockRule for FallibleBlockRule { + // This is implementation of a rule that always fails on `@@@` at the start of the line. 
+ fn try_run(state: &mut BlockState) -> Result> { + if !state.get_line(state.line).starts_with("@@@") { return Ok(None); }; + + Err(MyError("BBB").into()) + } + + fn run(state: &mut BlockState) -> Option<(Node, usize)> { + Self::try_run(state).unwrap_or_default() + } +} + +struct FallibleCoreRule; + +impl CoreRule for FallibleCoreRule { + fn try_run(_root: &mut Node, _md: &MarkdownIt) -> Result<()> { + Err(MyError("CCC").into()) + } + + fn run(root: &mut Node, md: &MarkdownIt) { + let _ = Self::try_run(root, md); + } +} + +fn main() { + let md = &mut markdown_it::MarkdownIt::new(); + markdown_it::plugins::cmark::add(md); + + md.inline.add_rule::(); + md.block.add_rule::(); + md.add_rule::().after_all(); + + // inline rule fails + let text1 = r#"*hello @world*"#; + let err = md.try_parse(text1).err().unwrap(); + println!("{err}"); + assert_eq!(err.source().unwrap().to_string(), "AAA"); + + // block rule fails + let text2 = r#"@@@ *hello*"#; + let err = md.try_parse(text2).err().unwrap(); + println!("{err}"); + assert_eq!(err.source().unwrap().to_string(), "BBB"); + + // core rule fails + let text3 = r#"*hello*"#; + let err = md.try_parse(text3).err().unwrap(); + println!("{err}"); + assert_eq!(err.source().unwrap().to_string(), "CCC"); + + // If you run parse() function instead of try_parse(), failing rules will be skipped. + // This will result in custom syntax being left as user wrote it (not parsed). + let html = md.parse(text1).render(); + print!("{html}"); + assert_eq!(html, "

hello @world

\n"); + + let html = md.parse(text2).render(); + print!("{html}"); + assert_eq!(html, "

@@@ hello

\n"); + + let html = md.parse(text3).render(); + print!("{html}"); + assert_eq!(html, "

hello

\n"); +} diff --git a/src/common/ruler.rs b/src/common/ruler.rs index e9bddba..bfc35a6 100644 --- a/src/common/ruler.rs +++ b/src/common/ruler.rs @@ -1,11 +1,9 @@ //! Plugin manager with dependency resolution. use derivative::Derivative; -use once_cell::sync::OnceCell; use std::collections::{HashMap, HashSet}; use std::fmt::Debug; use std::hash::Hash; -use std::slice::Iter; /// /// Ruler allows you to implement a plugin system with dependency management and ensure that @@ -35,7 +33,7 @@ use std::slice::Iter; /// /// // now we run this chain /// let mut result = String::new(); -/// for f in chain.iter() { f(&mut result); } +/// for f in chain.compile() { f(&mut result); } /// assert_eq!(result, "[ hello, world! ]"); /// ``` /// @@ -50,7 +48,6 @@ use std::slice::Iter; /// pub struct Ruler { deps: Vec>, - compiled: OnceCell<(Vec, Vec)>, } impl Ruler { @@ -62,7 +59,6 @@ impl Ruler { impl Ruler { /// Add a new rule identified by `mark` with payload `value`. pub fn add(&mut self, mark: M, value: T) -> &mut RuleItem { - self.compiled = OnceCell::new(); let dep = RuleItem::new(mark, value); self.deps.push(dep); self.deps.last_mut().unwrap() @@ -74,17 +70,12 @@ impl Ruler { } /// Check if there are any rules identified by `mark`. - pub fn contains(&mut self, mark: M) -> bool { + pub fn contains(&self, mark: M) -> bool { self.deps.iter().any(|dep| dep.marks.contains(&mark)) } - /// Ordered iteration through rules. - #[inline] - pub fn iter(&self) -> Iter { - self.compiled.get_or_init(|| self.compile()).1.iter() - } - - fn compile(&self) -> (Vec, Vec) { + /// Convert dependency tree into an ordered list. 
+ pub fn compile(&self) -> Vec { // ID -> [RuleItem index] let mut idhash = HashMap::>::new(); @@ -206,20 +197,15 @@ impl Ruler { panic!("cyclic dependency: (use debug mode for more details)"); } - (result_idx, result) + //(result_idx, result) + result } } impl Debug for Ruler { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let vec: Vec<(usize, M)> = self.compiled.get_or_init(|| self.compile()).0 - .iter() - .map(|idx| (*idx, *self.deps.get(*idx).unwrap().marks.get(0).unwrap())) - .collect(); - f.debug_struct("Ruler") .field("deps", &self.deps) - .field("compiled", &vec) .finish() } } @@ -228,7 +214,6 @@ impl Default for Ruler { fn default() -> Self { Self { deps: Vec::new(), - compiled: OnceCell::new(), } } } @@ -267,7 +252,7 @@ impl RuleItem { /// chain.add("b", |s| s.push_str("foo")).before("a"); /// /// let mut result = String::new(); - /// for f in chain.iter() { f(&mut result); } + /// for f in chain.compile() { f(&mut result); } /// assert_eq!(result, "foobar"); /// ``` pub fn before(&mut self, mark: M) -> &mut Self { @@ -293,7 +278,7 @@ impl RuleItem { /// chain.add("b", |s| s.push_str("B")).after("a").before_all(); /// /// let mut result = String::new(); - /// for f in chain.iter() { f(&mut result); } + /// for f in chain.compile() { f(&mut result); } /// // without before_all order will be ACB /// assert_eq!(result, "ABC"); /// ``` @@ -321,7 +306,7 @@ impl RuleItem { /// chain.add("a", |s| s.push_str("A")).before("BorC"); /// /// let mut result = String::new(); - /// for f in chain.iter() { f(&mut result); } + /// for f in chain.compile() { f(&mut result); } /// assert_eq!(result, "ABC"); /// ``` pub fn alias(&mut self, mark: M) -> &mut Self { diff --git a/src/common/typekey.rs b/src/common/typekey.rs index 3424405..d6386d4 100644 --- a/src/common/typekey.rs +++ b/src/common/typekey.rs @@ -35,6 +35,11 @@ impl TypeKey { pub fn of() -> Self { Self { id: TypeId::of::(), name: any::type_name::() } } + + #[must_use] + pub fn 
short_name(&self) -> &str { + &self.name[self.name.rfind("::").map(|p| p + 2).unwrap_or(0)..] + } } impl Hash for TypeKey { diff --git a/src/lib.rs b/src/lib.rs index 710a91d..9aaac03 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 +18,9 @@ // just a style choice that clippy has no business complaining about #![allow(clippy::uninlined_format_args)] +// reexport for using in try_parse apis +pub use anyhow::Result; + pub mod common; pub mod examples; pub mod generics; diff --git a/src/parser/block/builtin/block_parser.rs b/src/parser/block/builtin/block_parser.rs index 368a778..43ff03e 100644 --- a/src/parser/block/builtin/block_parser.rs +++ b/src/parser/block/builtin/block_parser.rs @@ -1,5 +1,6 @@ use crate::parser::core::{CoreRule, Root}; -use crate::{MarkdownIt, Node}; +use crate::parser::main::RootNodeWrongType; +use crate::{MarkdownIt, Node, Result}; pub fn add(md: &mut MarkdownIt) { md.add_rule::() @@ -8,16 +9,39 @@ pub fn add(md: &mut MarkdownIt) { pub struct BlockParserRule; impl CoreRule for BlockParserRule { + fn try_run(root: &mut Node, md: &MarkdownIt) -> Result<()> { + Self::_run::(root, md)?; + Ok(()) + } + fn run(root: &mut Node, md: &MarkdownIt) { + let _ = Self::_run::(root, md); + } +} + +impl BlockParserRule { + fn _run(root: &mut Node, md: &MarkdownIt) -> Result<()> { let mut node = std::mem::take(root); - let data = node.cast_mut::().unwrap(); + let Some(data) = node.cast_mut::() else { + return Err(RootNodeWrongType.into()); + }; let source = std::mem::take(&mut data.content); let mut ext = std::mem::take(&mut data.ext); - node = md.block.parse(source.as_str(), node, md, &mut ext); - let data = node.cast_mut::().unwrap(); + md.block.compile(); + node = if CAN_FAIL { + md.block.try_parse(source.as_str(), node, md, &mut ext)? 
+ } else { + md.block.parse(source.as_str(), node, md, &mut ext) + }; + *root = node; + + let Some(data) = root.cast_mut::() else { + return Err(RootNodeWrongType.into()); + }; data.content = source; data.ext = ext; - *root = node; + + Ok(()) } } diff --git a/src/parser/block/mod.rs b/src/parser/block/mod.rs index 0847a66..b162f44 100644 --- a/src/parser/block/mod.rs +++ b/src/parser/block/mod.rs @@ -1,4 +1,8 @@ //! Block rule chain +use anyhow::Context; +use derivative::Derivative; +use once_cell::sync::OnceCell; + mod state; pub use state::*; @@ -13,17 +17,49 @@ use crate::common::TypeKey; use crate::parser::extset::RootExtSet; use crate::parser::inline::InlineRoot; use crate::parser::node::NodeEmpty; -use crate::{MarkdownIt, Node}; +use crate::{MarkdownIt, Node, Result}; + +#[derive(Clone)] +#[doc(hidden)] +pub struct RuleStruct { + marker: TypeKey, + check: fn (&mut BlockState) -> Option<()>, + run: fn (&mut BlockState) -> Option<(Node, usize)>, + try_run: fn (&mut BlockState) -> Result>, +} -type RuleFns = ( - fn (&mut BlockState) -> Option<()>, - fn (&mut BlockState) -> Option<(Node, usize)>, -); +struct RuleStructVecs { + marker: Vec, + check: Vec Option<()>>, + run: Vec Option<(Node, usize)>>, + try_run: Vec Result>>, +} + +impl RuleStructVecs { + pub fn with_capacity(capacity: usize) -> Self { + Self { + marker: Vec::with_capacity(capacity), + check: Vec::with_capacity(capacity), + run: Vec::with_capacity(capacity), + try_run: Vec::with_capacity(capacity), + } + } -#[derive(Debug, Default)] + pub fn push(&mut self, rule: RuleStruct) { + self.marker.push(rule.marker); + self.check.push(rule.check); + self.run.push(rule.run); + self.try_run.push(rule.try_run); + } +} + +#[derive(Derivative, Default)] +#[derivative(Debug)] /// Block-level tokenizer. 
pub struct BlockParser { - ruler: Ruler, + ruler: Ruler, + #[derivative(Debug = "ignore")] + compiled_rules: OnceCell, } impl BlockParser { @@ -34,7 +70,20 @@ impl BlockParser { // Generate tokens for input range // pub fn tokenize(&self, state: &mut BlockState) { + // _tokenize with CAN_FAIL=false never returns errors + let _ = Self::_tokenize::(self, state); + } + + // Generate tokens for input range, but fail if any custom rule produces an error. + // Note: inline state will be unusable if you get an Error from this function. + // + pub fn try_tokenize(&self, state: &mut BlockState) -> Result<()> { + Self::_tokenize::(self, state) + } + + fn _tokenize(&self, state: &mut BlockState) -> Result<()> { stacker::maybe_grow(64*1024, 1024*1024, || { + let rules = self.compiled_rules.get().expect("rules not compiled"); let mut has_empty_lines = false; while state.line < state.line_max { @@ -60,11 +109,22 @@ impl BlockParser { // - return true let mut ok = None; - for rule in self.ruler.iter() { - ok = rule.1(state); - if ok.is_some() { - break; - } + if CAN_FAIL { + for (idx, rule) in rules.try_run.iter().enumerate() { + ok = rule(state).with_context(|| BlockRuleError { + name: rules.marker[idx], + })?; + if ok.is_some() { + break; + } + }; + } else { + for rule in rules.run.iter() { + ok = rule(state); + if ok.is_some() { + break; + } + }; } if let Some((mut node, len)) = ok { @@ -101,7 +161,9 @@ impl BlockParser { state.line += 1; } } - }); + + Ok(()) + }) } // Process input string and push block tokens into `out_tokens` @@ -112,16 +174,44 @@ impl BlockParser { state.node } - pub fn add_rule(&mut self) -> RuleBuilder { - let item = self.ruler.add(TypeKey::of::(), (T::check, T::run)); + // Process input string and push block tokens into `out_tokens`, + // fail if any custom rule produces an error. 
+ // + pub fn try_parse(&self, src: &str, node: Node, md: &MarkdownIt, root_ext: &mut RootExtSet) -> Result { + let mut state = BlockState::new(src, md, root_ext, node); + self.try_tokenize(&mut state)?; + Ok(state.node) + } + + pub fn add_rule(&mut self) -> RuleBuilder { + self.compiled_rules = OnceCell::new(); + let item = self.ruler.add(TypeKey::of::(), RuleStruct { + marker: TypeKey::of::(), + check: T::check, + run: T::run, + try_run: T::try_run, + }); RuleBuilder::new(item) } - pub fn has_rule(&mut self) -> bool { + pub fn has_rule(&self) -> bool { self.ruler.contains(TypeKey::of::()) } pub fn remove_rule(&mut self) { + self.compiled_rules = OnceCell::new(); self.ruler.remove(TypeKey::of::()); } + + fn compile(&self) { + self.compiled_rules.get_or_init(|| { + let compiled_rules = self.ruler.compile(); + let mut result = RuleStructVecs::with_capacity(compiled_rules.len()); + + for rule in compiled_rules { + result.push(rule); + } + result + }); + } } diff --git a/src/parser/block/rule.rs b/src/parser/block/rule.rs index ceae5f6..a9bdf99 100644 --- a/src/parser/block/rule.rs +++ b/src/parser/block/rule.rs @@ -1,13 +1,47 @@ +use std::fmt::Display; + +use crate::common::TypeKey; use crate::parser::core::rule_builder; -use crate::Node; +use crate::{Node, Result}; /// Each member of block rule chain must implement this trait pub trait BlockRule : 'static { + /// Check block state at a given line (`state.get_line(state.line)`). + /// Return `Some(())` if it is a start of a block token of your type, + /// and `None` otherwise. + /// + /// You need to implement this function if your custom token spans + /// arbitrary amount of lines to avoid quadratic execution time. + /// + /// Default implementation is fine if your token is a single line, + /// and it's cheap to create then discard it. + /// fn check(state: &mut super::BlockState) -> Option<()> { Self::run(state).map(|_| ()) } + /// Check block state at a given line (`state.get_line(state.line)`). 
+ /// Return token of your type and amount of lines it spans, + /// (None if no token is found). fn run(state: &mut super::BlockState) -> Option<(Node, usize)>; + + /// Same as `run()`, but used for functions that can fail. Use functions like + /// `try_parse()` instead of `parse()` to retrieve this error. + fn try_run(state: &mut super::BlockState) -> Result> { + // NOTE: Send+Sync bound is required for compatibility with anyhow!() + Ok(Self::run(state)) + } } rule_builder!(BlockRule); + +#[derive(Debug)] +pub struct BlockRuleError { + pub name: TypeKey, +} + +impl Display for BlockRuleError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("block rule `{}` returned an error", self.name.short_name())) + } +} diff --git a/src/parser/block/state.rs b/src/parser/block/state.rs index 296ef42..370f422 100644 --- a/src/parser/block/state.rs +++ b/src/parser/block/state.rs @@ -171,8 +171,9 @@ impl<'a, 'b> BlockState<'a, 'b> { #[must_use] pub fn test_rules_at_line(&mut self) -> bool { - for rule in self.md.block.ruler.iter() { - if rule.0(self).is_some() { + let rules = self.md.block.compiled_rules.get().expect("rules not compiled"); + for rule in rules.check.iter() { + if rule(self).is_some() { return true; } } diff --git a/src/parser/core/rule.rs b/src/parser/core/rule.rs index 73471d4..a23bc54 100644 --- a/src/parser/core/rule.rs +++ b/src/parser/core/rule.rs @@ -1,8 +1,20 @@ -use crate::{MarkdownIt, Node}; +use std::fmt::Display; + +use crate::common::TypeKey; +use crate::{MarkdownIt, Node, Result}; /// Each member of core rule chain must implement this trait pub trait CoreRule : 'static { + /// Perform arbitrary operations on the root Node. fn run(root: &mut Node, md: &MarkdownIt); + + /// Same as `run()`, but used for functions that can fail. Use functions like + /// `try_parse()` instead of `parse()` to retrieve this error. 
+ fn try_run(root: &mut Node, md: &MarkdownIt) -> Result<()> { + // NOTE: Send+Sync bound is required for compatibility with anyhow!() + Self::run(root, md); + Ok(()) + } } macro_rules! rule_builder { @@ -53,3 +65,14 @@ macro_rules! rule_builder { rule_builder!(CoreRule); pub(crate) use rule_builder; + +#[derive(Debug)] +pub struct CoreRuleError { + pub name: TypeKey, +} + +impl Display for CoreRuleError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("core rule `{}` returned an error", self.name.short_name())) + } +} diff --git a/src/parser/inline/builtin/inline_parser.rs b/src/parser/inline/builtin/inline_parser.rs index 94c713f..87cd341 100644 --- a/src/parser/inline/builtin/inline_parser.rs +++ b/src/parser/inline/builtin/inline_parser.rs @@ -1,7 +1,8 @@ use crate::parser::block::builtin::BlockParserRule; use crate::parser::core::{CoreRule, Root}; use crate::parser::extset::{InlineRootExtSet, RootExtSet}; -use crate::{MarkdownIt, Node, NodeValue}; +use crate::parser::main::RootNodeWrongType; +use crate::{MarkdownIt, Node, NodeValue, Result}; #[derive(Debug)] /// Temporary node which gets replaced with inline nodes when @@ -29,8 +30,26 @@ pub fn add(md: &mut MarkdownIt) { pub struct InlineParserRule; impl CoreRule for InlineParserRule { + fn try_run(root: &mut Node, md: &MarkdownIt) -> Result<()> { + Self::_run::(root, md)?; + Ok(()) + } + fn run(root: &mut Node, md: &MarkdownIt) { - fn walk_recursive(node: &mut Node, md: &MarkdownIt, root_ext: &mut RootExtSet) { + let _ = Self::_run::(root, md); + } +} + +impl InlineParserRule { + fn _run( + root: &mut Node, + md: &MarkdownIt, + ) -> Result<()> { + fn walk_recursive( + node: &mut Node, + md: &MarkdownIt, + root_ext: &mut RootExtSet, + ) -> Result<()> { let mut idx = 0; while idx < node.children.len() { let child = &mut node.children[idx]; @@ -41,21 +60,29 @@ impl CoreRule for InlineParserRule { let mut root = std::mem::take(child); root.children = Vec::new(); 
- root = md.inline.parse(content, mapping, root, md, root_ext, &mut inline_ext); + root = if CAN_FAIL { + md.inline.try_parse(content, mapping, root, md, root_ext, &mut inline_ext)? + } else { + md.inline.parse(content, mapping, root, md, root_ext, &mut inline_ext) + }; let len = root.children.len(); node.children.splice(idx..=idx, std::mem::take(&mut root.children)); idx += len; } else { - stacker::maybe_grow(64*1024, 1024*1024, || { - walk_recursive(child, md, root_ext); - }); + stacker::maybe_grow(64*1024, 1024*1024, || -> Result<()> { + walk_recursive::(child, md, root_ext)?; + Ok(()) + })?; idx += 1; } } + Ok(()) } - let data = root.cast_mut::().unwrap(); + let Some(data) = root.cast_mut::() else { + return Err(RootNodeWrongType.into()); + }; let mut root_ext = std::mem::take(&mut data.ext); // this is invalid if input only contains reference; @@ -70,9 +97,13 @@ impl CoreRule for InlineParserRule { root.children.push(node); }*/ - walk_recursive(root, md, &mut root_ext); + md.inline.compile(); + walk_recursive::(root, md, &mut root_ext)?; - let data = root.cast_mut::().unwrap(); + let Some(data) = root.cast_mut::() else { + return Err(RootNodeWrongType.into()); + }; data.ext = root_ext; + Ok(()) } } diff --git a/src/parser/inline/mod.rs b/src/parser/inline/mod.rs index 10ff9a2..e660e72 100644 --- a/src/parser/inline/mod.rs +++ b/src/parser/inline/mod.rs @@ -1,4 +1,6 @@ //! 
Inline rule chain +use anyhow::Context; +use derivative::Derivative; use once_cell::sync::OnceCell; use std::collections::HashMap; @@ -15,22 +17,56 @@ pub use builtin::inline_parser::InlineRoot; pub use builtin::skip_text::{Text, TextSpecial}; use builtin::skip_text::TextScannerImpl; -use crate::{MarkdownIt, Node}; +use crate::{MarkdownIt, Node, Result}; use crate::common::TypeKey; use crate::common::ruler::Ruler; use crate::parser::extset::{InlineRootExtSet, RootExtSet}; use super::node::NodeEmpty; -type RuleFns = ( - fn (&mut InlineState) -> Option, - fn (&mut InlineState) -> Option<(Node, usize)>, -); +#[derive(Clone)] +#[doc(hidden)] +pub struct RuleStruct { + marker: TypeKey, + check: fn (&mut InlineState) -> Option, + run: fn (&mut InlineState) -> Option<(Node, usize)>, + try_run: fn (&mut InlineState) -> Result>, +} + +// use (Vec, Vec, Vec) instead of Vec<(A, B, C)> for cache locality, +// since only one thing will be accessed at a time, and code is hot +struct RuleStructVecs { + marker: Vec, + check: Vec Option>, + run: Vec Option<(Node, usize)>>, + try_run: Vec Result>>, +} + +impl RuleStructVecs { + pub fn with_capacity(capacity: usize) -> Self { + Self { + marker: Vec::with_capacity(capacity), + check: Vec::with_capacity(capacity), + run: Vec::with_capacity(capacity), + try_run: Vec::with_capacity(capacity), + } + } + + pub fn push(&mut self, rule: RuleStruct) { + self.marker.push(rule.marker); + self.check.push(rule.check); + self.run.push(rule.run); + self.try_run.push(rule.try_run); + } +} -#[derive(Debug, Default)] +#[derive(Derivative, Default)] +#[derivative(Debug)] /// Inline-level tokenizer. 
pub struct InlineParser { - ruler: Ruler, + ruler: Ruler, + #[derivative(Debug = "ignore")] + compiled_rules: OnceCell, text_charmap: HashMap>, text_impl: OnceCell, } @@ -45,15 +81,16 @@ impl InlineParser { // pub fn skip_token(&self, state: &mut InlineState) { stacker::maybe_grow(64*1024, 1024*1024, || { + let rules = self.compiled_rules.get().expect("rules not compiled"); let mut ok = None; if state.level < state.md.max_nesting { - for rule in self.ruler.iter() { - ok = rule.0(state); + for rule in rules.check.iter() { + ok = rule(state); if ok.is_some() { break; } - } + }; } else { // Too much nesting, just skip until the end of the paragraph. // @@ -78,10 +115,23 @@ impl InlineParser { }); } - // Generate tokens for input range + // Generate tokens for input range. // pub fn tokenize(&self, state: &mut InlineState) { + // _tokenize with CAN_FAIL=false never returns errors + let _ = Self::_tokenize::(self, state); + } + + // Generate tokens for input range, but fail if any custom rule produces an error. + // Note: inline state will be unusable if you get an Error from this function. 
+ // + pub fn try_tokenize(&self, state: &mut InlineState) -> Result<()> { + Self::_tokenize::(self, state) + } + + fn _tokenize(&self, state: &mut InlineState) -> Result<()> { stacker::maybe_grow(64*1024, 1024*1024, || { + let rules = self.compiled_rules.get().expect("rules not compiled"); let end = state.pos_max; while state.pos < end { @@ -94,11 +144,22 @@ impl InlineParser { let mut ok = None; if state.level < state.md.max_nesting { - for rule in self.ruler.iter() { - ok = rule.1(state); - if ok.is_some() { - break; - } + if CAN_FAIL { + for (idx, rule) in rules.try_run.iter().enumerate() { + ok = rule(state).with_context(|| InlineRuleError { + name: rules.marker[idx], + })?; + if ok.is_some() { + break; + } + }; + } else { + for rule in rules.run.iter() { + ok = rule(state); + if ok.is_some() { + break; + } + }; } } @@ -117,7 +178,9 @@ impl InlineParser { state.trailing_text_push(state.pos, state.pos + len); state.pos += len; } - }); + + Ok(()) + }) } // Process input string and push inline tokens into `out_tokens` @@ -136,21 +199,48 @@ impl InlineParser { state.node } - pub fn add_rule(&mut self) -> RuleBuilder { + // Process input string and push inline tokens into `out_tokens`, + // fail if any custom rule produces an error. 
+ // + pub fn try_parse( + &self, + src: String, + srcmap: Vec<(usize, usize)>, + node: Node, + md: &MarkdownIt, + root_ext: &mut RootExtSet, + inline_ext: &mut InlineRootExtSet, + ) -> Result { + let mut state = InlineState::new(src, srcmap, md, root_ext, inline_ext, node); + self.try_tokenize(&mut state)?; + Ok(state.node) + } + + pub fn add_rule(&mut self) -> RuleBuilder { + self.compiled_rules = OnceCell::new(); + if T::MARKER != '\0' { let charvec = self.text_charmap.entry(T::MARKER).or_insert(vec![]); charvec.push(TypeKey::of::()); } - let item = self.ruler.add(TypeKey::of::(), (T::check, T::run)); + let item = self.ruler.add(TypeKey::of::(), RuleStruct { + marker: TypeKey::of::(), + check: T::check, + run: T::run, + try_run: T::try_run, + }); + RuleBuilder::new(item) } - pub fn has_rule(&mut self) -> bool { + pub fn has_rule(&self) -> bool { self.ruler.contains(TypeKey::of::()) } pub fn remove_rule(&mut self) { + self.compiled_rules = OnceCell::new(); + if T::MARKER != '\0' { let mut charvec = self.text_charmap.remove(&T::MARKER).unwrap_or_default(); charvec.retain(|x| *x != TypeKey::of::()); @@ -159,4 +249,16 @@ impl InlineParser { self.ruler.remove(TypeKey::of::()); } + + fn compile(&self) { + self.compiled_rules.get_or_init(|| { + let compiled_rules = self.ruler.compile(); + let mut result = RuleStructVecs::with_capacity(compiled_rules.len()); + + for rule in compiled_rules { + result.push(rule); + } + result + }); + } } diff --git a/src/parser/inline/rule.rs b/src/parser/inline/rule.rs index a2e3843..c38b73a 100644 --- a/src/parser/inline/rule.rs +++ b/src/parser/inline/rule.rs @@ -1,15 +1,55 @@ +use std::fmt::Display; + +use crate::common::TypeKey; use crate::parser::core::rule_builder; -use crate::Node; +use crate::{Node, Result}; /// Each member of inline rule chain must implement this trait pub trait InlineRule : 'static { + /// A starting character of a token that this inline rule handles. 
+ /// + /// Ideally, it would always be a first character at `state.src[state.pos]` position, + /// but it's not guaranteed at the moment. + /// + /// If you need to handle multiple starting characters, you should add multiple + /// rules. + /// + /// Reserved character `\0` matches any character, but using it can incur severe + /// performance penalties. const MARKER: char; + /// Check inline state at a given position (`state.src[state.pos]`) and return + /// the length of the next token of your type (None if no token is found). + /// + /// You usually don't need to specify this, unless creating a new token in `run()` + /// is too slow and/or has increased computational complexity (mainly recursive + /// tokens like links). + /// fn check(state: &mut super::InlineState) -> Option { Self::run(state).map(|(_node, len)| len) } + /// Check inline state at a given position (`state.src[state.pos]`) and return + /// next token of your type and its length (None if no token is found). fn run(state: &mut super::InlineState) -> Option<(Node, usize)>; + + /// Same as `run()`, but used for functions that can fail. Use functions like + /// `try_parse()` instead of `parse()` to retrieve this error. 
+ fn try_run(state: &mut super::InlineState) -> Result> { + // NOTE: Send+Sync bound is required for compatibility with anyhow!() + Ok(Self::run(state)) + } } rule_builder!(InlineRule); + +#[derive(Debug)] +pub struct InlineRuleError { + pub name: TypeKey, +} + +impl Display for InlineRuleError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("inline rule `{}` returned an error", self.name.short_name())) + } +} diff --git a/src/parser/main.rs b/src/parser/main.rs index 459fbc3..3e709ca 100644 --- a/src/parser/main.rs +++ b/src/parser/main.rs @@ -1,16 +1,49 @@ use derivative::Derivative; +use once_cell::sync::OnceCell; +use std::error::Error; +use std::fmt::Display; use crate::common::ruler::Ruler; use crate::common::sourcemap::SourcePos; use crate::common::TypeKey; -use crate::parser::block::{self, BlockParser}; +use crate::parser::block::{self, BlockParser, BlockRuleError}; use crate::parser::core::{Root, *}; use crate::parser::extset::MarkdownItExtSet; -use crate::parser::inline::{self, InlineParser}; +use crate::parser::inline::{self, InlineParser, InlineRuleError}; use crate::parser::linkfmt::{LinkFormatter, MDLinkFormatter}; -use crate::Node; +use crate::{Node, Result}; -type RuleFn = fn (&mut Node, &MarkdownIt); +#[derive(Clone)] +#[doc(hidden)] +pub struct RuleStruct { + marker: TypeKey, + run: fn (&mut Node, &MarkdownIt), + try_run: fn (&mut Node, &MarkdownIt) -> Result<()>, +} + +// use (Vec, Vec, Vec) instead of Vec<(A, B, C)> for cache locality, +// since only one thing will be accessed at a time, and code is hot +struct RuleStructVecs { + marker: Vec, + run: Vec, + try_run: Vec Result<()>>, +} + +impl RuleStructVecs { + pub fn with_capacity(capacity: usize) -> Self { + Self { + marker: Vec::with_capacity(capacity), + run: Vec::with_capacity(capacity), + try_run: Vec::with_capacity(capacity), + } + } + + pub fn push(&mut self, rule: RuleStruct) { + self.marker.push(rule.marker); + 
self.run.push(rule.run); + self.try_run.push(rule.try_run); + } +} #[derive(Derivative)] #[derivative(Debug)] @@ -35,7 +68,9 @@ pub struct MarkdownIt { #[doc(hidden)] pub max_nesting: u32, - ruler: Ruler, + ruler: Ruler, + #[derivative(Debug = "ignore")] + compiled_rules: OnceCell, } impl MarkdownIt { @@ -43,29 +78,83 @@ impl MarkdownIt { Self::default() } + /// Parse input string and return syntax tree. + /// + /// You can convert that node to html/xhtml string by using `.render()` and + /// `.xrender()` respectively. + /// + /// Note that this function cannot produce errors, it will always return + /// correct markdown syntax tree. It makes this function suitable for parsing + /// arbitrary user input. + /// pub fn parse(&self, src: &str) -> Node { + let rules = self.compiled_rules.get_or_init(|| self.compile()); let mut node = Node::new(Root::new(src.to_owned())); node.srcmap = Some(SourcePos::new(0, src.len())); - for rule in self.ruler.iter() { + for rule in rules.run.iter() { rule(&mut node, self); - debug_assert!(node.is::(), "root node of the AST must always be Root"); } node } - pub fn add_rule(&mut self) -> RuleBuilder { - let item = self.ruler.add(TypeKey::of::(), T::run); + /// Parse input string and return syntax tree. + /// + /// You can convert that node to html/xhtml string by using `.render()` and + /// `.xrender()` respectively. + /// + /// This function can fail if any of the plugins fail. Any error in a custom + /// rule will be propagated into the result. It makes this function suitable + /// for parsing documents you wrote, where added validation helps to ensure + /// you didn't do any syntax mistakes. 
+ /// + pub fn try_parse(&self, src: &str) -> Result { + let rules = self.compiled_rules.get_or_init(|| self.compile()); + let mut node = Node::new(Root::new(src.to_owned())); + node.srcmap = Some(SourcePos::new(0, src.len())); + + for (idx, rule) in rules.try_run.iter().enumerate() { + rule(&mut node, self).map_err(|err| { + if err.is::() || err.is::() { + err + } else { + err.context(CoreRuleError { + name: rules.marker[idx], + }) + } + })?; + } + Ok(node) + } + + pub fn add_rule(&mut self) -> RuleBuilder { + self.compiled_rules = OnceCell::new(); + let item = self.ruler.add(TypeKey::of::(), RuleStruct { + marker: TypeKey::of::(), + run: T::run, + try_run: T::try_run, + }); RuleBuilder::new(item) } - pub fn has_rule(&mut self) -> bool { + pub fn has_rule(&self) -> bool { self.ruler.contains(TypeKey::of::()) } pub fn remove_rule(&mut self) { + self.compiled_rules = OnceCell::new(); self.ruler.remove(TypeKey::of::()); } + + fn compile(&self) -> RuleStructVecs { + let compiled_rules = self.ruler.compile(); + let mut result = RuleStructVecs::with_capacity(compiled_rules.len()); + + for rule in compiled_rules { + result.push(rule); + } + result + } } impl Default for MarkdownIt { @@ -77,9 +166,21 @@ impl Default for MarkdownIt { ext: MarkdownItExtSet::new(), max_nesting: 100, ruler: Ruler::new(), + compiled_rules: OnceCell::new(), }; block::builtin::add(&mut md); inline::builtin::add(&mut md); md } } + +// Root node should always be of the type `Root`, +// but custom rules may insert weird stuff instead. 
+#[derive(Debug)] +pub(crate) struct RootNodeWrongType; +impl Display for RootNodeWrongType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("Root node of the AST is expected to have type `Root`, but the type was changed by one of the custom rules.") + } +} +impl Error for RootNodeWrongType {} diff --git a/src/plugins/extra/linkify.rs b/src/plugins/extra/linkify.rs index 40d2e5e..abded6d 100644 --- a/src/plugins/extra/linkify.rs +++ b/src/plugins/extra/linkify.rs @@ -9,7 +9,8 @@ use crate::parser::core::{CoreRule, Root}; use crate::parser::extset::RootExt; use crate::parser::inline::builtin::InlineParserRule; use crate::parser::inline::{InlineRule, InlineState, TextSpecial}; -use crate::{MarkdownIt, Node, NodeValue, Renderer}; +use crate::parser::main::RootNodeWrongType; +use crate::{MarkdownIt, Node, NodeValue, Renderer, Result}; static SCHEME_RE : Lazy = Lazy::new(|| { Regex::new(r"(?i)(?:^|[^a-z0-9.+-])([a-z][a-z0-9.+-]*)$").unwrap() @@ -51,8 +52,10 @@ struct LinkifyPosition { #[doc(hidden)] pub struct LinkifyPrescan; impl CoreRule for LinkifyPrescan { - fn run(root: &mut Node, _: &MarkdownIt) { - let root_data = root.cast_mut::().unwrap(); + fn try_run(root: &mut Node, _: &MarkdownIt) -> Result<()> { + let Some(root_data) = root.cast_mut::() else { + return Err(RootNodeWrongType.into()); + }; let source = root_data.content.as_str(); let finder = LinkFinder::new(); let positions = finder.links(source).filter_map(|link| { @@ -67,6 +70,11 @@ impl CoreRule for LinkifyPrescan { } }).collect::>(); root_data.ext.insert(positions); + Ok(()) + } + + fn run(root: &mut Node, md: &MarkdownIt) { + let _ = Self::try_run(root, md); } } diff --git a/src/plugins/sourcepos.rs b/src/plugins/sourcepos.rs index 801a105..7aa5ad8 100644 --- a/src/plugins/sourcepos.rs +++ b/src/plugins/sourcepos.rs @@ -11,7 +11,8 @@ use crate::common::sourcemap::SourceWithLineStarts; use crate::parser::block::builtin::BlockParserRule; use 
crate::parser::core::{CoreRule, Root}; use crate::parser::inline::builtin::InlineParserRule; -use crate::{MarkdownIt, Node}; +use crate::parser::main::RootNodeWrongType; +use crate::{MarkdownIt, Node, Result}; pub fn add(md: &mut MarkdownIt) { md.add_rule::() @@ -22,8 +23,11 @@ pub fn add(md: &mut MarkdownIt) { #[doc(hidden)] pub struct SyntaxPosRule; impl CoreRule for SyntaxPosRule { - fn run(root: &mut Node, _: &MarkdownIt) { - let source = root.cast::().unwrap().content.as_str(); + fn try_run(root: &mut Node, _: &MarkdownIt) -> Result<()> { + let Some(data) = root.cast::() else { + return Err(RootNodeWrongType.into()); + }; + let source = data.content.as_str(); let mapping = SourceWithLineStarts::new(source); root.walk_mut(|node, _| { @@ -32,6 +36,11 @@ impl CoreRule for SyntaxPosRule { node.attrs.push(("data-sourcepos", format!("{}:{}-{}:{}", startline, startcol, endline, endcol))); } }); + Ok(()) + } + + fn run(root: &mut Node, md: &MarkdownIt) { + let _ = Self::try_run(root, md); } } diff --git a/tests/extras.rs b/tests/extras.rs index 32b0cd2..51d4532 100644 --- a/tests/extras.rs +++ b/tests/extras.rs @@ -111,10 +111,21 @@ r#"
} mod examples { - include!("../examples/ferris/main.rs"); + mod ferris { + include!("../examples/ferris/main.rs"); - #[test] - fn test_examples() { - main(); + #[test] + fn ferris() { + main(); + } + } + + mod error_handling { + include!("../examples/error_handling/main.rs"); + + #[test] + fn error_handling() { + main(); + } } } From c398d4b9c531a6c9cc85fd2343d98882bfe4b354 Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Tue, 6 Jun 2023 16:49:42 +0400 Subject: [PATCH 2/3] allow `Node::walk_*` functions to terminate early This commit adds a Result<> to all walk callbacks. In order to use this, you should change all `Node::walk_*` methods in your code the following way: ```rust // replace this: node.walk(|node, _| { dbg!(node); }); // with this (unwrap is safe here because walk only // returns error when your function does): node.walk(|node, _| { dbg!(node); Ok(()) }).unwrap(); ``` --- CHANGELOG.md | 32 +++++++++++ examples/ferris/core_rule.rs | 3 +- src/bin.rs | 3 +- src/generics/inline/emph_pair.rs | 5 +- src/parser/node.rs | 82 +++++++++++++++++----------- src/plugins/extra/heading_anchors.rs | 3 +- src/plugins/extra/smartquotes.rs | 8 ++- src/plugins/extra/syntect.rs | 3 +- src/plugins/extra/typographer.rs | 5 +- src/plugins/sourcepos.rs | 3 +- tests/commonmark.rs | 5 +- tests/extras.rs | 5 +- tests/linkify.rs | 5 +- tests/markdown-it-smartquotes.rs | 5 +- tests/markdown-it-typographer.rs | 5 +- tests/markdown-it.rs | 5 +- tests/sourcemaps.rs | 5 +- 17 files changed, 131 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f329a3e..081872b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,37 @@ # Changelog +## 0.6.0 - WIP + +### Added + + - added `md.try_parse()` function which may return an error, as opposed to existing + `md.parse()` function which never does + + - added optional `try_run()` trait function for rules which can fail and will + propagate errors when using `md.try_parse()` + +### Changed + + - `Node::walk_*` 
methods now return `Result`, which allows you to terminate traversing early + +### Migration + +For all `Node::walk_*` methods change the following: + +```rust +// replace this: +node.walk(|node, _| { + dbg!(node); +}); + +// with this (unwrap is safe here because walk only +// returns error when your function does): +node.walk(|node, _| { + dbg!(node); + Ok(()) +}).unwrap(); +``` + ## 0.5.0 - 2023-05-13 ### Added diff --git a/examples/ferris/core_rule.rs b/examples/ferris/core_rule.rs index 2b56877..cfd0058 100644 --- a/examples/ferris/core_rule.rs +++ b/examples/ferris/core_rule.rs @@ -51,7 +51,8 @@ impl CoreRule for FerrisCounterRule { if node.is::() || node.is::() { counter += 1; } - }); + Ok(()) + }).unwrap(); // append a counter to the root as a custom node root.children.push(Node::new(FerrisCounter(counter))) diff --git a/src/bin.rs b/src/bin.rs index 3726135..05671f5 100644 --- a/src/bin.rs +++ b/src/bin.rs @@ -93,7 +93,8 @@ fn main() { } else { println!("{name}"); } - }); + Ok(()) + }).unwrap(); return; } diff --git a/src/generics/inline/emph_pair.rs b/src/generics/inline/emph_pair.rs index 002fe84..404a81e 100644 --- a/src/generics/inline/emph_pair.rs +++ b/src/generics/inline/emph_pair.rs @@ -265,7 +265,10 @@ fn is_odd_match(opener: &EmphMarker, closer: &EmphMarker) -> bool { pub struct FragmentsJoin; impl CoreRule for FragmentsJoin { fn run(node: &mut Node, _: &MarkdownIt) { - node.walk_mut(|node, _| fragments_join(node)); + node.walk_mut(|node, _| { + fragments_join(node); + Ok(()) + }).unwrap(); } } diff --git a/src/parser/node.rs b/src/parser/node.rs index 328b4c7..fb31b3a 100644 --- a/src/parser/node.rs +++ b/src/parser/node.rs @@ -7,7 +7,7 @@ use crate::common::TypeKey; use crate::parser::extset::NodeExtSet; use crate::parser::inline::Text; use crate::parser::renderer::HTMLRenderer; -use crate::Renderer; +use crate::{Renderer, Result}; /// Single node in the CommonMark AST. 
#[derive(Debug)] @@ -106,64 +106,78 @@ impl Node { /// Execute function `f` recursively on every member of AST tree /// (using preorder deep-first search). - pub fn walk<'a>(&'a self, mut f: impl FnMut(&'a Node, u32)) { + /// + /// This function will stop early if closure returns any error. + pub fn walk<'a>(&'a self, mut f: impl FnMut(&'a Node, u32) -> Result<()>) -> Result<()> { // performance note: this is faster than emulating recursion using vec stack - fn walk_recursive<'b>(node: &'b Node, depth: u32, f: &mut impl FnMut(&'b Node, u32)) { - f(node, depth); + fn walk_recursive<'a>(node: &'a Node, depth: u32, f: &mut impl FnMut(&'a Node, u32) -> Result<()>) -> Result<()> { + f(node, depth)?; for n in node.children.iter() { - stacker::maybe_grow(64*1024, 1024*1024, || { - walk_recursive(n, depth + 1, f); - }); + stacker::maybe_grow(64*1024, 1024*1024, || -> Result<()> { + walk_recursive(n, depth + 1, f) + })?; } + Ok(()) } - walk_recursive(self, 0, &mut f); + walk_recursive(self, 0, &mut f) } /// Execute function `f` recursively on every member of AST tree /// (using preorder deep-first search). - pub fn walk_mut(&mut self, mut f: impl FnMut(&mut Node, u32)) { - // performance note: this is faster than emulating recursion using vec stack - fn walk_recursive(node: &mut Node, depth: u32, f: &mut impl FnMut(&mut Node, u32)) { - f(node, depth); + /// + /// This function will stop early if closure returns any error. 
+ pub fn walk_mut(&mut self, mut f: impl FnMut(&mut Node, u32) -> Result<()>) -> Result<()> { + // note: lifetime constraints are different from non-mut walk due to mutability + fn walk_recursive(node: &mut Node, depth: u32, f: &mut impl FnMut(&mut Node, u32) -> Result<()>) -> Result<()> { + f(node, depth)?; for n in node.children.iter_mut() { - stacker::maybe_grow(64*1024, 1024*1024, || { - walk_recursive(n, depth + 1, f); - }); + stacker::maybe_grow(64*1024, 1024*1024, || -> Result<()> { + walk_recursive(n, depth + 1, f) + })?; } + Ok(()) } - walk_recursive(self, 0, &mut f); + walk_recursive(self, 0, &mut f) } /// Execute function `f` recursively on every member of AST tree /// (using postorder deep-first search). - pub fn walk_post(&self, mut f: impl FnMut(&Node, u32)) { - fn walk_recursive(node: &Node, depth: u32, f: &mut impl FnMut(&Node, u32)) { + /// + /// This function will stop early if closure returns any error. + pub fn walk_post<'a>(&'a self, mut f: impl FnMut(&'a Node, u32) -> Result<()>) -> Result<()> { + // performance note: this is faster than emulating recursion using vec stack + fn walk_recursive<'a>(node: &'a Node, depth: u32, f: &mut impl FnMut(&'a Node, u32) -> Result<()>) -> Result<()> { for n in node.children.iter() { - stacker::maybe_grow(64*1024, 1024*1024, || { - walk_recursive(n, depth + 1, f); - }); + stacker::maybe_grow(64*1024, 1024*1024, || -> Result<()> { + walk_recursive(n, depth + 1, f) + })?; } - f(node, depth); + f(node, depth)?; + Ok(()) } - walk_recursive(self, 0, &mut f); + walk_recursive(self, 0, &mut f) } /// Execute function `f` recursively on every member of AST tree /// (using postorder deep-first search). - pub fn walk_post_mut(&mut self, mut f: impl FnMut(&mut Node, u32)) { - fn walk_recursive(node: &mut Node, depth: u32, f: &mut impl FnMut(&mut Node, u32)) { + /// + /// This function will stop early if closure returns any error. 
+ pub fn walk_post_mut(&mut self, mut f: impl FnMut(&mut Node, u32) -> Result<()>) -> Result<()> { + // note: lifetime constraints are different from non-mut walk due to mutability + fn walk_recursive(node: &mut Node, depth: u32, f: &mut impl FnMut(&mut Node, u32) -> Result<()>) -> Result<()> { for n in node.children.iter_mut() { - stacker::maybe_grow(64*1024, 1024*1024, || { - walk_recursive(n, depth + 1, f); - }); + stacker::maybe_grow(64*1024, 1024*1024, || -> Result<()> { + walk_recursive(n, depth + 1, f) + })?; } - f(node, depth); + f(node, depth)?; + Ok(()) } - walk_recursive(self, 0, &mut f); + walk_recursive(self, 0, &mut f) } /// Walk recursively through child nodes and collect all text nodes @@ -171,11 +185,12 @@ impl Node { pub fn collect_text(&self) -> String { let mut result = String::new(); - self.walk(|node, _| { + self.walk(|node: &Node, _| { if let Some(text) = node.cast::() { result.push_str(text.content.as_str()); } - }); + Ok(()) + }).unwrap(); result } @@ -185,7 +200,8 @@ impl Drop for Node { fn drop(&mut self) { self.walk_post_mut(|node, _| { drop(std::mem::take(&mut node.children)); - }); + Ok(()) + }).unwrap(); } } diff --git a/src/plugins/extra/heading_anchors.rs b/src/plugins/extra/heading_anchors.rs index 1062b10..fba6d41 100644 --- a/src/plugins/extra/heading_anchors.rs +++ b/src/plugins/extra/heading_anchors.rs @@ -64,6 +64,7 @@ impl CoreRule for AddHeadingAnchors { if node.is::() || node.is::() { node.attrs.push(("id", slugify(&node.collect_text()))); } - }); + Ok(()) + }).unwrap(); } } diff --git a/src/plugins/extra/smartquotes.rs b/src/plugins/extra/smartquotes.rs index 7cf3d5d..8ebb4dd 100644 --- a/src/plugins/extra/smartquotes.rs +++ b/src/plugins/extra/smartquotes.rs @@ -153,7 +153,8 @@ impl< text_node.content = execute_replacements(current_replacements, &text_node.content); }; current_index += 1; - }); + Ok(()) + }).unwrap(); } } @@ -318,7 +319,7 @@ impl< fn all_text_tokens(root: &Node) -> Vec { let mut result = Vec::new(); let 
mut walk_index = 0; - root.walk(|node, nesting_level| { + root.walk(|node: &Node, nesting_level| { if let Some(text_node) = node.cast::() { result.push(FlatToken::Text { content: &text_node.content, @@ -335,7 +336,8 @@ fn all_text_tokens(root: &Node) -> Vec { result.push(FlatToken::Irrelevant); } walk_index += 1; - }); + Ok(()) + }).unwrap(); result } diff --git a/src/plugins/extra/syntect.rs b/src/plugins/extra/syntect.rs index 5014491..8f6f90f 100644 --- a/src/plugins/extra/syntect.rs +++ b/src/plugins/extra/syntect.rs @@ -69,6 +69,7 @@ impl CoreRule for SyntectRule { node.replace(SyntectSnippet { html }); } } - }); + Ok(()) + }).unwrap(); } } diff --git a/src/plugins/extra/typographer.rs b/src/plugins/extra/typographer.rs index 47841df..bcab74f 100644 --- a/src/plugins/extra/typographer.rs +++ b/src/plugins/extra/typographer.rs @@ -82,7 +82,7 @@ pub struct TypographerRule; impl CoreRule for TypographerRule { fn run(root: &mut Node, _: &MarkdownIt) { root.walk_mut(|node, _| { - let Some(mut text_node) = node.cast_mut::() else { return; }; + let Some(mut text_node) = node.cast_mut::() else { return Ok(()); }; if SCOPED_RE.is_match(&text_node.content) { text_node.content = SCOPED_RE @@ -119,6 +119,7 @@ impl CoreRule for TypographerRule { text_node.content = s; } } - }); + Ok(()) + }).unwrap(); } } diff --git a/src/plugins/sourcepos.rs b/src/plugins/sourcepos.rs index 7aa5ad8..0f5cc1f 100644 --- a/src/plugins/sourcepos.rs +++ b/src/plugins/sourcepos.rs @@ -35,7 +35,8 @@ impl CoreRule for SyntaxPosRule { let ((startline, startcol), (endline, endcol)) = map.get_positions(&mapping); node.attrs.push(("data-sourcepos", format!("{}:{}-{}:{}", startline, startcol, endline, endcol))); } - }); + Ok(()) + }).unwrap(); Ok(()) } diff --git a/tests/commonmark.rs b/tests/commonmark.rs index 0a55776..d2254e8 100644 --- a/tests/commonmark.rs +++ b/tests/commonmark.rs @@ -7,7 +7,10 @@ fn run(input: &str, output: &str) { let node = md.parse(&(input.to_owned() + "\n")); // make sure 
we have sourcemaps for everything - node.walk(|node, _| assert!(node.srcmap.is_some())); + node.walk(|node, _| { + assert!(node.srcmap.is_some()); + Ok(()) + }).unwrap(); let result = node.xrender(); assert_eq!(result, output); diff --git a/tests/extras.rs b/tests/extras.rs index 51d4532..aaf779f 100644 --- a/tests/extras.rs +++ b/tests/extras.rs @@ -51,7 +51,10 @@ fn run(input: &str, output: &str) { markdown_it::plugins::html::add(md); markdown_it::plugins::extra::beautify_links::add(md); let node = md.parse(&(input.to_owned() + "\n")); - node.walk(|node, _| assert!(node.srcmap.is_some())); + node.walk(|node, _| { + assert!(node.srcmap.is_some()); + Ok(()) + }).unwrap(); let result = node.render(); assert_eq!(result, output); } diff --git a/tests/linkify.rs b/tests/linkify.rs index f520fbe..20af419 100644 --- a/tests/linkify.rs +++ b/tests/linkify.rs @@ -8,7 +8,10 @@ fn run(input: &str, output: &str) { let node = md.parse(&(input.to_owned() + "\n")); // make sure we have sourcemaps for everything - node.walk(|node, _| assert!(node.srcmap.is_some())); + node.walk(|node, _| { + assert!(node.srcmap.is_some()); + Ok(()) + }).unwrap(); let result = node.render(); assert_eq!(result, output); diff --git a/tests/markdown-it-smartquotes.rs b/tests/markdown-it-smartquotes.rs index 76c5d14..13b771f 100644 --- a/tests/markdown-it-smartquotes.rs +++ b/tests/markdown-it-smartquotes.rs @@ -13,7 +13,10 @@ fn run(input: &str, output: &str) { let node = md.parse(&(input.to_owned() + "\n")); // make sure we have sourcemaps for everything - node.walk(|node, _| assert!(node.srcmap.is_some())); + node.walk(|node, _| { + assert!(node.srcmap.is_some()); + Ok(()) + }).unwrap(); let result = node.render(); assert_eq!(result, output); diff --git a/tests/markdown-it-typographer.rs b/tests/markdown-it-typographer.rs index eff648b..2a5b2da 100644 --- a/tests/markdown-it-typographer.rs +++ b/tests/markdown-it-typographer.rs @@ -12,7 +12,10 @@ fn run(input: &str, output: &str) { let node = 
md.parse(&(input.to_owned() + "\n")); // make sure we have sourcemaps for everything - node.walk(|node, _| assert!(node.srcmap.is_some())); + node.walk(|node, _| { + assert!(node.srcmap.is_some()); + Ok(()) + }).unwrap(); let result = node.render(); assert_eq!(result, output); diff --git a/tests/markdown-it.rs b/tests/markdown-it.rs index 81eeaab..2a5634a 100644 --- a/tests/markdown-it.rs +++ b/tests/markdown-it.rs @@ -8,7 +8,10 @@ fn run(input: &str, output: &str) { let node = md.parse(&(input.to_owned() + "\n")); // make sure we have sourcemaps for everything - node.walk(|node, _| assert!(node.srcmap.is_some())); + node.walk(|node, _| { + assert!(node.srcmap.is_some()); + Ok(()) + }).unwrap(); let result = node.render(); assert_eq!(result, output); diff --git a/tests/sourcemaps.rs b/tests/sourcemaps.rs index 72451ff..a1dd675 100644 --- a/tests/sourcemaps.rs +++ b/tests/sourcemaps.rs @@ -6,7 +6,10 @@ fn run(input: &str, f: fn (&Node, SourceWithLineStarts)) { markdown_it::plugins::cmark::add(md); markdown_it::plugins::html::add(md); let node = md.parse(input); - node.walk(|node, _| assert!(node.srcmap.is_some())); + node.walk(|node, _| { + assert!(node.srcmap.is_some()); + Ok(()) + }).unwrap(); f(&node, SourceWithLineStarts::new(input)); } From 32b33e851937e6f3ea8d12463e3ea2520f4d7056 Mon Sep 17 00:00:00 2001 From: Alex Kocharin Date: Tue, 6 Jun 2023 16:59:02 +0400 Subject: [PATCH 3/3] make syntect plugin report if syntax is not found --- CHANGELOG.md | 1 + Cargo.toml | 1 + src/plugins/extra/syntect.rs | 136 ++++++++++++++++++++++++++++++++++- 3 files changed, 135 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 081872b..445491d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ ### Changed - `Node::walk_*` methods now return `Result`, which allows you to terminate traversing early + - `syntect` rule now trims spaces in fence info string ### Migration diff --git a/Cargo.toml b/Cargo.toml index 9313595..cd32404 100644 --- 
a/Cargo.toml +++ b/Cargo.toml @@ -36,6 +36,7 @@ derive_more = ">= 0.99.0, < 1" downcast-rs = ">= 1.0.2, < 2" entities = ">= 0.1.0, < 2" html-escape = ">= 0.1.0, < 0.3" +indoc = ">= 0.3.4, < 3" linkify = { version = ">= 0.5.0, < 0.10", optional = true } mdurl = ">= 0.3.1, < 0.4" once_cell = ">= 1.0.1, < 2" diff --git a/src/plugins/extra/syntect.rs b/src/plugins/extra/syntect.rs index 8f6f90f..6123470 100644 --- a/src/plugins/extra/syntect.rs +++ b/src/plugins/extra/syntect.rs @@ -1,4 +1,5 @@ //! Syntax highlighting for code blocks +use anyhow::anyhow; use syntect::highlighting::ThemeSet; use syntect::html::highlighted_html_for_string; use syntect::parsing::SyntaxSet; @@ -7,7 +8,7 @@ use crate::parser::core::CoreRule; use crate::parser::extset::MarkdownItExt; use crate::plugins::cmark::block::code::CodeBlock; use crate::plugins::cmark::block::fence::CodeFence; -use crate::{MarkdownIt, Node, NodeValue, Renderer}; +use crate::{MarkdownIt, Node, NodeValue, Renderer, Result}; #[derive(Debug)] pub struct SyntectSnippet { @@ -40,7 +41,18 @@ pub fn set_theme(md: &mut MarkdownIt, theme: &'static str) { pub struct SyntectRule; impl CoreRule for SyntectRule { + fn try_run(root: &mut Node, md: &MarkdownIt) -> Result<()> { + Self::_run::(root, md)?; + Ok(()) + } + fn run(root: &mut Node, md: &MarkdownIt) { + let _ = Self::_run::(root, md); + } +} + +impl SyntectRule { + fn _run(root: &mut Node, md: &MarkdownIt) -> Result<()> { let ss = SyntaxSet::load_defaults_newlines(); let ts = ThemeSet::load_defaults(); let theme = &ts.themes[md.ext.get::().copied().unwrap_or_default().0]; @@ -59,10 +71,17 @@ impl CoreRule for SyntectRule { if let Some(content) = content { let mut syntax = None; if let Some(language) = language { - syntax = ss.find_syntax_by_token(&language); + let language = language.trim(); + if !language.is_empty() { + syntax = ss.find_syntax_by_token(language); + + if CAN_FAIL && syntax.is_none() { + return Err(anyhow!("syntax not found for language `{language}`")); + } 
+ } } - let syntax = syntax.unwrap_or_else(|| ss.find_syntax_plain_text()); + let syntax = syntax.unwrap_or_else(|| ss.find_syntax_plain_text()); let html = highlighted_html_for_string(content, &ss, syntax, theme); if let Ok(html) = html { @@ -70,6 +89,117 @@ impl CoreRule for SyntectRule { } } Ok(()) + }) + } +} + +#[cfg(test)] +mod tests { + use indoc::{indoc, formatdoc}; + + fn run(input: &str) -> String { + let md = &mut crate::MarkdownIt::new(); + crate::plugins::cmark::block::fence::add(md); + crate::plugins::extra::syntect::add(md); + let node = md.parse(&(input.to_owned() + "\n")); + node.walk(|node, _| { + assert!(node.srcmap.is_some()); + Ok(()) + }).unwrap(); + node.render() + } + + fn try_run(input: &str) -> crate::Result { + let md = &mut crate::MarkdownIt::new(); + crate::plugins::cmark::block::fence::add(md); + crate::plugins::extra::syntect::add(md); + let node = md.try_parse(&(input.to_owned() + "\n"))?; + node.walk(|node, _| { + assert!(node.srcmap.is_some()); + Ok(()) }).unwrap(); + Ok(node.render()) + } + + #[test] + fn no_lang_prefix() { + let input = indoc!(r#" + ``` + hello + ``` + "#); + + let output = indoc!(r#" +
+            hello
+            
+ "#); + + assert_eq!(run(input), output); + assert_eq!(try_run(input).ok().unwrap(), output); + } + + #[test] + fn rust_highlight() { + let input = indoc!(r#" + ```rust + let hello = "world"; + ``` + "#); + + let output = indoc!(r#" +
+            let
+             hello 
+            = 
+            "world"
+            ;
+            
+ "#); + + assert_eq!(run(input).replace('\n', ""), output.replace('\n', "")); + assert_eq!(try_run(input).ok().unwrap().replace('\n', ""), output.replace('\n', "")); + } + + #[test] + fn rust_highlight_trim_spaces() { + let input = &formatdoc!(r#" + ``` rust{} + let hello = "world"; + ``` + "#, " "); + + let output = indoc!(r#" +
+            let
+             hello 
+            = 
+            "world"
+            ;
+            
+ "#); + + assert_eq!(run(input).replace('\n', ""), output.replace('\n', "")); + assert_eq!(try_run(input).ok().unwrap().replace('\n', ""), output.replace('\n', "")); + } + + #[test] + fn unknown_lang() { + let input = indoc!(r#" + ```some-unknown-language + hello + ``` + "#); + + let output = indoc!(r#" +
+            hello
+            
+ "#); + + assert_eq!(run(input), output); + assert!( + format!("{:?}", try_run(input).err().unwrap()).contains("syntax not found for language") + ); } }