diff --git a/README.md b/README.md index deae9d3..25e58cd 100644 --- a/README.md +++ b/README.md @@ -46,8 +46,8 @@ The dark theme is not built in, but rendered with the [Dark Reader Firefox exten - [ ] Add support for Babylon BGL glossary format - [X] Add support for StarDict format -- [X] Add support for ABBYY Lingvo DSL format[^4] -- [ ] Reduce DSL indexing and parsing time +- [X] Add support for ABBYY Lingvo DSL format +- [X] Reduce DSL parsing time - [X] Reduce the memory footprint of the MDict Reader - [ ] Inline styles to prevent them from being applied to the whole page (The commented-out implementation in [`server/app/dicts/mdict/html_cleaner.py`](/server/app/dicts/mdict/html_cleaner.py) breaks richly-formatted dictionaries.)[^5] - [X] Reorganise APIs (to facilitate dictionary groups) @@ -63,7 +63,6 @@ The dark theme is not built in, but rendered with the [Dark Reader Firefox exten - [X] Make the suggestion size customisable - [X] Allow configure suggestion matching mode, listening address, running mode, etc. via a configuration file, without modifying code - [X] Add a timestamp field to suggestions to avoid newer suggestions being overridden by older ones -- [ ] Use a linter - [ ] Full-text search ### Client-side @@ -79,7 +78,6 @@ The dark theme is not built in, but rendered with the [Dark Reader Firefox exten ### Issue backlog -- [ ] Malformed DSL tags (perhaps I'll write a C++ parser for DSL) - [ ] Make the dialogues children of the root element (How can I do this with nested dialogues?) ## Usage @@ -89,15 +87,18 @@ The dark theme is not built in, but rendered with the [Dark Reader Firefox exten This project utilises some Python 3.10 features, such as the _match_ syntax, and a minimal set of dependencies: ``` PyYAML # for better efficiency, please install libyaml before building the wheel -Flask +Flask # the web framework Flask-Cors -waitress -python-idzip -lxml -python-lzo -xxhash +waitress # the WSGI server +python-idzip # for dictzip +lxml # for XDXF-formatted StarDicts +python-lzo # for v1/v2 MDict +xxhash # for v3 MDict +dsl2html # for DSL ``` +The package [`dsl2html`](https://github.com/Crissium/python-dsl) is mine, and could be used by other projects. + In order to enable the feature of morphology analysis, you need to install the Python package `hunspell` and place the Hunspell dictionaries into `~/.silverdict/hunspell`. In order to enable the feature of Chinese conversion, you need to install the Python package `opencc`. @@ -173,8 +174,6 @@ I would also express my gratitude to Jiang Qian for his suggestions, encourageme [^1]: A note about type hinting in the code: I know for proper type hinting I should use the module `typing`, but the current way is a little easier to write and can be understood by VS Code. -[^4]: I tested with an extremely ill-formed DSL dictionary, and before such devilry my cleaning code is powerless. I will look into how GoldenDict handles this. - [^5]: The use of a custom styling manager such as Dark Reader is recommended until I fix this, as styles for different dictionaries meddle with each other. Or better, if you know CSS, you could just edit the dictionaries' stylesheets to make them less intrusive and individualistic. [^6]: A Russian-speaking friend told me that it is unusual to type Russian on an American keyboard, so whether this feature is useful is open to doubt. diff --git a/server/app/dicts/dsl/layer.py b/server/app/dicts/dsl/layer.py deleted file mode 100644 index fe84d1a..0000000 --- a/server/app/dicts/dsl/layer.py +++ /dev/null @@ -1,103 +0,0 @@ - -import typing - -# -*- coding: utf-8 -*- -# -# Copyright © 2016 Ratijas -# Copyright © 2016-2017 Saeed Rasooli -# -# This program is a free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# You can get a copy of GNU General Public License along this program -# But you can always get it from http://www.gnu.org/licenses/gpl.txt -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -""" -internal stuff. Layer class -""" - -from typing import Iterable - -from . import tag - - -class Layer(object): - - __slots__ = ["tags", "text"] - - def __init__(self: "typing.Self", stack: "list[Layer]") -> None: - stack.append(self) - self.tags = set() - self.text = "" - - def __contains__(self: "typing.Self", tag: "tag.Tag") -> bool: - return tag in self.tags - - def __repr__(self: "typing.Self") -> str: - tags = "{" + ", ".join(map(str, self.tags)) + "}" - return f"Layer({tags}, {self.text!r})" - - def __eq__(self: "typing.Self", other: "Layer") -> bool: - """ - mostly for unittest. - """ - return self.text == other.text and self.tags == other.tags - - -i_and_c = {tag.Tag("i", "i"), tag.Tag("c", "c")} -p_tag = tag.Tag("p", "p") - - -def close_tags( - stack: "Iterable[Layer]", - tags: "Iterable[tag.Tag]", - layer_index: bool = -1, -) -> None: - """ - close given tags on layer with index `layer_index`. - """ - if layer_index == -1: - layer_index = len(stack) - 1 - layer = stack[layer_index] - - if layer.text: - tags = set.intersection(layer.tags, tags) - if not tags: - return - - # shortcut: [i][c] equivalent to [p] - if tags.issuperset(i_and_c): - tags -= i_and_c - tags.add(p_tag) - layer.tags -= i_and_c - # no need to layer.tags.add() - - ordered_tags = tag.canonical_order(tags) - layer.text = "".join( - [f"[{x.opening}]" for x in ordered_tags] + - [layer.text] + - [f"[/{x.closing}]" for x in reversed(ordered_tags)], - ) - - # remove tags from layer - layer.tags -= tags - if layer.tags or layer_index == 0: - return - superlayer = stack[layer_index - 1] - superlayer.text += layer.text - del stack[layer_index] - - -def close_layer(stack: "list[Layer]") -> None: - """ - close top layer on stack. - """ - if not stack: - return - tags = stack[-1].tags - close_tags(stack, tags) diff --git a/server/app/dicts/dsl/main.py b/server/app/dicts/dsl/main.py deleted file mode 100644 index 4954065..0000000 --- a/server/app/dicts/dsl/main.py +++ /dev/null @@ -1,286 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright © 2016 Ratijas -# Copyright © 2016-2018 Saeed Rasooli -# -# This program is a free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# You can get a copy of GNU General Public License along this program -# But you can always get it from http://www.gnu.org/licenses/gpl.txt -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -""" -exposed API lives here. -""" - - -import copy -import re -import typing -from typing import Iterable - -from . import layer as _layer -from . import tag as _tag - - -def process_closing_tags( - stack: "Iterable[_layer.Layer]", - tags: "Iterable[str]", -) -> None: - """ - close `tags`, closing some inner layers if necessary. - """ - index = len(stack) - 1 - for tag in copy.copy(tags): - index_for_tag = _tag.index_of_layer_containing_tag(stack, tag) - if index_for_tag is not None: - index = min(index, index_for_tag) - else: - tags.remove(tag) - - if not tags: - return - - to_open = set() - for layer in stack[:index:-1]: - for lt in layer.tags: - if lt.closing not in tags: - to_open.add(lt) - _layer.close_layer(stack) - - to_close = set() - layer = stack[index] - for lt in layer.tags: - if lt.closing in tags: - to_close.add(lt) - _layer.close_tags(stack, to_close, index) - - if to_open: - _layer.Layer(stack) - stack[-1].tags = to_open - - -OPEN = 1 -CLOSE = 2 -TEXT = 3 -ACTION = "Literal[OPEN, CLOSE, TEXT]" - -BRACKET_L = "\0\1" -BRACKET_R = "\0\2" - -# precompiled regexs -# re_m_tag_with_content = re.compile(r"(\[m\d\])(.*?)(\[/m\])") -re_non_escaped_bracket = re.compile(r"(? None: - r""" - :param tags: set (or any other iterable) of tags where each tag is a - string or two-tuple. if string, it is tag name without - brackets, must be constant, i.e. non-save regex characters - will be escaped, e.g.: "i", "sub", "*". - if 2-tuple, then first item is tag"s base name, and - second is its extension for opening tag, - e.g.: ("c", r" (\w+)"), ("m", r"\d") - """ - tags_ = set() - for tag, ext_re in ( - t if isinstance(t, tuple) else (t, "") - for t in tags - ): - tag_re = re.escape(tag) - re_tag_open = re.compile(fr"\[{tag_re}{ext_re}\]") - tags_.add((tag, tag_re, ext_re, re_tag_open)) - self.tags = frozenset(tags_) - - def parse(self: "typing.Self", line: str) -> str: - r""" - parse dsl markup in `line` and return clean valid dsl markup. - - :type line: str - :param line: line with dsl formatting. - - :rtype: str - """ - line = self.put_brackets_away(line) - line = self._parse(line) - return self.bring_brackets_back(line) - - def _parse(self: "typing.Self", line: str) -> str: - items = self._split_line_by_tags(line) - return self._tags_and_text_loop(items) - - def _split_line_by_tags( - self: "typing.Self", - line: str, - ) -> "Iterable[[OPEN, _tag.Tag] | [CLOSE, str] | [TEXT, str]]": - """ - split line into chunks, each chunk is whether opening / closing - tag or text. - - return iterable of two-tuples. first element is item's type, one of: - - OPEN, second element is Tag object - - CLOSE, second element is str with closed tag's name - - TEXT, second element is str - - :param line: str - :return: Iterable - """ - ptr = 0 - while ptr < len(line): - bracket = line.find("[", ptr) - if bracket != -1: - chunk = line[ptr:bracket] - else: - chunk = line[ptr:] - - if chunk: - yield TEXT, chunk - - if bracket == -1: - break - - ptr = bracket - # at least two chars after opening bracket: - bracket = line.find("]", ptr + 2) - if line[ptr + 1] == "/": - yield CLOSE, line[ptr + 2:bracket] - ptr = bracket + 1 - continue - - for tag, _, _, re_tag_open in self.tags: - if re_tag_open.match(line[ptr:bracket + 1]): - yield OPEN, _tag.Tag(line[ptr + 1:bracket], tag) - break - else: - tag = line[ptr + 1:bracket] - yield OPEN, _tag.Tag(tag, tag) - ptr = bracket + 1 - - @staticmethod - def _tags_and_text_loop( - tags_and_text: "Iterable[[OPEN, _tag.Tag] | [CLOSE, str] | [TEXT, str]]", - ) -> str: - """ - parse chunks one by one. - """ - state = TEXT - stack = [] - closings = set() - - for item_t, item in tags_and_text: - # TODO: break into functions like: - # state = handle_tag_open(_tag, stack, closings, state) - if item_t is OPEN: - if _tag.was_opened(stack, item) and item.closing not in closings: - continue - - if item.closing == "m" and len(stack) >= 1: - # close all layers. [m*] tags can only appear - # at top layer. - # note: do not reopen tags that were marked as - # closed already. - to_open = set.union(*( - {t for t in layer.tags if t.closing not in closings} - for layer in stack - )) - for _ in range(len(stack)): - _layer.close_layer(stack) - # assert len(stack) == 1 - # assert not stack[0].tags - _layer.Layer(stack) - stack[-1].tags = to_open - - elif state is CLOSE: - process_closing_tags(stack, closings) - - if not stack or stack[-1].text: - _layer.Layer(stack) - - stack[-1].tags.add(item) - state = OPEN - continue - - if item_t is CLOSE: - if state in (OPEN, TEXT): - closings.clear() - closings.add(item) - state = CLOSE - continue - - if item_t is TEXT: - if state is CLOSE: - process_closing_tags(stack, closings) - - if not stack: - _layer.Layer(stack) - stack[-1].text += item - state = TEXT - continue - - if state is CLOSE and closings: - process_closing_tags(stack, closings) - # shutdown unclosed tags - return "".join(layer.text for layer in stack) - - def put_brackets_away(self: "typing.Self", line: str) -> str: - r"""put away \[, \] and brackets that does not belong to any of given tags. - - :rtype: str - """ - clean_line = "" - startswith_tag = _startswith_tag_cache.get(self.tags, None) - if startswith_tag is None: - openings = "|".join(f"{_[1]}{_[2]}" for _ in self.tags) - closings = "|".join(_[1] for _ in self.tags) - startswith_tag = re.compile( - fr"(?:(?:{openings})|/(?:{closings}))\]", - ) - _startswith_tag_cache[self.tags] = startswith_tag - for i, chunk in enumerate(re_non_escaped_bracket.split(line)): - if i != 0: - m = startswith_tag.match(chunk) - if m: - clean_line += "[" + \ - m.group() + \ - chunk[m.end():].replace("[", BRACKET_L)\ - .replace("]", BRACKET_R) - else: - clean_line += BRACKET_L + chunk.replace("[", BRACKET_L)\ - .replace("]", BRACKET_R) - else: # first chunk - clean_line += chunk.replace("[", BRACKET_L)\ - .replace("]", BRACKET_R) - return clean_line - - @staticmethod - def bring_brackets_back(line: str) -> str: - return line.replace(BRACKET_L, "[").replace(BRACKET_R, "]") diff --git a/server/app/dicts/dsl/markup_converter.py b/server/app/dicts/dsl/markup_converter.py index aca2eb1..ef42c61 100644 --- a/server/app/dicts/dsl/markup_converter.py +++ b/server/app/dicts/dsl/markup_converter.py @@ -1,99 +1,14 @@ -import re import os import shutil -import html +import dsl import concurrent.futures from zipfile import ZipFile -from xml.sax.saxutils import escape, quoteattr -from .main import DSLParser import logging logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -# order matters, a lot. -shortcuts = [ - # canonical: m > * > ex > i > c - ( - '[m1](?:-{2,})[/m]', - '
', - ), - ( - '[m(\\d)](?:-{2,})[/m]', - '
', - ), -] - -shortcuts = [ - ( - re.compile(repl.replace('[', '\\[').replace('*]', '\\*]')), - sub, - ) for (repl, sub) in shortcuts -] - -def apply_shortcuts(text: 'str') -> 'str': - for pattern, sub in shortcuts: - text = pattern.sub(sub, text) - return text - -htmlEntityPattern = re.compile(r"&#?\w+;") - -def unescape(text: str) -> str: - def fixup(m: "re.Match") -> str: - text = m.group(0) - if text[:2] == "&#": - # character reference - try: - if text[:3] == "&#x": - i = int(text[3:-1], 16) - else: - i = int(text[2:-1]) - except ValueError: - pass - else: - try: - return chr(i) - except ValueError: - # f"\\U{i:08x}", but no fb"..." - return (b"\\U%08x" % i).decode("unicode-escape") - else: - # named entity - try: - text = chr(html.entities.name2codepoint[text[1:-1]]) - except KeyError: - pass - return text # leave as is - return htmlEntityPattern.sub(fixup, text) - -def make_a_href(s: 'str', href_root: 'str') -> 'str': - return f"{escape(s)}" - class DSLConverter: - re_brackets_blocks = re.compile(r'\{\{[^}]*\}\}') - re_lang_open = re.compile(r'(?>') - re_remnant_m = re.compile(r'\[(?:/m|m[^]]*)\]') - - _REF_PATTERN = r'<<([^&]+)>>' - - IMAGE_EXTENSIONS = ['jpg', 'jpeg', 'png', 'gif', 'svg', 'bmp', 'tif', 'tiff', 'ico', 'webp', 'avif', 'apng', 'jfif', 'pjpeg', 'pjp'] - IMAGE_EXTENSIONS += [extension.upper() for extension in IMAGE_EXTENSIONS] - SOUND_EXTENSIONS = ['mp3', 'ogg', 'wav', 'wave'] - SOUND_EXTENSIONS += [extension.upper() for extension in SOUND_EXTENSIONS] - VIDEO_EXTENSIONS = ['mp4', 'webm', 'ogv', 'ogg'] - VIDEO_EXTENSIONS += [extension.upper() for extension in VIDEO_EXTENSIONS] - - def _replace_ref_match(self, match: 're.Match') -> 'str': - word = match.group(1) - return f'{word}' - - def ref_sub(self, x: 're.Match') -> 'str': - return make_a_href(unescape(x.groups()[0]), self._lookup_url_root) - def __init__(self, dict_filename: 'str', dict_name: 'str', resources_dir: 'str', resources_extracted: 'bool') -> None: if not resources_extracted: base, extension = os.path.splitext(dict_filename) @@ -119,162 +34,19 @@ def __init__(self, dict_filename: 'str', dict_name: 'str', resources_dir: 'str', except AttributeError: self._resources_filename = '' + self._name_dict = dict_name self._resources_extracted = resources_extracted self._resources_dir = resources_dir - self._href_root = '/api/cache/' + dict_name + '/' - self._lookup_url_root = '/api/lookup/' + dict_name + '/' - self._parser = DSLParser() - - def _clean_tags(self, line: 'str') -> 'str': - # remove {{...}} blocks - line = self.re_brackets_blocks.sub('', line) - - # remove trn tags - line = line.replace('[trn]', '').replace('[/trn]', '').replace('[trs]', '').replace('[/trs]','').replace('[!trn]', '').replace('[/!trn]', '').replace('[!trs]', '').replace('[/!trs]', '') - - # remove lang tags - line = self.re_lang_open.sub('', line).replace('[/lang]', '') - - # remove com tags - line = line.replace('[com]', '').replace('[/com]', '') - - # escape html special characters like '<' and '>' - line = html.escape(html.unescape(line)) - - # remove t tags - line = line.replace( - '[t]', - '', - ) - line = line.replace('[/t]', '') - - line = self._parser.parse(line) - - line = self.re_end.sub('
', line) - - # paragraph, part one: before shortcuts. - line = line.replace('[m]', '[m1]') - # if text somewhere contains "[m_]" tag like - # "[b]I[/b][m1] [c][i]conj.[/i][/c][/m][m1]1) ...[/m]" - # then leave it alone. only wrap in "[m1]" when no "m" tag found at all. - if not self.re_m_open.search(line): - line = '[m1]%s[/m]' % line - - line = apply_shortcuts(line) - # paragraph, part two: if any not shourcuted [m] left? - line = self.re_m.sub(r'
\g<2>
', line) - - # text formats - line = line.replace("[']", "").replace("[/']", "") - line = line.replace("[b]", "").replace("[/b]", "") - line = line.replace("[i]", "").replace("[/i]", "") - line = line.replace("[u]", "").replace("[/u]", "") - line = line.replace("[sup]", "").replace("[/sup]", "") - line = line.replace("[sub]", "").replace("[/sub]", "") - - # color - line = line.replace("[c]", "") - line = self.re_c_open_color.sub("\">", line) - line = line.replace("[/c]", "") - - # example zone - line = line.replace("[ex]", "") - line = line.replace("[/ex]", "") - - # secondary zone - line = line.replace("[*]", "")\ - .replace("[/*]", "") - - # abbrev. label - line = line.replace("[p]", "") - line = line.replace("[/p]", "") - - # cross reference - line = line.replace("[ref]", "<<").replace("[/ref]", ">>") - line = line.replace("[url]", "<<").replace("[/url]", ">>") - line = self.re_ref.sub(self.ref_sub, line) - - # \[...\] - line = line.replace("\\[", "[").replace("\\]", "]") - - # preserve newlines - if not line.endswith('>') and not line.endswith('[/m]'): - line += '
' - - return line - - def _correct_media_references(self, html: 'str') -> 'tuple[str, list[str]]': - files_to_be_extracted = [] - s_tag_end_position = 0 - autoplay_string = 'autoplay' - while True: - s_tag_begin_position = html.find('[s]', s_tag_end_position) - if s_tag_begin_position == -1: - break - s_tag_end_position = html.find('[/s]', s_tag_begin_position) - if s_tag_end_position == -1: - break - media_name = html[s_tag_begin_position+len('[s]'):s_tag_end_position] - - if not os.path.isfile(os.path.join(self._resources_dir, media_name)): - files_to_be_extracted.append(media_name) - if self._resources_extracted: - logger.warning('Media file %s not found in resources directory %s' % (media_name, self._resources_dir)) - - media_ref = self._href_root + media_name - if media_name.split('.')[-1] in self.IMAGE_EXTENSIONS: - proper_media_html = '' % media_ref - elif media_name.split('.')[-1] in self.SOUND_EXTENSIONS: - proper_media_html = '' % (autoplay_string, media_ref, media_name) - autoplay_string = '' - elif media_name.split('.')[-1] in self.VIDEO_EXTENSIONS: - proper_media_html = '' % media_ref - else: - proper_media_html = '%s' % (media_ref, media_name) - html = html.replace('[s]%s[/s]' % media_name, proper_media_html) - return html, files_to_be_extracted - def _extract_files(self, files_to_be_extracted: 'list[str]') -> 'None': - # ZipFile's extractall() is too slow, so we use a thread pool to extract files in parallel. - with ZipFile(self._resources_filename) as zip_file: - with concurrent.futures.ThreadPoolExecutor(len(files_to_be_extracted)) as executor: - executor.map(zip_file.extract, files_to_be_extracted, [self._resources_dir] * len(files_to_be_extracted)) - - def _clean_html(self, html: 'str') -> 'str': - # remove strange '\\ ' - html = html.replace('\\ ', '') - - # remove remnant [m] tags - html = self.re_remnant_m.sub('', html) - - # make references - html = re.sub(self._REF_PATTERN, self._replace_ref_match, html) - - html, files_to_be_extracted = self._correct_media_references(html) if not self._resources_extracted and files_to_be_extracted and self._resources_filename and os.path.isfile(self._resources_filename): - self._extract_files(files_to_be_extracted) - - return html + # ZipFile's extractall() is too slow, so we use a thread pool to extract files in parallel. + with ZipFile(self._resources_filename) as zip_file: + with concurrent.futures.ThreadPoolExecutor(len(files_to_be_extracted)) as executor: + executor.map(zip_file.extract, files_to_be_extracted, [self._resources_dir] * len(files_to_be_extracted)) - # def convert(self, text: 'str', headword: 'str') -> 'str': def convert(self, record: 'tuple[str, str]') -> 'str': text, headword = record - # for line in text.splitlines(): - # if line.startswith(' [m') and not line.endswith('[/m]'): - # text = text.replace(line, line + '[/m]') - - # text = self._clean_tags(text) - lines = text.splitlines() - definition_html = [] - for line in lines: - if line.startswith(' [m') and not line.endswith('[/m]'): - line += '[/m]' - definition_html.append(self._clean_tags(line)) - text = '\n'.join(definition_html) - - text = self._clean_html(text) - - text = '

%s

' % headword + text - - return text + text, files_to_be_extracted = dsl.to_html(text, self._name_dict) + self._extract_files(files_to_be_extracted) + return '

%s

' % headword + text diff --git a/server/app/dicts/dsl/tag.py b/server/app/dicts/dsl/tag.py deleted file mode 100644 index 7fb0290..0000000 --- a/server/app/dicts/dsl/tag.py +++ /dev/null @@ -1,82 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright © 2016 Ratijas -# Copyright © 2016-2017 Saeed Rasooli -# -# This program is a free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# You can get a copy of GNU General Public License along this program -# But you can always get it from http://www.gnu.org/licenses/gpl.txt -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -""" -internal stuff. Tag class -""" - - -from collections import namedtuple -from typing import Iterable - -from . import layer as _layer - -Tag = namedtuple("Tag", ["opening", "closing"]) - -Tag.__repr__ = lambda tag: \ - f"Tag({tag.opening!r})" if tag.opening == tag.closing \ - else f"Tag({tag.opening!r}, {tag.closing!r})" - -predefined = [ - "m", - "*", - "ex", - "i", - "c", -] - - -def was_opened(stack: "Iterable[_layer.Layer]", tag: "Tag") -> bool: - """ - check if tag was opened at some layer before. - """ - if not len(stack): - return False - layer = stack[-1] - if tag in layer: - return True - return was_opened(stack[:-1], tag) - - -def canonical_order(tags: "Iterable[Tag]") -> "list[Tag]": - """ - arrange tags in canonical way, where (outermost to innermost): - m > * > ex > i > c - with all other tags follow them in alphabetical order. - """ - result = [] - tags = list(tags) - for predef in predefined: - t = next((t for t in tags if t.closing == predef), None) - if t: - result.append(t) - tags.remove(t) - result.extend(sorted(tags, key=lambda x: x.opening)) - return result - - -def index_of_layer_containing_tag( - stack: "Iterable[_layer.Layer]", - tag: str, -) -> "int | None": - """ - return zero based index of layer with `tag` or None - """ - for i, layer in enumerate(reversed(stack)): - for t in layer.tags: - if t.closing == tag: - return len(stack) - i - 1 - return None