From 854da28b55c15728988197a393d5e7107cd3b0b9 Mon Sep 17 00:00:00 2001 From: Levente Hunyadi Date: Fri, 4 Oct 2024 20:39:06 +0200 Subject: [PATCH] Add support for emojis --- integration_tests/.gitignore | 1 + integration_tests/test_api.py | 1 + integration_tests/test_csf.py | 29 ++++++++++++ md2conf/api.py | 21 +-------- md2conf/converter.py | 87 ++++++++++++++++++++++++++++++++++- md2conf/emoji.py | 48 +++++++++++++++++++ md2conf/util.py | 19 ++++++++ sample/index.md | 10 ++++ tests/source/.gitignore | 1 + tests/target/.gitignore | 1 + tests/test_conversion.py | 15 +++--- tests/test_processor.py | 2 +- 12 files changed, 206 insertions(+), 29 deletions(-) create mode 100644 integration_tests/.gitignore create mode 100644 integration_tests/test_csf.py create mode 100644 md2conf/emoji.py create mode 100644 md2conf/util.py create mode 100644 tests/source/.gitignore create mode 100644 tests/target/.gitignore diff --git a/integration_tests/.gitignore b/integration_tests/.gitignore new file mode 100644 index 0000000..8b0aee8 --- /dev/null +++ b/integration_tests/.gitignore @@ -0,0 +1 @@ +/example.csf diff --git a/integration_tests/test_api.py b/integration_tests/test_api.py index 79bb56a..991a84e 100644 --- a/integration_tests/test_api.py +++ b/integration_tests/test_api.py @@ -26,6 +26,7 @@ class TestAPI(unittest.TestCase): out_dir: Path + sample_dir: Path def setUp(self) -> None: test_dir = Path(__file__).parent diff --git a/integration_tests/test_csf.py b/integration_tests/test_csf.py new file mode 100644 index 0000000..59a2609 --- /dev/null +++ b/integration_tests/test_csf.py @@ -0,0 +1,29 @@ +import unittest +from pathlib import Path + +from md2conf.api import ConfluenceAPI +from md2conf.converter import content_to_string + +TEST_SPACE = "DAP" +TEST_PAGE_ID = "86918529216" + + +class TestConfluenceStorageFormat(unittest.TestCase): + test_dir: Path + + def setUp(self) -> None: + self.test_dir = Path(__file__).parent + parent_dir = self.test_dir.parent + + self.sample_dir = parent_dir / "sample" + + def test_markdown(self) -> None: + with ConfluenceAPI() as api: + page = api.get_page(TEST_PAGE_ID, space_key=TEST_SPACE) + + with open(self.test_dir / "example.csf", "w") as f: + f.write(content_to_string(page.content)) + + +if __name__ == "__main__": + unittest.main() diff --git a/md2conf/api.py b/md2conf/api.py index a6c24b5..a6e6124 100644 --- a/md2conf/api.py +++ b/md2conf/api.py @@ -2,7 +2,6 @@ import json import logging import mimetypes -import sys import typing from contextlib import contextmanager from dataclasses import dataclass @@ -15,6 +14,7 @@ from .converter import ParseError, sanitize_confluence from .properties import ConfluenceError, ConfluenceProperties +from .util import removeprefix # a JSON type with possible `null` values JsonType = Union[ @@ -44,25 +44,6 @@ def build_url(base_url: str, query: Optional[Dict[str, str]] = None) -> str: return urlunparse(url_parts) -if sys.version_info >= (3, 9): - - def removeprefix(string: str, prefix: str) -> str: - "If the string starts with the prefix, return the string without the prefix; otherwise, return the original string." - - return string.removeprefix(prefix) - -else: - - def removeprefix(string: str, prefix: str) -> str: - "If the string starts with the prefix, return the string without the prefix; otherwise, return the original string." - - if string.startswith(prefix): - prefix_len = len(prefix) - return string[prefix_len:] - else: - return string - - LOGGER = logging.getLogger(__name__) diff --git a/md2conf/converter.py b/md2conf/converter.py index 10b1bc0..22dddb2 100644 --- a/md2conf/converter.py +++ b/md2conf/converter.py @@ -7,9 +7,10 @@ import re import sys import uuid +import xml.etree.ElementTree from dataclasses import dataclass from pathlib import Path -from typing import Dict, List, Literal, Optional, Tuple +from typing import Any, Dict, List, Literal, Optional, Tuple from urllib.parse import ParseResult, urlparse, urlunparse import lxml.etree as ET @@ -55,6 +56,27 @@ def is_relative_url(url: str) -> bool: return not bool(urlparts.scheme) and not bool(urlparts.netloc) +def emoji_generator( + index: str, + shortname: str, + alias: Optional[str], + uc: Optional[str], + alt: str, + title: Optional[str], + category: Optional[str], + options: Dict[str, Any], + md: markdown.Markdown, +) -> xml.etree.ElementTree.Element: + name = (alias or shortname).strip(":") + span = xml.etree.ElementTree.Element("span", {"data-emoji": name}) + if uc is not None: + # convert series of Unicode code point hexadecimal values into characters + span.text = "".join(chr(int(item, base=16)) for item in uc.split("-")) + else: + span.text = alt + return span + + def markdown_to_html(content: str) -> str: return markdown.markdown( content, @@ -62,11 +84,17 @@ def markdown_to_html(content: str) -> str: "admonition", "markdown.extensions.tables", "markdown.extensions.fenced_code", + "pymdownx.emoji", "pymdownx.magiclink", "pymdownx.tilde", "sane_lists", "md_in_html", ], + extension_configs={ + "pymdownx.emoji": { + "emoji_generator": emoji_generator, + } + }, ) @@ -81,6 +109,7 @@ def _elements_from_strings(dtd_path: Path, items: List[str]) -> ET._Element: parser = ET.XMLParser( remove_blank_text=True, + remove_comments=True, strip_cdata=False, load_dtd=True, ) @@ -678,6 +707,23 @@ def _transform_section(self, elem: ET._Element) -> ET._Element: AC("rich-text-body", {}, *list(elem)), ) + def _transform_emoji(self, elem: ET._Element) -> ET._Element: + shortname = elem.attrib.get("data-emoji", "") + alt = elem.text or "" + + # + # + # + return AC( + "emoticon", + { + # use "blue-star" as a placeholder name to ensure wiki page loads in timely manner + ET.QName(namespaces["ac"], "name"): "blue-star", + ET.QName(namespaces["ac"], "emoji-shortname"): f":{shortname}:", + ET.QName(namespaces["ac"], "emoji-fallback"): alt, + }, + ) + def transform(self, child: ET._Element) -> Optional[ET._Element]: # normalize line breaks to regular space in element text if child.text: @@ -764,6 +810,9 @@ def transform(self, child: ET._Element) -> Optional[ET._Element]: elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code": return self._transform_block(child[0]) + elif child.tag == "span" and child.attrib.has_key("data-emoji"): + return self._transform_emoji(child) + return None @@ -963,3 +1012,39 @@ def elements_to_string(root: ET._Element) -> str: return m.group(1) else: raise ValueError("expected: Confluence content") + + +def _content_to_string(dtd_path: Path, content: str) -> str: + parser = ET.XMLParser( + remove_blank_text=True, + remove_comments=True, + strip_cdata=False, + load_dtd=True, + ) + + ns_attr_list = "".join( + f' xmlns:{key}="{value}"' for key, value in namespaces.items() + ) + + data = [ + '', + f'' + f"", + ] + data.append(content) + data.append("") + + tree = ET.fromstringlist(data, parser=parser) + return ET.tostring(tree, pretty_print=True).decode("utf-8") + + +def content_to_string(content: str) -> str: + "Converts a Confluence Storage Format document returned by the API into a readable XML document." + + if sys.version_info >= (3, 9): + resource_path = resources.files(__package__).joinpath("entities.dtd") + with resources.as_file(resource_path) as dtd_path: + return _content_to_string(dtd_path, content) + else: + with resources.path(__package__, "entities.dtd") as dtd_path: + return _content_to_string(dtd_path, content) diff --git a/md2conf/emoji.py b/md2conf/emoji.py new file mode 100644 index 0000000..08f9f52 --- /dev/null +++ b/md2conf/emoji.py @@ -0,0 +1,48 @@ +import pathlib + +import pymdownx.emoji1_db as emoji_db + + +def generate_source(path: pathlib.Path) -> None: + "Generates a source Markdown document for testing emojis." + + emojis = emoji_db.emoji + + with open(path, "w") as f: + print("", file=f) + print("", file=f) + print(file=f) + print("## Emoji", file=f) + print(file=f) + print("| Icon | Emoji code |", file=f) + print("| ---- | ---------- |", file=f) + for key in emojis.keys(): + key = key.strip(":") + print(f"| :{key}: | `:{key}:` |", file=f) + + +def generate_target(path: pathlib.Path) -> None: + "Generates a target Confluence Storage Format (XML) document for testing emojis." + + emojis = emoji_db.emoji + + with open(path, "w") as f: + print('', file=f) + print("", file=f) + print("

This page has been generated with a tool.

", file=f) + print("
", file=f) + print("
", file=f) + print("

Emoji

", file=f) + print("", file=f) + print("", file=f) + print("", file=f) + for key, data in emojis.items(): + key = key.strip(":") + unicode = "".join(f"&#x{item};" for item in data["unicode"].split("-")) + + print( + f'', + file=f, + ) + print("", file=f) + print("
IconEmoji code
:{key}:
", file=f) diff --git a/md2conf/util.py b/md2conf/util.py new file mode 100644 index 0000000..e2ebb96 --- /dev/null +++ b/md2conf/util.py @@ -0,0 +1,19 @@ +import sys + +if sys.version_info >= (3, 9): + + def removeprefix(string: str, prefix: str) -> str: + "If the string starts with the prefix, return the string without the prefix; otherwise, return the original string." + + return string.removeprefix(prefix) + +else: + + def removeprefix(string: str, prefix: str) -> str: + "If the string starts with the prefix, return the string without the prefix; otherwise, return the original string." + + if string.startswith(prefix): + prefix_len = len(prefix) + return string[prefix_len:] + else: + return string diff --git a/sample/index.md b/sample/index.md index c8751d9..8c48371 100644 --- a/sample/index.md +++ b/sample/index.md @@ -132,18 +132,28 @@ Markdown has no native support for admonitions. Admonitions that follow the [Pyt Alerts are a Markdown extension based on the blockquote syntax that you can use to emphasize critical information. [GitHub](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts) and [GitLab](https://docs.gitlab.com/ee/development/documentation/styleguide/#alert-boxes) display them with distinctive colors and icons to indicate the significance of the content. When converted to Confluence, they are represented as structured macros, which are displayed as info panels. +Note: + > [!NOTE] > Useful information that users should know, even when skimming content. +Tip: + > [!TIP] > Helpful advice for doing things better or more easily. +Important: + > [!IMPORTANT] > Key information users need to know to achieve their goal. +Warning: + > [!WARNING] > Urgent info that needs immediate user attention to avoid problems. +Caution: + > [!CAUTION] > Advises about risks or negative outcomes of certain actions. diff --git a/tests/source/.gitignore b/tests/source/.gitignore new file mode 100644 index 0000000..08f575f --- /dev/null +++ b/tests/source/.gitignore @@ -0,0 +1 @@ +/emoji.md diff --git a/tests/target/.gitignore b/tests/target/.gitignore new file mode 100644 index 0000000..6f7b873 --- /dev/null +++ b/tests/target/.gitignore @@ -0,0 +1 @@ +/emoji.xml diff --git a/tests/test_conversion.py b/tests/test_conversion.py index 9ce64c6..c02cbdf 100644 --- a/tests/test_conversion.py +++ b/tests/test_conversion.py @@ -2,10 +2,10 @@ import os import os.path import re -import shutil import unittest from pathlib import Path +import md2conf.emoji as emoji from md2conf.converter import ( ConfluenceDocument, ConfluenceDocumentOptions, @@ -36,21 +36,22 @@ def standardize(content: str) -> str: class TestConversion(unittest.TestCase): - out_dir: Path + source_dir: Path + target_dir: Path def setUp(self) -> None: self.maxDiff = None test_dir = Path(__file__).parent - self.out_dir = test_dir / "output" self.source_dir = test_dir / "source" self.target_dir = test_dir / "target" - os.makedirs(self.out_dir, exist_ok=True) - - def tearDown(self) -> None: - shutil.rmtree(self.out_dir) def test_markdown(self) -> None: + if not os.path.exists(self.source_dir / "emoji.md"): + emoji.generate_source(self.source_dir / "emoji.md") + if not os.path.exists(self.target_dir / "emoji.xml"): + emoji.generate_target(self.target_dir / "emoji.xml") + matcher = Matcher( MatcherOptions(source=".mdignore", extension="md"), self.source_dir ) diff --git a/tests/test_processor.py b/tests/test_processor.py index 90d371a..b2682db 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -29,7 +29,7 @@ def setUp(self) -> None: def tearDown(self) -> None: shutil.rmtree(self.out_dir) - def atest_process_document(self) -> None: + def test_process_document(self) -> None: options = ConfluenceDocumentOptions( ignore_invalid_url=False, generated_by="Test Case",