Skip to content

Commit

Permalink
Add support for emojis
Browse files Browse the repository at this point in the history
  • Loading branch information
hunyadi committed Oct 4, 2024
1 parent 9593ede commit 854da28
Show file tree
Hide file tree
Showing 12 changed files with 206 additions and 29 deletions.
1 change: 1 addition & 0 deletions integration_tests/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/example.csf
1 change: 1 addition & 0 deletions integration_tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

class TestAPI(unittest.TestCase):
out_dir: Path
sample_dir: Path

def setUp(self) -> None:
test_dir = Path(__file__).parent
Expand Down
29 changes: 29 additions & 0 deletions integration_tests/test_csf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import unittest
from pathlib import Path

from md2conf.api import ConfluenceAPI
from md2conf.converter import content_to_string

# space key and page ID passed to `ConfluenceAPI.get_page` by the test below
TEST_SPACE = "DAP"
TEST_PAGE_ID = "86918529216"


class TestConfluenceStorageFormat(unittest.TestCase):
    "Fetches a Confluence page and saves its content as a readable XML document."

    test_dir: Path
    sample_dir: Path

    def setUp(self) -> None:
        self.test_dir = Path(__file__).parent
        parent_dir = self.test_dir.parent

        self.sample_dir = parent_dir / "sample"

    def test_markdown(self) -> None:
        "Writes the pretty-printed Confluence Storage Format of the test page to `example.csf`."

        with ConfluenceAPI() as api:
            page = api.get_page(TEST_PAGE_ID, space_key=TEST_SPACE)

        # encode explicitly so that the output does not depend on the platform's locale encoding
        with open(self.test_dir / "example.csf", "w", encoding="utf-8") as f:
            f.write(content_to_string(page.content))


if __name__ == "__main__":
    unittest.main()
21 changes: 1 addition & 20 deletions md2conf/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import json
import logging
import mimetypes
import sys
import typing
from contextlib import contextmanager
from dataclasses import dataclass
Expand All @@ -15,6 +14,7 @@

from .converter import ParseError, sanitize_confluence
from .properties import ConfluenceError, ConfluenceProperties
from .util import removeprefix

# a JSON type with possible `null` values
JsonType = Union[
Expand Down Expand Up @@ -44,25 +44,6 @@ def build_url(base_url: str, query: Optional[Dict[str, str]] = None) -> str:
return urlunparse(url_parts)


if sys.version_info < (3, 9):

    def removeprefix(string: str, prefix: str) -> str:
        "Strips the prefix from the start of the string if present; otherwise, returns the string unchanged."

        # `str.removeprefix` is unavailable before Python 3.9, so slice the prefix off manually
        if not string.startswith(prefix):
            return string
        return string[len(prefix) :]

else:

    def removeprefix(string: str, prefix: str) -> str:
        "Strips the prefix from the start of the string if present; otherwise, returns the string unchanged."

        return string.removeprefix(prefix)


LOGGER = logging.getLogger(__name__)


Expand Down
87 changes: 86 additions & 1 deletion md2conf/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
import re
import sys
import uuid
import xml.etree.ElementTree
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Literal, Optional, Tuple
from typing import Any, Dict, List, Literal, Optional, Tuple
from urllib.parse import ParseResult, urlparse, urlunparse

import lxml.etree as ET
Expand Down Expand Up @@ -55,18 +56,45 @@ def is_relative_url(url: str) -> bool:
return not bool(urlparts.scheme) and not bool(urlparts.netloc)


def emoji_generator(
    index: str,
    shortname: str,
    alias: Optional[str],
    uc: Optional[str],
    alt: str,
    title: Optional[str],
    category: Optional[str],
    options: Dict[str, Any],
    md: markdown.Markdown,
) -> xml.etree.ElementTree.Element:
    """
    Builds the placeholder element for an emoji, registered as the generator for `pymdownx.emoji`.

    The result is `<span data-emoji="NAME">TEXT</span>`. Only `shortname`, `alias`, `uc` and `alt` are consulted;
    the remaining parameters are accepted but not used.

    :param shortname: Emoji short name, used when no alias is given; surrounding colons are stripped.
    :param alias: Alternative short name that takes precedence over `shortname` when set.
    :param uc: Hyphen-separated hexadecimal Unicode code points, or `None`.
    :param alt: Text used as the element content when `uc` is `None`.
    :returns: A `span` element carrying the emoji name as an attribute and the emoji characters as text.
    """

    if uc is None:
        text = alt
    else:
        # each hyphen-separated item is the hexadecimal value of one Unicode code point
        code_points = [int(item, base=16) for item in uc.split("-")]
        text = "".join(map(chr, code_points))

    attributes = {"data-emoji": (alias or shortname).strip(":")}
    span = xml.etree.ElementTree.Element("span", attributes)
    span.text = text
    return span


def markdown_to_html(content: str) -> str:
    "Renders Markdown text as HTML with the set of extensions listed below."

    enabled_extensions = [
        "admonition",
        "markdown.extensions.tables",
        "markdown.extensions.fenced_code",
        "pymdownx.emoji",
        "pymdownx.magiclink",
        "pymdownx.tilde",
        "sane_lists",
        "md_in_html",
    ]

    # emoji short codes are emitted via `emoji_generator` rather than the extension's default generator
    settings = {
        "pymdownx.emoji": {
            "emoji_generator": emoji_generator,
        }
    }

    return markdown.markdown(
        content,
        extensions=enabled_extensions,
        extension_configs=settings,
    )


Expand All @@ -81,6 +109,7 @@ def _elements_from_strings(dtd_path: Path, items: List[str]) -> ET._Element:

parser = ET.XMLParser(
remove_blank_text=True,
remove_comments=True,
strip_cdata=False,
load_dtd=True,
)
Expand Down Expand Up @@ -678,6 +707,23 @@ def _transform_section(self, elem: ET._Element) -> ET._Element:
AC("rich-text-body", {}, *list(elem)),
)

def _transform_emoji(self, elem: ET._Element) -> ET._Element:
    "Creates an `ac:emoticon` element from an element that carries a `data-emoji` attribute."

    ac_namespace = namespaces["ac"]

    # examples of the target markup:
    # <ac:emoticon ac:name="wink" ac:emoji-shortname=":wink:" ac:emoji-id="1f609" ac:emoji-fallback="&#128521;"/>
    # <ac:emoticon ac:name="blue-star" ac:emoji-shortname=":heavy_plus_sign:" ac:emoji-id="2795" ac:emoji-fallback="&#10133;"/>
    # <ac:emoticon ac:name="blue-star" ac:emoji-shortname=":heavy_minus_sign:" ac:emoji-id="2796" ac:emoji-fallback="&#10134;"/>
    attributes = {
        # use "blue-star" as a placeholder name to ensure wiki page loads in timely manner
        ET.QName(ac_namespace, "name"): "blue-star",
        # a missing attribute yields an empty short name, i.e. "::"
        ET.QName(ac_namespace, "emoji-shortname"): f":{elem.attrib.get('data-emoji', '')}:",
        # the element text serves as the fallback; empty string when the element has no text
        ET.QName(ac_namespace, "emoji-fallback"): elem.text or "",
    }
    return AC("emoticon", attributes)

def transform(self, child: ET._Element) -> Optional[ET._Element]:
# normalize line breaks to regular space in element text
if child.text:
Expand Down Expand Up @@ -764,6 +810,9 @@ def transform(self, child: ET._Element) -> Optional[ET._Element]:
elif child.tag == "pre" and len(child) == 1 and child[0].tag == "code":
return self._transform_block(child[0])

elif child.tag == "span" and child.attrib.has_key("data-emoji"):
return self._transform_emoji(child)

return None


Expand Down Expand Up @@ -963,3 +1012,39 @@ def elements_to_string(root: ET._Element) -> str:
return m.group(1)
else:
raise ValueError("expected: Confluence content")


def _content_to_string(dtd_path: Path, content: str) -> str:
    """
    Wraps Confluence Storage Format content in a root element, parses it and pretty-prints the result.

    :param dtd_path: Path to the DTD referenced in the document type declaration.
    :param content: Confluence Storage Format markup to be placed inside the root element.
    :returns: The pretty-printed XML document, including the wrapping root element.
    """

    parser = ET.XMLParser(
        remove_blank_text=True,
        remove_comments=True,
        strip_cdata=False,
        load_dtd=True,
    )

    # declare all known namespace prefixes on the root element so that prefixed names in the content resolve
    ns_attr_list = "".join(
        f' xmlns:{key}="{value}"' for key, value in namespaces.items()
    )

    # each list item is a separate fragment of the document; the parser receives them in sequence
    data = [
        '<?xml version="1.0"?>',
        f'<!DOCTYPE ac:confluence PUBLIC "-//Atlassian//Confluence 4 Page//EN" "{dtd_path}">',
        f"<root{ns_attr_list}>",
        content,
        "</root>",
    ]

    tree = ET.fromstringlist(data, parser=parser)
    return ET.tostring(tree, pretty_print=True).decode("utf-8")


def content_to_string(content: str) -> str:
    "Turns a Confluence Storage Format document returned by the API into a human-readable XML document."

    # the bundled `entities.dtd` is located with whichever `resources` API the running Python version offers
    if sys.version_info < (3, 9):
        with resources.path(__package__, "entities.dtd") as dtd_path:
            return _content_to_string(dtd_path, content)
    else:
        dtd_resource = resources.files(__package__).joinpath("entities.dtd")
        with resources.as_file(dtd_resource) as dtd_path:
            return _content_to_string(dtd_path, content)
48 changes: 48 additions & 0 deletions md2conf/emoji.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import pathlib

import pymdownx.emoji1_db as emoji_db


def generate_source(path: pathlib.Path) -> None:
    "Writes a Markdown document with a table row for each emoji in the database, for testing emojis."

    emojis = emoji_db.emoji

    # fixed lines that precede the table rows; an empty string produces a blank line
    header = (
        "<!-- confluence-page-id: 86918529216 -->",
        "<!-- This file has been generated by a script. -->",
        "",
        "## Emoji",
        "",
        "| Icon | Emoji code |",
        "| ---- | ---------- |",
    )

    with open(path, "w") as f:
        for line in header:
            print(line, file=f)

        for shortname in emojis:
            # database keys may be wrapped in colons; strip them before re-adding exactly one pair
            code = shortname.strip(":")
            print(f"| :{code}: | `:{code}:` |", file=f)


def generate_target(path: pathlib.Path) -> None:
    "Writes a Confluence Storage Format (XML) document with a table row for each emoji in the database, for testing emojis."

    emojis = emoji_db.emoji

    # fixed markup that precedes the table rows
    preamble = (
        '<ac:structured-macro ac:name="info" ac:schema-version="1">',
        "<ac:rich-text-body>",
        "<p>This page has been generated with a tool.</p>",
        "</ac:rich-text-body>",
        "</ac:structured-macro>",
        "<h2>Emoji</h2>",
        "<table>",
        "<thead><tr><th>Icon</th><th>Emoji code</th></tr></thead>",
        "<tbody>",
    )

    with open(path, "w") as f:
        for line in preamble:
            print(line, file=f)

        for shortname, data in emojis.items():
            code = shortname.strip(":")

            # the "unicode" entry holds hyphen-separated hexadecimal values; emit one character reference per value
            code_points = data["unicode"].split("-")
            fallback = "".join(f"&#x{item};" for item in code_points)

            icon = f'<ac:emoticon ac:name="blue-star" ac:emoji-shortname=":{code}:" ac:emoji-fallback="{fallback}"/>'
            print(
                f"<tr><td>{icon}</td><td><code>:{code}:</code></td></tr>",
                file=f,
            )

        print("</tbody>", file=f)
        print("</table>", file=f)
print("</table>", file=f)
19 changes: 19 additions & 0 deletions md2conf/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import sys

if sys.version_info < (3, 9):

    def removeprefix(string: str, prefix: str) -> str:
        "Strips the prefix from the start of the string if present; otherwise, returns the string unchanged."

        # `str.removeprefix` is unavailable before Python 3.9, so slice the prefix off manually
        if not string.startswith(prefix):
            return string
        return string[len(prefix) :]

else:

    def removeprefix(string: str, prefix: str) -> str:
        "Strips the prefix from the start of the string if present; otherwise, returns the string unchanged."

        return string.removeprefix(prefix)
10 changes: 10 additions & 0 deletions sample/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,18 +132,28 @@ Markdown has no native support for admonitions. Admonitions that follow the [Pyt

Alerts are a Markdown extension based on the blockquote syntax that you can use to emphasize critical information. [GitHub](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax#alerts) and [GitLab](https://docs.gitlab.com/ee/development/documentation/styleguide/#alert-boxes) display them with distinctive colors and icons to indicate the significance of the content. When converted to Confluence, they are represented as structured macros, which are displayed as info panels.

Note:

> [!NOTE]
> Useful information that users should know, even when skimming content.
Tip:

> [!TIP]
> Helpful advice for doing things better or more easily.
Important:

> [!IMPORTANT]
> Key information users need to know to achieve their goal.
Warning:

> [!WARNING]
> Urgent info that needs immediate user attention to avoid problems.
Caution:

> [!CAUTION]
> Advises about risks or negative outcomes of certain actions.
Expand Down
1 change: 1 addition & 0 deletions tests/source/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/emoji.md
1 change: 1 addition & 0 deletions tests/target/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
/emoji.xml
15 changes: 8 additions & 7 deletions tests/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
import os
import os.path
import re
import shutil
import unittest
from pathlib import Path

import md2conf.emoji as emoji
from md2conf.converter import (
ConfluenceDocument,
ConfluenceDocumentOptions,
Expand Down Expand Up @@ -36,21 +36,22 @@ def standardize(content: str) -> str:


class TestConversion(unittest.TestCase):
out_dir: Path
source_dir: Path
target_dir: Path

def setUp(self) -> None:
self.maxDiff = None

test_dir = Path(__file__).parent
self.out_dir = test_dir / "output"
self.source_dir = test_dir / "source"
self.target_dir = test_dir / "target"
os.makedirs(self.out_dir, exist_ok=True)

def tearDown(self) -> None:
shutil.rmtree(self.out_dir)

def test_markdown(self) -> None:
if not os.path.exists(self.source_dir / "emoji.md"):
emoji.generate_source(self.source_dir / "emoji.md")
if not os.path.exists(self.target_dir / "emoji.xml"):
emoji.generate_target(self.target_dir / "emoji.xml")

matcher = Matcher(
MatcherOptions(source=".mdignore", extension="md"), self.source_dir
)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def setUp(self) -> None:
def tearDown(self) -> None:
shutil.rmtree(self.out_dir)

def atest_process_document(self) -> None:
def test_process_document(self) -> None:
options = ConfluenceDocumentOptions(
ignore_invalid_url=False,
generated_by="Test Case",
Expand Down

0 comments on commit 854da28

Please sign in to comment.