-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_mermaid_graph.py
82 lines (72 loc) · 3.5 KB
/
generate_mermaid_graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import re
def sanitize_node_id(term):
# Replace spaces with underscores and prepend 'id_'
return 'id_' + re.sub(r'\s+', '_', term.strip()).lower()
def format_node_label(term):
# Replace underscores with spaces and capitalize all words
return ' '.join(word.capitalize() for word in term.split('_'))
def format_node_link(term):
# Create the relative URL for the node
return f'../{term.lower().replace(" ", "_")}'
def extract_relevant_section(content):
# Extract the text between "## 1 Definition" and "### Notes"
pattern = re.compile(r'## 1 Definition(.*?)### Notes', re.DOTALL)
match = pattern.search(content)
if match:
return match.group(1)
return ""
def parse_markdown_files(directory, exclude_files):
graph = {}
md_link_pattern = re.compile(r'\[([^\]]+)\]\(([^\)]+)\)')
for root, dirs, files in os.walk(directory):
for file in files:
if file.endswith('.md') and file not in exclude_files:
file_path = os.path.join(root, file)
with open(file_path, 'r') as f:
content = f.read()
relevant_section = extract_relevant_section(content)
matches = md_link_pattern.findall(relevant_section)
for match in matches:
term, link = match
# Skip internet links and links with file extensions such as .html
if link.startswith('http://') or link.startswith('https://') or link.endswith('.html'):
continue
link = os.path.normpath(link.strip('/'))
parent_file = link + '.md'
if os.path.basename(parent_file) not in exclude_files:
if parent_file not in graph:
graph[parent_file] = set()
graph[parent_file].add(file)
return graph
def create_mermaid_diagram(graph):
mermaid_diagram = "flowchart TD;\n"
node_ids = {} # Store the sanitized node ids
for parent, children in graph.items():
parent_node = os.path.splitext(os.path.basename(parent))[0]
parent_node_id = sanitize_node_id(parent_node)
parent_node_label = format_node_label(parent_node)
parent_node_link = format_node_link(parent_node)
node_ids[parent_node] = parent_node_id
for child in children:
child_node = os.path.splitext(os.path.basename(child))[0]
child_node_id = sanitize_node_id(child_node)
child_node_label = format_node_label(child_node)
child_node_link = format_node_link(child_node)
node_ids[child_node] = child_node_id
mermaid_diagram += (
f' {parent_node_id}(["<a href=\'{parent_node_link}\'>{parent_node_label}</a>"]) --> '
f'{child_node_id}(["<a href=\'{child_node_link}\'>{child_node_label}</a>"]);\n'
)
return mermaid_diagram
def main(directory, exclude_files):
graph = parse_markdown_files(directory, exclude_files)
mermaid_diagram = create_mermaid_diagram(graph)
with open('docs/mermaid.md', 'w') as f:
f.write('```mermaid\n')
f.write(mermaid_diagram)
f.write('```\n')
if __name__ == "__main__":
directory = './docs' # Replace with your directory path
exclude_files = ['changelog.md', 'impressum.md', 'index.md', 'glossary.md', 'introduction.md', 'mermaid.md'] # Files to be excluded
main(directory, exclude_files)