Skip to content

Commit

Permalink
Close #11: substitute improperly formatted Unicode literals before load
Browse files Browse the repository at this point in the history
  • Loading branch information
syntaxaire committed Aug 17, 2019
1 parent 99ff398 commit 23769e2
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 7 deletions.
6 changes: 5 additions & 1 deletion analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def print_wiki_nonwiki():
print(pre, '✅' if obj.is_wiki_eligible() else '❌', obj.displayname, f'({obj.name})')


def get_wikified_nonwiki():
def print_wikified_nonwiki():
"""Check the wiki for any articles that aren't supposed to exist."""
for name, qud_object in qindex.items():
if not qud_object.is_wiki_eligible():
Expand All @@ -55,6 +55,10 @@ def get_wikified_nonwiki():
pass


def print_value_weight_ratio():
"""Calculate the value/weight ratio for all items in wiki table format."""


qud_object_tree.load(FILE)

# get_bad_tiles()
Expand Down
19 changes: 13 additions & 6 deletions qud_object_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
For on-demand access to individual Qud objects by name, use the `qindex` from qudobject.py."""

import re
import time

from xml.etree import ElementTree as et

Expand All @@ -11,15 +12,20 @@

def load(path):
"""Load ObjectBlueprints.xml from the specified filepath and return a reference to the root."""
print("Repairing invalid XML characters...")
print("Repairing invalid XML characters... ", end='')
start = time.time()
# Do some repair of invalid XML:
# First, delete some invalid characters
pat_invalid = re.compile("(&#15;)|(&#11;)")
# First, replace some invalid control characters intended for CP437 with their Unicode equiv
with open(path, 'r', encoding='utf-8') as f:
contents = f.read()
contents = re.sub(pat_invalid, '', contents)

print("Repairing invalid XML line breaks...")
ch_re = re.compile("&#11;")
contents = re.sub(ch_re, '♂', contents)
ch_re = re.compile("&#15;")
contents = re.sub(ch_re, '☼', contents)
print(f"done in {time.time() - start:.2f} seconds")

print("Repairing invalid XML line breaks... ", end='')
start = time.time()
# Second, replace line breaks inside attributes with proper XML line breaks
# ^\s*<[^!][^>]*\n[^>]*>
pat_linebreaks = r"^\s*<[^!][^>]*\n.*?>"
Expand All @@ -33,6 +39,7 @@ def load(path):
# Uncomment to have a diff-able file to double check XML repairs.
# with open('test_output.xml', 'w', encoding='utf-8') as f:
# f.write(contents)
print(f"done in {time.time() - start:.2f} seconds")

raw = et.fromstring(contents)

Expand Down

0 comments on commit 23769e2

Please sign in to comment.