From 62dd7f901b29f7b3c8d1aa0113a7c8da69f100a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 3 Feb 2024 16:22:09 +0100 Subject: [PATCH 1/5] implement intersphinx v3 --- sphinx/util/inventory.py | 89 ++++++++++++++++++- tests/roots/test-ext-intersphinx-ws/conf.py | 4 + tests/roots/test-ext-intersphinx-ws/index.rst | 47 ++++++++++ tests/test_extensions/test_ext_intersphinx.py | 29 ++++++ 4 files changed, 166 insertions(+), 3 deletions(-) create mode 100644 tests/roots/test-ext-intersphinx-ws/conf.py create mode 100644 tests/roots/test-ext-intersphinx-ws/index.rst diff --git a/sphinx/util/inventory.py b/sphinx/util/inventory.py index 2b466b79db3..00e585c53d0 100644 --- a/sphinx/util/inventory.py +++ b/sphinx/util/inventory.py @@ -75,6 +75,8 @@ def read_compressed_lines(self) -> Iterator[str]: pos = buf.find(b'\n') + + class InventoryFile: @classmethod def load(cls: type[InventoryFile], stream: IO, uri: str, joinfunc: Callable) -> Inventory: @@ -84,6 +86,8 @@ def load(cls: type[InventoryFile], stream: IO, uri: str, joinfunc: Callable) -> return cls.load_v1(reader, uri, joinfunc) elif line == '# Sphinx inventory version 2': return cls.load_v2(reader, uri, joinfunc) + elif line == '# Sphinx inventory version 3': + return cls.load_v3(reader, uri, joinfunc) else: raise ValueError('invalid inventory header: %s' % line) @@ -143,6 +147,75 @@ def load_v2( invdata.setdefault(type, {})[name] = inv_item return invdata + @classmethod + def load_v3( + cls: type[InventoryFile], stream: InventoryFileReader, uri: str, join: Callable + ) -> Inventory: + invdata: Inventory = {} + projname = stream.readline().rstrip()[11:] + version = stream.readline().rstrip()[11:] + line = stream.readline() + if 'zlib' not in line: + raise ValueError('invalid inventory header (not compressed): %s' % line) + + data_before_name = re.compile(r'^(-?\d+)(:\d+)?\s', flags=re.VERBOSE) + # pattern when the name does not have spaces + 
name_pattern = re.compile(r'^(.+?)\s+\S+\s+?\S*\s+.*', flags=re.VERBOSE) + # pattern for the string after the name + data_after_name = re.compile( + r'^(?P<reftype>\S+)\s+(?P<location>\S*)\s+(?P<dispname>.*)', + flags=re.VERBOSE + ) + + for line in stream.read_compressed_lines(): + line = line.rstrip() + + if (before_name := data_before_name.match(line)) is None: + continue + + priority_string, namesize = before_name.groups(None) + priority = int(priority_string) # currently unused + + # remove what was just matched + line = line[before_name.end():] + + if namesize is None: + if (name := name_pattern.match(line)) is None: + continue + + name = name.group(1) + namesize = len(name) + else: + namesize = int(namesize[1:]) # remove leading ':' + name = line[:namesize] + assert len(name) == namesize + + # remove the 'name' part + line = line[namesize + 1:] + + if (data := data_after_name.match(line)) is None: + continue + + reftype, location, dispname = data.groups() + + if ':' not in reftype: + # wrong type value. type should be in the form of "{domain}:{objtype}" + # + # Note: To avoid the regex DoS, this is implemented in python (refs: #8175) + continue + if reftype == 'py:module' and reftype in invdata and name in invdata[reftype]: + # due to a bug in 1.1 and below, + # two inventory entries are created + # for Python modules, and the first + # one is correct + continue + if location.endswith('$'): + location = location[:-1] + name + location = join(uri, location) + inv_item: InventoryItem = projname, version, location, dispname + invdata.setdefault(reftype, {})[name] = inv_item + return invdata + @classmethod def dump( cls: type[InventoryFile], filename: str, env: BuildEnvironment, builder: Builder, @@ -152,7 +225,7 @@ def escape(string: str) -> str: with open(os.path.join(filename), 'wb') as f: # header - f.write(('# Sphinx inventory version 2\n' + f.write(('# Sphinx inventory version 3\n' '# Project: %s\n' '# Version: %s\n' '# The remainder of this file is compressed using zlib.\n' % @@ 
-172,7 +245,17 @@ def escape(string: str) -> str: uri += '#' + anchor if dispname == name: dispname = '-' - entry = ('%s %s:%s %s %s %s\n' % - (name, domainname, typ, prio, uri, dispname)) + + # For names with spaces, we need to know exactly where + # the ref-type string starts. Technically, we should not + # have ':' inside domain or role names, but extensions + # may have some weird role names and they could handle + # them internally to be docutils compatible. As such, + # we encode the length of the name as the priority + # fractional part (so that we can easily extract it). + slen = f':{len(name)}' if ' ' in name else '' + entry = '%s%s %s %s:%s %s %s\n' % ( + prio, slen, name, domainname, typ, uri, dispname + ) f.write(compressor.compress(entry.encode())) f.write(compressor.flush()) diff --git a/tests/roots/test-ext-intersphinx-ws/conf.py b/tests/roots/test-ext-intersphinx-ws/conf.py new file mode 100644 index 00000000000..093d3756a3e --- /dev/null +++ b/tests/roots/test-ext-intersphinx-ws/conf.py @@ -0,0 +1,4 @@ +extensions = ['sphinx.ext.intersphinx', 'sphinx.ext.autosectionlabel'] +autosectionlabel_prefix_document = True +autosectionlabel_maxdepth = 0 +intersphinx_mapping = {} diff --git a/tests/roots/test-ext-intersphinx-ws/index.rst b/tests/roots/test-ext-intersphinx-ws/index.rst new file mode 100644 index 00000000000..934574e477a --- /dev/null +++ b/tests/roots/test-ext-intersphinx-ws/index.rst @@ -0,0 +1,47 @@ +1 OK +---- +:ref:`index:1 OK` + +OK 1 +---- +:ref:`index:OK 1` + +OK 1 OK +------- +:ref:`index:OK 1 OK` + +123 OK +------ +:ref:`index:123 OK` + +1 2 OK +------ +:ref:`index:1 2 OK` + +1 2 3 OK +-------- +:ref:`index:1 2 3 OK` + +OK OK 1 +------- +:ref:`index:OK OK 1` + +OK OK 2 OK OK +------------- +:ref:`index:OK OK 2 OK OK` + +OK 1 2 OK +--------- +:ref:`index:OK 1 2 OK` + +OK 1 OK 2 +--------- +:ref:`index:OK 1 OK 2` + +OK 1 2 3 +-------- +:ref:`index:OK 1 2 3` + +1 OK 1 +------ +:ref:`index:1 OK 1` diff --git 
a/tests/test_extensions/test_ext_intersphinx.py b/tests/test_extensions/test_ext_intersphinx.py index bbe08d66bd7..ffa33cd6079 100644 --- a/tests/test_extensions/test_ext_intersphinx.py +++ b/tests/test_extensions/test_ext_intersphinx.py @@ -1,6 +1,7 @@ """Test the intersphinx extension.""" import http.server +import posixpath from unittest import mock import pytest @@ -18,6 +19,7 @@ normalize_intersphinx_mapping, ) from sphinx.ext.intersphinx import setup as intersphinx_setup +from sphinx.util.inventory import InventoryFile from tests.test_util.test_util_inventory import inventory_v2, inventory_v2_not_having_version from tests.utils import http_server @@ -568,3 +570,30 @@ def test_intersphinx_role(app, warning): # explicit title assert html.format('index.html#foons') in content + + +@pytest.mark.sphinx('html', testroot='ext-intersphinx-ws') +def test_intersphinx_whitespace_targets(app): + app.build() + + with open(app.outdir / 'objects.inv', 'rb') as fp: + invdata = InventoryFile.load(fp, '', posixpath.join) + + assert invdata['std:label'] == { + 'genindex': ('Python', '', 'genindex.html', 'Index'), + 'index:1 2 3 ok': ('Python', '', 'index.html#id3', '1 2 3 OK'), + 'index:1 2 ok': ('Python', '', 'index.html#id2', '1 2 OK'), + 'index:1 ok': ('Python', '', 'index.html#ok', '1 OK'), + 'index:1 ok 1': ('Python', '', 'index.html#id4', '1 OK 1'), + 'index:123 ok': ('Python', '', 'index.html#id1', '123 OK'), + 'index:ok 1': ('Python', '', 'index.html#ok-1', 'OK 1'), + 'index:ok 1 2 3': ('Python', '', 'index.html#ok-1-2-3', 'OK 1 2 3'), + 'index:ok 1 2 ok': ('Python', '', 'index.html#ok-1-2-ok', 'OK 1 2 OK'), + 'index:ok 1 ok': ('Python', '', 'index.html#ok-1-ok', 'OK 1 OK'), + 'index:ok 1 ok 2': ('Python', '', 'index.html#ok-1-ok-2', 'OK 1 OK 2'), + 'index:ok ok 1': ('Python', '', 'index.html#ok-ok-1', 'OK OK 1'), + 'index:ok ok 2 ok ok': ('Python', '', 'index.html#ok-ok-2-ok-ok', 'OK OK 2 OK OK'), + 'modindex': ('Python', '', 'py-modindex.html', 'Module Index'), + 
'py-modindex': ('Python', '', 'py-modindex.html', 'Python Module Index'), + 'search': ('Python', '', 'search.html', 'Search Page') + } From 87a9b3e2610991fd8ba0e099df13398fa29431b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 3 Feb 2024 16:35:33 +0100 Subject: [PATCH 2/5] fix lint --- sphinx/util/inventory.py | 20 +++++++++---------- tests/test_extensions/test_ext_intersphinx.py | 2 +- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/sphinx/util/inventory.py b/sphinx/util/inventory.py index 00e585c53d0..1b63ef25f3e 100644 --- a/sphinx/util/inventory.py +++ b/sphinx/util/inventory.py @@ -75,8 +75,6 @@ def read_compressed_lines(self) -> Iterator[str]: pos = buf.find(b'\n') - - class InventoryFile: @classmethod def load(cls: type[InventoryFile], stream: IO, uri: str, joinfunc: Callable) -> Inventory: @@ -149,7 +147,7 @@ def load_v2( @classmethod def load_v3( - cls: type[InventoryFile], stream: InventoryFileReader, uri: str, join: Callable + cls: type[InventoryFile], stream: InventoryFileReader, uri: str, join: Callable, ) -> Inventory: invdata: Inventory = {} projname = stream.readline().rstrip()[11:] @@ -164,7 +162,7 @@ def load_v3( # pattern for the string after the name data_after_name = re.compile( r'^(?P<reftype>\S+)\s+(?P<location>\S*)\s+(?P<dispname>.*)', - flags=re.VERBOSE + flags=re.VERBOSE, ) for line in stream.read_compressed_lines(): @@ -173,20 +171,20 @@ def load_v3( if (before_name := data_before_name.match(line)) is None: continue - priority_string, namesize = before_name.groups(None) - priority = int(priority_string) # currently unused + s_priority, s_namesize = before_name.groups(None) + _priority = int(s_priority) # currently unused # remove what was just matched line = line[before_name.end():] - if namesize is None: - if (name := name_pattern.match(line)) is None: + if s_namesize is None: + if (m := name_pattern.match(line)) is None: continue - name = name.group(1) + name = m.group(1) 
namesize = len(name) else: - namesize = int(namesize[1:]) # remove leading ':' + namesize = int(s_namesize[1:]) # remove leading ':' name = line[:namesize] assert len(name) == namesize @@ -255,7 +253,7 @@ def escape(string: str) -> str: # fractional part (so that we can easily extract it). slen = f':{len(name)}' if ' ' in name else '' entry = '%s%s %s %s:%s %s %s\n' % ( - prio, slen, name, domainname, typ, uri, dispname + prio, slen, name, domainname, typ, uri, dispname, ) f.write(compressor.compress(entry.encode())) f.write(compressor.flush()) diff --git a/tests/test_extensions/test_ext_intersphinx.py b/tests/test_extensions/test_ext_intersphinx.py index ffa33cd6079..d623fef46e8 100644 --- a/tests/test_extensions/test_ext_intersphinx.py +++ b/tests/test_extensions/test_ext_intersphinx.py @@ -595,5 +595,5 @@ def test_intersphinx_whitespace_targets(app): 'index:ok ok 2 ok ok': ('Python', '', 'index.html#ok-ok-2-ok-ok', 'OK OK 2 OK OK'), 'modindex': ('Python', '', 'py-modindex.html', 'Module Index'), 'py-modindex': ('Python', '', 'py-modindex.html', 'Python Module Index'), - 'search': ('Python', '', 'search.html', 'Search Page') + 'search': ('Python', '', 'search.html', 'Search Page'), } From 8e1802dd0b1697424aadd49ef835872b21393811 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 3 Feb 2024 16:38:51 +0100 Subject: [PATCH 3/5] fix lint --- sphinx/util/inventory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sphinx/util/inventory.py b/sphinx/util/inventory.py index 1b63ef25f3e..302e57b7586 100644 --- a/sphinx/util/inventory.py +++ b/sphinx/util/inventory.py @@ -171,8 +171,8 @@ def load_v3( if (before_name := data_before_name.match(line)) is None: continue - s_priority, s_namesize = before_name.groups(None) - _priority = int(s_priority) # currently unused + # currently, we do not use the priority, but maybe in the future + _, s_namesize = before_name.groups(None) # 
remove what was just matched line = line[before_name.end():] From 14286f848a8dd806c57eaa09c54c7f9dd58f2407 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 3 Feb 2024 16:43:19 +0100 Subject: [PATCH 4/5] update comment --- sphinx/util/inventory.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sphinx/util/inventory.py b/sphinx/util/inventory.py index 302e57b7586..568d534877a 100644 --- a/sphinx/util/inventory.py +++ b/sphinx/util/inventory.py @@ -249,8 +249,7 @@ def escape(string: str) -> str: # have ':' inside domain or role names, but extensions # may have some weird role names and they could handle # them internally to be docutils compatible. As such, - # we encode the length of the name as the priority - # fractional part (so that we can easily extract it). + # we encode the length of the name after the priority. slen = f':{len(name)}' if ' ' in name else '' entry = '%s%s %s %s:%s %s %s\n' % ( prio, slen, name, domainname, typ, uri, dispname, From 30598a4dc7e1520ecc4976ccd01ca2456e789fe3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sat, 23 Mar 2024 15:34:40 +0100 Subject: [PATCH 5/5] fixup --- sphinx/util/inventory.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sphinx/util/inventory.py b/sphinx/util/inventory.py index 0c2ee0cacd7..ddf70fc3b04 100644 --- a/sphinx/util/inventory.py +++ b/sphinx/util/inventory.py @@ -158,7 +158,10 @@ def load_v2( @classmethod def load_v3( - cls: type[InventoryFile], stream: InventoryFileReader, uri: str, join: Callable, + cls: type[InventoryFile], + stream: InventoryFileReader, + uri: str, + join: Callable[[str, str], str], ) -> Inventory: invdata: Inventory = {} projname = stream.readline().rstrip()[11:]