From 62dd7f901b29f7b3c8d1aa0113a7c8da69f100a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 3 Feb 2024 16:22:09 +0100
Subject: [PATCH 1/5] implement Sphinx inventory format v3 (load_v3, dump)

---
 sphinx/util/inventory.py                      | 89 ++++++++++++++++++-
 tests/roots/test-ext-intersphinx-ws/conf.py   |  4 +
 tests/roots/test-ext-intersphinx-ws/index.rst | 47 ++++++++++
 tests/test_extensions/test_ext_intersphinx.py | 29 ++++++
 4 files changed, 166 insertions(+), 3 deletions(-)
 create mode 100644 tests/roots/test-ext-intersphinx-ws/conf.py
 create mode 100644 tests/roots/test-ext-intersphinx-ws/index.rst
diff --git a/sphinx/util/inventory.py b/sphinx/util/inventory.py
index 2b466b79db3..00e585c53d0 100644
--- a/sphinx/util/inventory.py
+++ b/sphinx/util/inventory.py
@@ -75,6 +75,8 @@ def read_compressed_lines(self) -> Iterator[str]:
                 pos = buf.find(b'\n')
 
 
+
+
 class InventoryFile:
     @classmethod
     def load(cls: type[InventoryFile], stream: IO, uri: str, joinfunc: Callable) -> Inventory:
@@ -84,6 +86,8 @@ def load(cls: type[InventoryFile], stream: IO, uri: str, joinfunc: Callable) ->
             return cls.load_v1(reader, uri, joinfunc)
         elif line == '# Sphinx inventory version 2':
             return cls.load_v2(reader, uri, joinfunc)
+        elif line == '# Sphinx inventory version 3':
+            return cls.load_v3(reader, uri, joinfunc)
         else:
             raise ValueError('invalid inventory header: %s' % line)
 
@@ -143,6 +147,75 @@ def load_v2(
             invdata.setdefault(type, {})[name] = inv_item
         return invdata
 
+    @classmethod
+    def load_v3(
+        cls: type[InventoryFile], stream: InventoryFileReader, uri: str, join: Callable
+    ) -> Inventory:
+        invdata: Inventory = {}
+        projname = stream.readline().rstrip()[11:]
+        version = stream.readline().rstrip()[11:]
+        line = stream.readline()
+        if 'zlib' not in line:
+            raise ValueError('invalid inventory header (not compressed): %s' % line)
+
+        data_before_name = re.compile(r'^(-?\d+)(:\d+)?\s', flags=re.VERBOSE)
+        # pattern when the name does not have spaces
+        name_pattern = re.compile(r'^(.+?)\s+\S+\s+?\S*\s+.*', flags=re.VERBOSE)
+        # pattern for the string after the name
+        data_after_name = re.compile(
+            r'^(?P<reftype>\S+)\s+(?P<location>\S*)\s+(?P<dispname>.*)',
+            flags=re.VERBOSE
+        )
+
+        for line in stream.read_compressed_lines():
+            line = line.rstrip()
+
+            if (before_name := data_before_name.match(line)) is None:
+                continue
+
+            priority_string, namesize = before_name.groups(None)
+            priority = int(priority_string)  # currently unused
+
+            # remove what was just matched
+            line = line[before_name.end():]
+
+            if namesize is None:
+                if (name := name_pattern.match(line)) is None:
+                    continue
+
+                name = name.group(1)
+                namesize = len(name)
+            else:
+                namesize = int(namesize[1:])  # remove leading ':'
+                name = line[:namesize]
+            assert len(name) == namesize
+
+            # remove the 'name' part
+            line = line[namesize + 1:]
+
+            if (data := data_after_name.match(line)) is None:
+                continue
+
+            reftype, location, dispname = data.groups()
+
+            if ':' not in reftype:
+                # wrong type value. type should be in the form of "{domain}:{objtype}"
+                #
+                # Note: To avoid the regex DoS, this is implemented in python (refs: #8175)
+                continue
+            if reftype == 'py:module' and reftype in invdata and name in invdata[reftype]:
+                # due to a bug in 1.1 and below,
+                # two inventory entries are created
+                # for Python modules, and the first
+                # one is correct
+                continue
+            if location.endswith('$'):
+                location = location[:-1] + name
+            location = join(uri, location)
+            inv_item: InventoryItem = projname, version, location, dispname
+            invdata.setdefault(reftype, {})[name] = inv_item
+        return invdata
+
     @classmethod
     def dump(
         cls: type[InventoryFile], filename: str, env: BuildEnvironment, builder: Builder,
@@ -152,7 +225,7 @@ def escape(string: str) -> str:
 
         with open(os.path.join(filename), 'wb') as f:
             # header
-            f.write(('# Sphinx inventory version 2\n'
+            f.write(('# Sphinx inventory version 3\n'
                      '# Project: %s\n'
                      '# Version: %s\n'
                      '# The remainder of this file is compressed using zlib.\n' %
@@ -172,7 +245,17 @@ def escape(string: str) -> str:
                         uri += '#' + anchor
                     if dispname == name:
                         dispname = '-'
-                    entry = ('%s %s:%s %s %s %s\n' %
-                             (name, domainname, typ, prio, uri, dispname))
+
+                    # For names with spaces, we need to know exactly where
+                    # the ref-type string starts. Technically, we should not
+                    # have ':' inside domain or role names, but extensions
+                    # may have some weird role names and they could handle
+                    # them internally to be docutils compatible. As such,
+                    # we encode the length of the name as the priority
+                    # fractional part (so that we can easily extract it).
+                    slen = f':{len(name)}' if ' ' in name else ''
+                    entry = '%s%s %s %s:%s %s %s\n' % (
+                        prio, slen, name, domainname, typ, uri, dispname
+                    )
                     f.write(compressor.compress(entry.encode()))
             f.write(compressor.flush())
diff --git a/tests/roots/test-ext-intersphinx-ws/conf.py b/tests/roots/test-ext-intersphinx-ws/conf.py
new file mode 100644
index 00000000000..093d3756a3e
--- /dev/null
+++ b/tests/roots/test-ext-intersphinx-ws/conf.py
@@ -0,0 +1,4 @@
+extensions = ['sphinx.ext.intersphinx', 'sphinx.ext.autosectionlabel']
+autosectionlabel_prefix_document = True
+autosectionlabel_maxdepth = 0
+intersphinx_mapping = {}
diff --git a/tests/roots/test-ext-intersphinx-ws/index.rst b/tests/roots/test-ext-intersphinx-ws/index.rst
new file mode 100644
index 00000000000..934574e477a
--- /dev/null
+++ b/tests/roots/test-ext-intersphinx-ws/index.rst
@@ -0,0 +1,47 @@
+1 OK
+----
+:ref:`index:1 OK`
+
+OK 1
+----
+:ref:`index:OK 1`
+
+OK 1 OK
+-------
+:ref:`index:OK 1 OK`
+
+123 OK
+------
+:ref:`index:123 OK`
+
+1 2 OK
+------
+:ref:`index:1 2 OK`
+
+1 2 3 OK
+--------
+:ref:`index:1 2 3 OK`
+
+OK OK 1
+-------
+:ref:`index:OK OK 1`
+
+OK OK 2 OK OK
+-------------
+:ref:`index:OK OK 2 OK OK`
+
+OK 1 2 OK
+---------
+:ref:`index:OK 1 2 OK`
+
+OK 1 OK 2
+---------
+:ref:`index:OK 1 OK 2`
+
+OK 1 2 3
+--------
+:ref:`index:OK 1 2 3`
+
+1 OK 1
+------
+:ref:`index:1 OK 1`
diff --git a/tests/test_extensions/test_ext_intersphinx.py b/tests/test_extensions/test_ext_intersphinx.py
index bbe08d66bd7..ffa33cd6079 100644
--- a/tests/test_extensions/test_ext_intersphinx.py
+++ b/tests/test_extensions/test_ext_intersphinx.py
@@ -1,6 +1,7 @@
 """Test the intersphinx extension."""
 
 import http.server
+import posixpath
 from unittest import mock
 
 import pytest
@@ -18,6 +19,7 @@
     normalize_intersphinx_mapping,
 )
 from sphinx.ext.intersphinx import setup as intersphinx_setup
+from sphinx.util.inventory import InventoryFile
 
 from tests.test_util.test_util_inventory import inventory_v2, inventory_v2_not_having_version
 from tests.utils import http_server
@@ -568,3 +570,30 @@ def test_intersphinx_role(app, warning):
 
     # explicit title
     assert html.format('index.html#foons') in content
+
+
+@pytest.mark.sphinx('html', testroot='ext-intersphinx-ws')
+def test_intersphinx_whitespace_targets(app):
+    app.build()
+
+    with open(app.outdir / 'objects.inv', 'rb') as fp:
+        invdata = InventoryFile.load(fp, '', posixpath.join)
+
+    assert invdata['std:label'] == {
+        'genindex': ('Python', '', 'genindex.html', 'Index'),
+        'index:1 2 3 ok': ('Python', '', 'index.html#id3', '1 2 3 OK'),
+        'index:1 2 ok': ('Python', '', 'index.html#id2', '1 2 OK'),
+        'index:1 ok': ('Python', '', 'index.html#ok', '1 OK'),
+        'index:1 ok 1': ('Python', '', 'index.html#id4', '1 OK 1'),
+        'index:123 ok': ('Python', '', 'index.html#id1', '123 OK'),
+        'index:ok 1': ('Python', '', 'index.html#ok-1', 'OK 1'),
+        'index:ok 1 2 3': ('Python', '', 'index.html#ok-1-2-3', 'OK 1 2 3'),
+        'index:ok 1 2 ok': ('Python', '', 'index.html#ok-1-2-ok', 'OK 1 2 OK'),
+        'index:ok 1 ok': ('Python', '', 'index.html#ok-1-ok', 'OK 1 OK'),
+        'index:ok 1 ok 2': ('Python', '', 'index.html#ok-1-ok-2', 'OK 1 OK 2'),
+        'index:ok ok 1': ('Python', '', 'index.html#ok-ok-1', 'OK OK 1'),
+        'index:ok ok 2 ok ok': ('Python', '', 'index.html#ok-ok-2-ok-ok', 'OK OK 2 OK OK'),
+        'modindex': ('Python', '', 'py-modindex.html', 'Module Index'),
+        'py-modindex': ('Python', '', 'py-modindex.html', 'Python Module Index'),
+        'search': ('Python', '', 'search.html', 'Search Page')
+    }

From 87a9b3e2610991fd8ba0e099df13398fa29431b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 3 Feb 2024 16:35:33 +0100
Subject: [PATCH 2/5] fix lint (trailing commas, blank lines, local names)

---
 sphinx/util/inventory.py                      | 20 +++++++++----------
 tests/test_extensions/test_ext_intersphinx.py |  2 +-
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/sphinx/util/inventory.py b/sphinx/util/inventory.py
index 00e585c53d0..1b63ef25f3e 100644
--- a/sphinx/util/inventory.py
+++ b/sphinx/util/inventory.py
@@ -75,8 +75,6 @@ def read_compressed_lines(self) -> Iterator[str]:
                 pos = buf.find(b'\n')
 
 
-
-
 class InventoryFile:
     @classmethod
     def load(cls: type[InventoryFile], stream: IO, uri: str, joinfunc: Callable) -> Inventory:
@@ -149,7 +147,7 @@ def load_v2(
 
     @classmethod
     def load_v3(
-        cls: type[InventoryFile], stream: InventoryFileReader, uri: str, join: Callable
+        cls: type[InventoryFile], stream: InventoryFileReader, uri: str, join: Callable,
     ) -> Inventory:
         invdata: Inventory = {}
         projname = stream.readline().rstrip()[11:]
@@ -164,7 +162,7 @@ def load_v3(
         # pattern for the string after the name
         data_after_name = re.compile(
             r'^(?P<reftype>\S+)\s+(?P<location>\S*)\s+(?P<dispname>.*)',
-            flags=re.VERBOSE
+            flags=re.VERBOSE,
         )
 
         for line in stream.read_compressed_lines():
@@ -173,20 +171,20 @@ def load_v3(
             if (before_name := data_before_name.match(line)) is None:
                 continue
 
-            priority_string, namesize = before_name.groups(None)
-            priority = int(priority_string)  # currently unused
+            s_priority, s_namesize = before_name.groups(None)
+            _priority = int(s_priority)  # currently unused
 
             # remove what was just matched
             line = line[before_name.end():]
 
-            if namesize is None:
-                if (name := name_pattern.match(line)) is None:
+            if s_namesize is None:
+                if (m := name_pattern.match(line)) is None:
                     continue
 
-                name = name.group(1)
+                name = m.group(1)
                 namesize = len(name)
             else:
-                namesize = int(namesize[1:])  # remove leading ':'
+                namesize = int(s_namesize[1:])  # remove leading ':'
                 name = line[:namesize]
             assert len(name) == namesize
 
@@ -255,7 +253,7 @@ def escape(string: str) -> str:
                     # fractional part (so that we can easily extract it).
                     slen = f':{len(name)}' if ' ' in name else ''
                     entry = '%s%s %s %s:%s %s %s\n' % (
-                        prio, slen, name, domainname, typ, uri, dispname
+                        prio, slen, name, domainname, typ, uri, dispname,
                     )
                     f.write(compressor.compress(entry.encode()))
             f.write(compressor.flush())
diff --git a/tests/test_extensions/test_ext_intersphinx.py b/tests/test_extensions/test_ext_intersphinx.py
index ffa33cd6079..d623fef46e8 100644
--- a/tests/test_extensions/test_ext_intersphinx.py
+++ b/tests/test_extensions/test_ext_intersphinx.py
@@ -595,5 +595,5 @@ def test_intersphinx_whitespace_targets(app):
         'index:ok ok 2 ok ok': ('Python', '', 'index.html#ok-ok-2-ok-ok', 'OK OK 2 OK OK'),
         'modindex': ('Python', '', 'py-modindex.html', 'Module Index'),
         'py-modindex': ('Python', '', 'py-modindex.html', 'Python Module Index'),
-        'search': ('Python', '', 'search.html', 'Search Page')
+        'search': ('Python', '', 'search.html', 'Search Page'),
     }

From 8e1802dd0b1697424aadd49ef835872b21393811 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 3 Feb 2024 16:38:51 +0100
Subject: [PATCH 3/5] fix lint (drop unused priority local in load_v3)

---
 sphinx/util/inventory.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sphinx/util/inventory.py b/sphinx/util/inventory.py
index 1b63ef25f3e..302e57b7586 100644
--- a/sphinx/util/inventory.py
+++ b/sphinx/util/inventory.py
@@ -171,8 +171,8 @@ def load_v3(
             if (before_name := data_before_name.match(line)) is None:
                 continue
 
-            s_priority, s_namesize = before_name.groups(None)
-            _priority = int(s_priority)  # currently unused
+            # currently, we do not use the priority, but maybe in the future
+            _, s_namesize = before_name.groups(None)
 
             # remove what was just matched
             line = line[before_name.end():]

From 14286f848a8dd806c57eaa09c54c7f9dd58f2407 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 3 Feb 2024 16:43:19 +0100
Subject: [PATCH 4/5] update comment

---
 sphinx/util/inventory.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sphinx/util/inventory.py b/sphinx/util/inventory.py
index 302e57b7586..568d534877a 100644
--- a/sphinx/util/inventory.py
+++ b/sphinx/util/inventory.py
@@ -249,8 +249,7 @@ def escape(string: str) -> str:
                     # have ':' inside domain or role names, but extensions
                     # may have some weird role names and they could handle
                     # them internally to be docutils compatible. As such,
-                    # we encode the length of the name as the priority
-                    # fractional part (so that we can easily extract it).
+                    # we encode the length of the name after the priority.
                     slen = f':{len(name)}' if ' ' in name else ''
                     entry = '%s%s %s %s:%s %s %s\n' % (
                         prio, slen, name, domainname, typ, uri, dispname,

From 30598a4dc7e1520ecc4976ccd01ca2456e789fe3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 23 Mar 2024 15:34:40 +0100
Subject: [PATCH 5/5] fixup: annotate load_v3 join as Callable[[str, str], str]

---
 sphinx/util/inventory.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/sphinx/util/inventory.py b/sphinx/util/inventory.py
index 0c2ee0cacd7..ddf70fc3b04 100644
--- a/sphinx/util/inventory.py
+++ b/sphinx/util/inventory.py
@@ -158,7 +158,10 @@ def load_v2(
 
     @classmethod
     def load_v3(
-        cls: type[InventoryFile], stream: InventoryFileReader, uri: str, join: Callable,
+        cls: type[InventoryFile],
+        stream: InventoryFileReader,
+        uri: str,
+        join: Callable[[str, str], str],
     ) -> Inventory:
         invdata: Inventory = {}
         projname = stream.readline().rstrip()[11:]