Skip to content

Commit

Permalink
Improve error message in util.parse() by incrementally using regexes (
Browse files Browse the repository at this point in the history
#301)

* Improve error message in `util.parse()` by incrementally using regexes

This loses a bit of performance, but allows us to improve the error message
to the user by attaching information about which part of the URI was invalid.

Since we consider the regexes an implementation detail, we do not return
them in the error message.

Changes the failure test to include the expected invalid parts to harden the test case.
  • Loading branch information
nicholasjng authored Dec 20, 2024
1 parent 06134be commit 968652b
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 16 deletions.
39 changes: 27 additions & 12 deletions src/lakefs_spec/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import re
import sys
from collections import namedtuple
from collections.abc import Callable, Generator, Iterable, Iterator
from typing import Any, Protocol

Expand All @@ -16,6 +17,8 @@
lakefs_sdk_version = tuple(int(v) for v in __lakefs_sdk_version__.split("."))
del __lakefs_sdk_version__

_ParsedUri = namedtuple("_ParsedUri", ("repository", "ref", "resource"))


class PaginatedApiResponse(Protocol):
pagination: Pagination
Expand Down Expand Up @@ -115,16 +118,28 @@ def parse(path: str) -> tuple[str, str, str]:
If the path does not conform to the lakeFS URI format.
"""

# First regex reflects the lakeFS repository naming rules:
# only lowercase letters, digits and dash, no leading dash, minimum 3, maximum 63 characters
# https://docs.lakefs.io/understand/model.html#repository
# Second regex is the branch: Only letters, digits, underscores and dash, no leading dash.
path_regex = re.compile(r"(?:lakefs://)?([a-z0-9][a-z0-9\-]{2,62})/(\w[\w\-]*)/(.*)")
results = path_regex.fullmatch(path)
if results is None:
raise ValueError(
f"expected path with structure lakefs://<repo>/<ref>/<resource>, got {path!r}"
)

repo, ref, resource = results.groups()
uri_parts = {
"protocol": r"^(?:lakefs://)?", # leading lakefs:// protocol (optional)
"repository": r"(?P<repository>[a-z0-9][a-z0-9\-]{2,62})/",
"ref expression": r"(?P<ref>\w[\w\-]*)/",
"resource": r"(?P<resource>.*)",
}

groups: dict[str, str] = {}
start = 0
for group, regex in uri_parts.items():
# we parse iteratively to improve the error message for the user if an invalid URI is given.
# by going front to back and parsing each part successively, we obtain the current path segment,
# and print it out to the user if it does not conform to our assumption of the lakeFS URI spec.
match = re.match(regex, path[start:])
# the next part of the URI is marked by a slash, or the end if we're parsing the resource.
segment = path[start : path.find("/", start)]
if match is None:
raise ValueError(
f"not a valid lakeFS URI: {path!r} (hint: invalid {group} {segment!r})"
)
groups.update(match.groupdict())
start += match.end()

repo, ref, resource = _ParsedUri(**groups)
return repo, ref, resource
8 changes: 4 additions & 4 deletions tests/test_spec_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ def test_path_parsing_success(path: str, repo: str, ref: str, resource: str) ->
"path,expected_exception",
[
# repo name illegally begins with hyphen
("-repo/my-ref/resource.txt", pytest.raises(ValueError, match="expected path .*")),
("-repo/my-ref/resource.txt", pytest.raises(ValueError, match="invalid repository")),
# repo name contains an illegal uppercase letter
("Repo/my-ref/resource.txt", pytest.raises(ValueError, match="expected path .*")),
("Repo/my-ref/resource.txt", pytest.raises(ValueError, match="invalid repository")),
# missing repo name
("my-ref/resource.txt", pytest.raises(ValueError, match="expected path .*")),
("my-ref/resource.txt", pytest.raises(ValueError, match="invalid ref expression")),
# illegal branch name
("repo/my-ref$$$/resource.txt", pytest.raises(ValueError, match="expected path .*")),
("repo/my-ref$$$/resource.txt", pytest.raises(ValueError, match="invalid ref expression")),
],
)
def test_path_parsing_failure(path: str, expected_exception: AbstractContextManager) -> None:
Expand Down

0 comments on commit 968652b

Please sign in to comment.