diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f4c60b4b3..4ba132b62c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,6 +57,7 @@ Unreleased changes template. {#v0-0-0-fixed} ### Fixed * (gazelle) Providing multiple input requirements files to `gazelle_python_manifest` now works correctly. +* (bazel downloader) Handle trailing slashes in pip index URLs in environment variables {#v0-0-0-added} ### Added diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index 6401a066c2..b633df70d5 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -169,11 +169,11 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs): # them to ctx.download if we want to correctly handle the relative URLs. # TODO: Add a test that env subbed index urls do not leak into the lock file. - real_url = envsubst( + real_url = strip_empty_path_segments(envsubst( url, attr.envsubst, ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get, - ) + )) cache_key = real_url if cache_key in cache: @@ -194,11 +194,13 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs): output = ctx.path(output_str.strip("_").lower() + ".html") + _get_auth = ctx.get_auth if hasattr(ctx, "get_auth") else get_auth + # NOTE: this may have block = True or block = False in the download_kwargs download = ctx.download( url = [real_url], output = output, - auth = get_auth(ctx, [real_url], ctx_attr = attr), + auth = _get_auth(ctx, [real_url], ctx_attr = attr), allow_fail = True, **download_kwargs ) @@ -211,6 +213,27 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs): return _read_index_result(ctx, download, output, real_url, cache, cache_key) +def strip_empty_path_segments(url): + """Removes empty path segments from a URL. Does nothing for urls with no scheme. + + Public only for testing. + + Args: + url: The url to remove empty path segments from + + Returns: + The url with empty path segments removed and any trailing slash preserved. + If the url had no scheme it is returned unchanged. + """ + scheme, _, rest = url.partition("://") + if rest == "": + return url + stripped = "/".join([p for p in rest.split("/") if p]) + if url.endswith("/"): + return "{}://{}/".format(scheme, stripped) + else: + return "{}://{}".format(scheme, stripped) + def _read_index_result(ctx, result, output, url, cache, cache_key): if not result.success: return struct(success = False) diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl index 9b2967b0da..e99662dec0 100644 --- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl +++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl @@ -15,7 +15,7 @@ "" load("@rules_testing//lib:test_suite.bzl", "test_suite") -load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download") # buildifier: disable=bzl-visibility +load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments") # buildifier: disable=bzl-visibility _tests = [] @@ -119,6 +119,121 @@ def _test_fail(env): _tests.append(_test_fail) +def _test_download_url(env): + downloads = {} + + def download(url, output, **kwargs): + _ = kwargs # buildifier: disable=unused-variable + downloads[url[0]] = output + return struct(success = True) + + simpleapi_download( + ctx = struct( + os = struct(environ = {}), + download = download, + read = lambda i: "contents of " + i, + path = lambda i: "path/for/" + i, + get_auth = lambda ctx, urls, ctx_attr: struct(), + ), + attr = struct( + index_url_overrides = {}, + index_url = "https://example.com/main/simple/", + extra_index_urls = [], + sources = ["foo", "bar", "baz"], + envsubst = [], + ), + cache = {}, + parallel_download = False, + ) + + env.expect.that_dict(downloads).contains_exactly({ + "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html", + "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html", + "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html", + }) + +_tests.append(_test_download_url) + +def _test_download_url_parallel(env): + downloads = {} + + def download(url, output, **kwargs): + _ = kwargs # buildifier: disable=unused-variable + downloads[url[0]] = output + return struct(wait = lambda: struct(success = True)) + + simpleapi_download( + ctx = struct( + os = struct(environ = {}), + download = download, + read = lambda i: "contents of " + i, + path = lambda i: "path/for/" + i, + get_auth = lambda ctx, urls, ctx_attr: struct(), + ), + attr = struct( + index_url_overrides = {}, + index_url = "https://example.com/main/simple/", + extra_index_urls = [], + sources = ["foo", "bar", "baz"], + envsubst = [], + ), + cache = {}, + parallel_download = True, + ) + + env.expect.that_dict(downloads).contains_exactly({ + "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html", + "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html", + "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html", + }) + +_tests.append(_test_download_url_parallel) + +def _test_download_envsubst_url(env): + downloads = {} + + def download(url, output, **kwargs): + _ = kwargs # buildifier: disable=unused-variable + downloads[url[0]] = output + return struct(success = True) + + simpleapi_download( + ctx = struct( + os = struct(environ = {"INDEX_URL": "https://example.com/main/simple/"}), + download = download, + read = lambda i: "contents of " + i, + path = lambda i: "path/for/" + i, + get_auth = lambda ctx, urls, ctx_attr: struct(), + ), + attr = struct( + index_url_overrides = {}, + index_url = "$INDEX_URL", + extra_index_urls = [], + sources = ["foo", "bar", "baz"], + envsubst = ["INDEX_URL"], + ), + cache = {}, + parallel_download = False, + ) + + env.expect.that_dict(downloads).contains_exactly({ + "https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html", + "https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html", + "https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html", + }) + +_tests.append(_test_download_envsubst_url) + +def _test_strip_empty_path_segments(env): + env.expect.that_str(strip_empty_path_segments("no/scheme//is/unchanged")).equals("no/scheme//is/unchanged") + env.expect.that_str(strip_empty_path_segments("scheme://with/no/empty/segments")).equals("scheme://with/no/empty/segments") + env.expect.that_str(strip_empty_path_segments("scheme://with//empty/segments")).equals("scheme://with/empty/segments") + env.expect.that_str(strip_empty_path_segments("scheme://with///multiple//empty/segments")).equals("scheme://with/multiple/empty/segments") + env.expect.that_str(strip_empty_path_segments("scheme://with//trailing/slash/")).equals("scheme://with/trailing/slash/") + env.expect.that_str(strip_empty_path_segments("scheme://with/trailing/slashes///")).equals("scheme://with/trailing/slashes/") + +_tests.append(_test_strip_empty_path_segments) + def simpleapi_download_test_suite(name): """Create the test suite.