From d6b28688910955ae28258f73cf610960401bc8b4 Mon Sep 17 00:00:00 2001 From: Javier Maestro Date: Wed, 11 Dec 2024 14:45:04 +0000 Subject: [PATCH] feat: support multiple URLs in sources Fixes #113 The Debian snapshot mirror fails quite often and, while the Cloudflare mirror has had some issues in the past (e.g. its replication lagged for months), it's much more reliable. This commit adds support for multiple URLs in the manifest and the lockfile. See examples/debian_snapshot/bullseye.yaml for sample usage. --- apt/extensions.bzl | 2 +- apt/private/apt_deb_repository.bzl | 60 ++++++++++++++------------ apt/private/deb_resolve.bzl | 9 +++- apt/private/deb_translate_lock.bzl | 4 +- apt/private/lockfile.bzl | 8 +++- e2e/smoke/bullseye.yaml | 5 ++- examples/debian_snapshot/bullseye.yaml | 4 +- 7 files changed, 57 insertions(+), 35 deletions(-) diff --git a/apt/extensions.bzl b/apt/extensions.bzl index 2368cc4..0e388c4 100644 --- a/apt/extensions.bzl +++ b/apt/extensions.bzl @@ -35,7 +35,7 @@ def _distroless_extension(module_ctx): deb_import( name = "%s_%s" % (install.name, package_key), - urls = [package["url"]], + urls = package["urls"], sha256 = package["sha256"], ) diff --git a/apt/private/apt_deb_repository.bzl b/apt/private/apt_deb_repository.bzl index 36a0ceb..27c4541 100644 --- a/apt/private/apt_deb_repository.bzl +++ b/apt/private/apt_deb_repository.bzl @@ -3,7 +3,7 @@ load(":util.bzl", "util") load(":version_constraint.bzl", "version_constraint") -def _fetch_package_index(rctx, url, dist, comp, arch, integrity): +def _fetch_package_index(rctx, urls, dist, comp, arch, integrity): target_triple = "{dist}/{comp}/{arch}".format(dist = dist, comp = comp, arch = arch) # See https://linux.die.net/man/1/xz , https://linux.die.net/man/1/gzip , and https://linux.die.net/man/1/bzip2 @@ -20,34 +20,40 @@ def _fetch_package_index(rctx, url, dist, comp, arch, integrity): failed_attempts = [] - for (ext, cmd) in supported_extensions: - output = 
"{}/Packages{}".format(target_triple, ext) - dist_url = "{}/dists/{}/{}/binary-{}/Packages{}".format(url, dist, comp, arch, ext) - download = rctx.download( - url = dist_url, - output = output, - integrity = integrity, - allow_fail = True, - ) - decompress_r = None - if download.success: - decompress_r = rctx.execute(cmd + [output]) - if decompress_r.return_code == 0: - integrity = download.integrity - break + url = None + for url in urls: + download = None + for (ext, cmd) in supported_extensions: + output = "{}/Packages{}".format(target_triple, ext) + dist_url = "{}/dists/{}/{}/binary-{}/Packages{}".format(url, dist, comp, arch, ext) + download = rctx.download( + url = dist_url, + output = output, + integrity = integrity, + allow_fail = True, + ) + decompress_r = None + if download.success: + decompress_r = rctx.execute(cmd + [output]) + if decompress_r.return_code == 0: + integrity = download.integrity + break + + failed_attempts.append((dist_url, download, decompress_r)) - failed_attempts.append((dist_url, download, decompress_r)) + if download.success: + break - if len(failed_attempts) == len(supported_extensions): + if len(failed_attempts) == len(supported_extensions) * len(urls): attempt_messages = [] - for (url, download, decompress) in failed_attempts: + for (failed_url, download, decompress) in failed_attempts: reason = "unknown" if not download.success: reason = "Download failed. See warning above for details." elif decompress.return_code != 0: reason = "Decompression failed with non-zero exit code.\n\n{}\n{}".format(decompress.stderr, decompress.stdout) - attempt_messages.append("""\n*) Failed '{}'\n\n{}""".format(url, reason)) + attempt_messages.append("""\n*) Failed '{}'\n\n{}""".format(failed_url, reason)) fail(""" ** Tried to download {} different package indices and all failed. 
@@ -55,9 +61,9 @@ def _fetch_package_index(rctx, url, dist, comp, arch, integrity): {} """.format(len(failed_attempts), "\n".join(attempt_messages))) - return ("{}/Packages".format(target_triple), integrity) + return ("{}/Packages".format(target_triple), url, integrity) -def _parse_repository(state, contents, root): +def _parse_repository(state, contents, roots): last_key = "" pkg = {} for group in contents.split("\n\n"): @@ -86,7 +92,7 @@ def _parse_repository(state, contents, root): pkg[key] = value if len(pkg.keys()) != 0: - pkg["Root"] = root + pkg["Roots"] = roots _add_package(state, pkg) last_key = "" pkg = {} @@ -117,20 +123,20 @@ def _create(rctx, sources, archs): ) for arch in archs: - for (url, dist, comp) in sources: + for (urls, dist, comp) in sources: # We assume that `url` does not contain a trailing forward slash when passing to # functions below. If one is present, remove it. Some HTTP servers do not handle # redirects properly when a path contains "//" # (ie. https://mymirror.com/ubuntu//dists/noble/stable/... may return a 404 # on misconfigured HTTP servers) - url = url.rstrip("/") + urls = [url.rstrip("/") for url in urls] rctx.report_progress("Fetching package index: {}/{} for {}".format(dist, comp, arch)) - (output, _) = _fetch_package_index(rctx, url, dist, comp, arch, "") + (output, _, _) = _fetch_package_index(rctx, urls, dist, comp, arch, "") # TODO: this is expensive to perform. 
rctx.report_progress("Parsing package index: {}/{} for {}".format(dist, comp, arch)) - _parse_repository(state, rctx.read(output), url) + _parse_repository(state, rctx.read(output), urls) return struct( package_versions = lambda **kwargs: _package_versions(state, **kwargs), diff --git a/apt/private/deb_resolve.bzl b/apt/private/deb_resolve.bzl index 169c4fb..baadaed 100644 --- a/apt/private/deb_resolve.bzl +++ b/apt/private/deb_resolve.bzl @@ -44,8 +44,15 @@ def internal_resolve(rctx, yq_toolchain_prefix, manifest, include_transitive): for src in manifest["sources"]: distr, components = src["channel"].split(" ", 1) for comp in components.split(" "): + if "urls" in src: + urls = src["urls"] + elif "url" in src: + urls = [src["url"]] + else: + fail("Source missing 'url' or 'urls' field") + sources.append(( - src["url"], + urls, distr, comp, )) diff --git a/apt/private/deb_translate_lock.bzl b/apt/private/deb_translate_lock.bzl index c126236..80adb17 100644 --- a/apt/private/deb_translate_lock.bzl +++ b/apt/private/deb_translate_lock.bzl @@ -168,7 +168,7 @@ def _deb_translate_lock_impl(rctx): _DEB_IMPORT_TMPL.format( name = "%s_%s" % (rctx.attr.name, package_key), package_name = package["name"], - urls = [package["url"]], + urls = package["urls"], sha256 = package["sha256"], ), ) @@ -184,7 +184,7 @@ def _deb_translate_lock_impl(rctx): "//%s/%s" % (dep["name"], package["arch"]) for dep in package["dependencies"] ]), - urls = [package["url"]], + urls = package["urls"], name = package["name"], arch = package["arch"], sha256 = package["sha256"], diff --git a/apt/private/lockfile.bzl b/apt/private/lockfile.bzl index 09e2fd9..73b68f1 100644 --- a/apt/private/lockfile.bzl +++ b/apt/private/lockfile.bzl @@ -20,7 +20,10 @@ def _add_package(lock, package, arch): "key": k, "name": package["Package"], "version": package["Version"], - "url": "%s/%s" % (package["Root"], package["Filename"]), + "urls": [ + "%s/%s" % (root, package["Filename"]) + for root in package["Roots"] + ], 
"sha256": package["SHA256"], "arch": arch, "dependencies": [], @@ -71,6 +74,9 @@ def _from_json(rctx, content): fast_package_lookup = dict(), ) for (i, package) in enumerate(lock.packages): + if "url" in package: + package["urls"] = [package.pop("url")] + lock.packages[i] = package lock.fast_package_lookup[package["key"]] = i return _create(rctx, lock) diff --git a/e2e/smoke/bullseye.yaml b/e2e/smoke/bullseye.yaml index 0e89d4c..60384bf 100644 --- a/e2e/smoke/bullseye.yaml +++ b/e2e/smoke/bullseye.yaml @@ -11,12 +11,13 @@ version: 1 sources: - channel: bullseye main contrib - url: https://snapshot-cloudflare.debian.org/archive/debian/20240210T223313Z + urls: + - https://snapshot-cloudflare.debian.org/archive/debian/20240210T223313Z + - https://snapshot.debian.org/archive/debian/20240210T223313Z - channel: bullseye-security main url: https://snapshot-cloudflare.debian.org/archive/debian-security/20240210T223313Z - channel: bullseye-updates main url: https://snapshot-cloudflare.debian.org/archive/debian/20240210T223313Z/ - # This channel contains an uncompressed Packages file - channel: cloud-sdk main url: https://packages.cloud.google.com/apt diff --git a/examples/debian_snapshot/bullseye.yaml b/examples/debian_snapshot/bullseye.yaml index 7bac5e7..60384bf 100644 --- a/examples/debian_snapshot/bullseye.yaml +++ b/examples/debian_snapshot/bullseye.yaml @@ -11,7 +11,9 @@ version: 1 sources: - channel: bullseye main contrib - url: https://snapshot-cloudflare.debian.org/archive/debian/20240210T223313Z + urls: + - https://snapshot-cloudflare.debian.org/archive/debian/20240210T223313Z + - https://snapshot.debian.org/archive/debian/20240210T223313Z - channel: bullseye-security main url: https://snapshot-cloudflare.debian.org/archive/debian-security/20240210T223313Z - channel: bullseye-updates main