Skip to content

Commit

Permalink
feat: support multiple URLs in sources
Browse files Browse the repository at this point in the history
Fixes #113

The Debian snapshot mirror fails quite often and, while the Cloudflare
mirror has had some issues in the past (e.g. its replication lagged
for months), it's much more reliable.

This commit adds support for multiple URLs in the manifest and the
lockfile. See examples/debian_snapshot/bullseye.yaml for sample usage.
  • Loading branch information
jjmaestro committed Jan 22, 2025
1 parent eb669b3 commit d6b2868
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 35 deletions.
2 changes: 1 addition & 1 deletion apt/extensions.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def _distroless_extension(module_ctx):

deb_import(
name = "%s_%s" % (install.name, package_key),
urls = [package["url"]],
urls = package["urls"],
sha256 = package["sha256"],
)

Expand Down
60 changes: 33 additions & 27 deletions apt/private/apt_deb_repository.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
load(":util.bzl", "util")
load(":version_constraint.bzl", "version_constraint")

def _fetch_package_index(rctx, url, dist, comp, arch, integrity):
def _fetch_package_index(rctx, urls, dist, comp, arch, integrity):
target_triple = "{dist}/{comp}/{arch}".format(dist = dist, comp = comp, arch = arch)

# See https://linux.die.net/man/1/xz , https://linux.die.net/man/1/gzip , and https://linux.die.net/man/1/bzip2
Expand All @@ -20,44 +20,50 @@ def _fetch_package_index(rctx, url, dist, comp, arch, integrity):

failed_attempts = []

for (ext, cmd) in supported_extensions:
output = "{}/Packages{}".format(target_triple, ext)
dist_url = "{}/dists/{}/{}/binary-{}/Packages{}".format(url, dist, comp, arch, ext)
download = rctx.download(
url = dist_url,
output = output,
integrity = integrity,
allow_fail = True,
)
decompress_r = None
if download.success:
decompress_r = rctx.execute(cmd + [output])
if decompress_r.return_code == 0:
integrity = download.integrity
break
url = None
for url in urls:
download = None
for (ext, cmd) in supported_extensions:
output = "{}/Packages{}".format(target_triple, ext)
dist_url = "{}/dists/{}/{}/binary-{}/Packages{}".format(url, dist, comp, arch, ext)
download = rctx.download(
url = dist_url,
output = output,
integrity = integrity,
allow_fail = True,
)
decompress_r = None
if download.success:
decompress_r = rctx.execute(cmd + [output])
if decompress_r.return_code == 0:
integrity = download.integrity
break

failed_attempts.append((dist_url, download, decompress_r))

failed_attempts.append((dist_url, download, decompress_r))
if download.success:
break

if len(failed_attempts) == len(supported_extensions):
if len(failed_attempts) == len(supported_extensions) * len(urls):
attempt_messages = []
for (url, download, decompress) in failed_attempts:
for (failed_url, download, decompress) in failed_attempts:
reason = "unknown"
if not download.success:
reason = "Download failed. See warning above for details."
elif decompress.return_code != 0:
reason = "Decompression failed with non-zero exit code.\n\n{}\n{}".format(decompress.stderr, decompress.stdout)

attempt_messages.append("""\n*) Failed '{}'\n\n{}""".format(url, reason))
attempt_messages.append("""\n*) Failed '{}'\n\n{}""".format(failed_url, reason))

fail("""
** Tried to download {} different package indices and all failed.
{}
""".format(len(failed_attempts), "\n".join(attempt_messages)))

return ("{}/Packages".format(target_triple), integrity)
return ("{}/Packages".format(target_triple), url, integrity)

def _parse_repository(state, contents, root):
def _parse_repository(state, contents, roots):
last_key = ""
pkg = {}
for group in contents.split("\n\n"):
Expand Down Expand Up @@ -86,7 +92,7 @@ def _parse_repository(state, contents, root):
pkg[key] = value

if len(pkg.keys()) != 0:
pkg["Root"] = root
pkg["Roots"] = roots
_add_package(state, pkg)
last_key = ""
pkg = {}
Expand Down Expand Up @@ -117,20 +123,20 @@ def _create(rctx, sources, archs):
)

for arch in archs:
for (url, dist, comp) in sources:
for (urls, dist, comp) in sources:
# We assume that no URL in `urls` contains a trailing forward slash when passing to
# functions below. If one is present, remove it. Some HTTP servers do not handle
# redirects properly when a path contains "//"
# (e.g. https://mymirror.com/ubuntu//dists/noble/stable/... may return a 404
# on misconfigured HTTP servers)
url = url.rstrip("/")
urls = [url.rstrip("/") for url in urls]

rctx.report_progress("Fetching package index: {}/{} for {}".format(dist, comp, arch))
(output, _) = _fetch_package_index(rctx, url, dist, comp, arch, "")
(output, _, _) = _fetch_package_index(rctx, urls, dist, comp, arch, "")

# TODO: this is expensive to perform.
rctx.report_progress("Parsing package index: {}/{} for {}".format(dist, comp, arch))
_parse_repository(state, rctx.read(output), url)
_parse_repository(state, rctx.read(output), urls)

return struct(
package_versions = lambda **kwargs: _package_versions(state, **kwargs),
Expand Down
9 changes: 8 additions & 1 deletion apt/private/deb_resolve.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,15 @@ def internal_resolve(rctx, yq_toolchain_prefix, manifest, include_transitive):
for src in manifest["sources"]:
distr, components = src["channel"].split(" ", 1)
for comp in components.split(" "):
if "urls" in src:
urls = src["urls"]
elif "url" in src:
urls = [src["url"]]
else:
fail("Source missing 'url' or 'urls' field")

sources.append((
src["url"],
urls,
distr,
comp,
))
Expand Down
4 changes: 2 additions & 2 deletions apt/private/deb_translate_lock.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def _deb_translate_lock_impl(rctx):
_DEB_IMPORT_TMPL.format(
name = "%s_%s" % (rctx.attr.name, package_key),
package_name = package["name"],
urls = [package["url"]],
urls = package["urls"],
sha256 = package["sha256"],
),
)
Expand All @@ -184,7 +184,7 @@ def _deb_translate_lock_impl(rctx):
"//%s/%s" % (dep["name"], package["arch"])
for dep in package["dependencies"]
]),
urls = [package["url"]],
urls = package["urls"],
name = package["name"],
arch = package["arch"],
sha256 = package["sha256"],
Expand Down
8 changes: 7 additions & 1 deletion apt/private/lockfile.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ def _add_package(lock, package, arch):
"key": k,
"name": package["Package"],
"version": package["Version"],
"url": "%s/%s" % (package["Root"], package["Filename"]),
"urls": [
"%s/%s" % (root, package["Filename"])
for root in package["Roots"]
],
"sha256": package["SHA256"],
"arch": arch,
"dependencies": [],
Expand Down Expand Up @@ -71,6 +74,9 @@ def _from_json(rctx, content):
fast_package_lookup = dict(),
)
for (i, package) in enumerate(lock.packages):
if "url" in package:
package["urls"] = [package.pop("url")]

lock.packages[i] = package
lock.fast_package_lookup[package["key"]] = i
return _create(rctx, lock)
Expand Down
5 changes: 3 additions & 2 deletions e2e/smoke/bullseye.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@ version: 1

sources:
- channel: bullseye main contrib
url: https://snapshot-cloudflare.debian.org/archive/debian/20240210T223313Z
urls:
- https://snapshot-cloudflare.debian.org/archive/debian/20240210T223313Z
- https://snapshot.debian.org/archive/debian/20240210T223313Z
- channel: bullseye-security main
url: https://snapshot-cloudflare.debian.org/archive/debian-security/20240210T223313Z
- channel: bullseye-updates main
url: https://snapshot-cloudflare.debian.org/archive/debian/20240210T223313Z/
# This channel contains an uncompressed Packages file
- channel: cloud-sdk main
url: https://packages.cloud.google.com/apt

Expand Down
4 changes: 3 additions & 1 deletion examples/debian_snapshot/bullseye.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ version: 1

sources:
- channel: bullseye main contrib
url: https://snapshot-cloudflare.debian.org/archive/debian/20240210T223313Z
urls:
- https://snapshot-cloudflare.debian.org/archive/debian/20240210T223313Z
- https://snapshot.debian.org/archive/debian/20240210T223313Z
- channel: bullseye-security main
url: https://snapshot-cloudflare.debian.org/archive/debian-security/20240210T223313Z
- channel: bullseye-updates main
Expand Down

0 comments on commit d6b2868

Please sign in to comment.