From 339a1791b1f6930c1bbe0c58bb670a0a62496f1b Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sun, 6 Aug 2023 09:29:28 -0400 Subject: [PATCH] fix reproducibility --- pex/common.py | 4 +++- pex/pex_builder.py | 6 +++--- tests/integration/test_reproducible.py | 3 +++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pex/common.py b/pex/common.py index 898d71964..622eb6c3d 100644 --- a/pex/common.py +++ b/pex/common.py @@ -627,11 +627,13 @@ def zip( # If labels are provided, respect the given ordering, but still sort the files # within each label to get deterministic output. sorted(self.filesets.get(label, ())) + # NB: An iterable of labels with non-deterministic order is not reproducible! for label in labels ) ) else: - selected_files = OrderedSet(self.files()) + # Otherwise, sort the files to get reproducible output by default. + selected_files = OrderedSet(sorted(self.files())) compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED with open_zip(filename, mode, compression) as zf: diff --git a/pex/pex_builder.py b/pex/pex_builder.py index 5290e3a4d..926796b46 100644 --- a/pex/pex_builder.py +++ b/pex/pex_builder.py @@ -719,7 +719,7 @@ def iter_bootstrap_script_labels(cls): def iter_metadata_labels(cls): # type: () -> Iterator[str] """``Chroot`` labels covering metadata files.""" - # PEX-INFO: This is accessed after unpacking the zip. + # PEX-INFO yield "manifest" @classmethod @@ -733,8 +733,8 @@ def iter_bootstrap_libs_labels(cls): def iter_deps_libs_labels(cls, pex_info): # type: (PexInfo) -> Iterator[str] """``Chroot`` labels covering the third-party code that was resolved into dists.""" - # Subdirectories of .deps: - for dist_label in pex_info.distributions.keys(): + # Subdirectories of .deps/: Keys need to be sorted for deterministic output. + for dist_label in sorted(pex_info.distributions.keys()): yield dist_label @classmethod diff --git a/tests/integration/test_reproducible.py b/tests/integration/test_reproducible.py index aab05c84b..b43970e7b 100644 --- a/tests/integration/test_reproducible.py +++ b/tests/integration/test_reproducible.py @@ -97,6 +97,9 @@ def explode_pex(path): ), "{} and {} have different content.".format(member1, member2) # Check that the entire file is equal, including metadata. assert filecmp.cmp(member1, member2, shallow=False) + # Check that the file list is identical. + with ZipFile(pex1) as zfp1, ZipFile(pex2) as zfp2: + assert zfp1.namelist() == zfp2.namelist() # Finally, check that the .pex files are byte-for-byte identical. assert filecmp.cmp(pex1, pex2, shallow=False)