diff --git a/CHANGELOG.md b/CHANGELOG.md index a451e4cb..c05c8ce7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,19 +7,20 @@ and this project adheres to [Semantic Versioning][semver]. ## [Unreleased] - ### Added - ### Changed +- Implement updater pipeline ([374]) +- Improve error messages and error logging ([374]) +- Update target repositories in a breadth-first way ([374]) ### Fixed +[374]: https://github.com/openlawlibrary/taf/pull/374 ## [0.28.0] - 11/10/2023 - ### Added - Implement tests for the functions which are directly called by the cli (API package) ([362]) @@ -83,9 +84,7 @@ and this project adheres to [Semantic Versioning][semver]. [357]: https://github.com/openlawlibrary/taf/pull/357 [355]: https://github.com/openlawlibrary/taf/pull/355 [354]: https://github.com/openlawlibrary/taf/pull/354 -[353]: https://github.com/openlawlibrary/taf/pull/353 [352]: https://github.com/openlawlibrary/taf/pull/352 -[351]: https://github.com/openlawlibrary/taf/pull/351 [349]: https://github.com/openlawlibrary/taf/pull/349 [346]: https://github.com/openlawlibrary/taf/pull/346 [343]: https://github.com/openlawlibrary/taf/pull/343 @@ -107,7 +106,6 @@ and this project adheres to [Semantic Versioning][semver]. ### Fixed -[349]: https://github.com/openlawlibrary/taf/pull/349 ## [0.26.0] - 07/12/2023 @@ -126,7 +124,6 @@ and this project adheres to [Semantic Versioning][semver]. - Fix create repository ([325]) - [325]: https://github.com/openlawlibrary/taf/pull/325 [321]: https://github.com/openlawlibrary/taf/pull/321 [320]: https://github.com/openlawlibrary/taf/pull/320 @@ -144,11 +141,9 @@ and this project adheres to [Semantic Versioning][semver]. - Fix execution of scripts ([311]) - [313]: https://github.com/openlawlibrary/taf/pull/313 [311]: https://github.com/openlawlibrary/taf/pull/311 - ## [0.24.0] - 02/21/2023 ### Added @@ -162,7 +157,6 @@ and this project adheres to [Semantic Versioning][semver]. 
- Use `generate_and_write_unencrypted_rsa_keypair` for no provided password ([305]) - [309]: https://github.com/openlawlibrary/taf/pull/309 [308]: https://github.com/openlawlibrary/taf/pull/308 [305]: https://github.com/openlawlibrary/taf/pull/305 @@ -174,8 +168,8 @@ and this project adheres to [Semantic Versioning][semver]. ### Changed ### Fixed -- Fix `clone_or_pull` method ([303]) +- Fix `clone_or_pull` method ([303]) [303]: https://github.com/openlawlibrary/taf/pull/303 @@ -201,6 +195,7 @@ and this project adheres to [Semantic Versioning][semver]. ### Changed ### Fixed + - Pin `pyOpenSSL` to newer version ([299]) [299]: https://github.com/openlawlibrary/taf/pull/299 @@ -212,7 +207,8 @@ and this project adheres to [Semantic Versioning][semver]. ### Changed ### Fixed - - Add missing tuf import in `log.py` ([298]) + +- Add missing tuf import in `log.py` ([298]) [298]: https://github.com/openlawlibrary/taf/pull/298 @@ -226,17 +222,14 @@ and this project adheres to [Semantic Versioning][semver]. - Remove _tuf_patches in `__init__.py` ([297]) - [297]: https://github.com/openlawlibrary/taf/pull/297 ### Added ### Changed - ### Fixed - ## [0.22.1] - 12/14/2022 ### Added @@ -247,10 +240,8 @@ and this project adheres to [Semantic Versioning][semver]. - Move _tuf_patches to repository lib ([296]) - [296]: https://github.com/openlawlibrary/taf/pull/296 - ## [0.22.0] - 12/09/2022 ### Added @@ -276,7 +267,6 @@ and this project adheres to [Semantic Versioning][semver]. - Fix `all_commits_since_commit` to validate provided commit ([278]) - Remove pin for `PyOpenSSL` ([273]) - [294]: https://github.com/openlawlibrary/taf/pull/294 [293]: https://github.com/openlawlibrary/taf/pull/293 [292]: https://github.com/openlawlibrary/taf/pull/292 @@ -1043,8 +1033,7 @@ and this project adheres to [Semantic Versioning][semver]. 
[0.13.2]: https://github.com/openlawlibrary/taf/compare/v0.13.1...v0.13.2 [0.13.1]: https://github.com/openlawlibrary/taf/compare/v0.13.0...v0.13.1 [0.13.0]: https://github.com/openlawlibrary/taf/compare/v0.12.0...v0.13.0 -[0.12.0]: https://github.com/openlawlibrary/taf/compare/v0.11.2...v0.12.0 -[0.11.1]: https://github.com/openlawlibrary/taf/compare/v0.11.1...v0.11.2 +[0.12.0]: https://github.com/openlawlibrary/taf/compare/v0.11.1...v0.12.0 [0.11.1]: https://github.com/openlawlibrary/taf/compare/v0.11.0...v0.11.1 [0.11.0]: https://github.com/openlawlibrary/taf/compare/v0.10.1...v0.11.0 [0.10.1]: https://github.com/openlawlibrary/taf/compare/v0.10.0...v0.10.1 diff --git a/taf/auth_repo.py b/taf/auth_repo.py index fdcb0554..a1334902 100644 --- a/taf/auth_repo.py +++ b/taf/auth_repo.py @@ -3,7 +3,7 @@ import tempfile import fnmatch -from typing import Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Union from collections import defaultdict from contextlib import contextmanager from pathlib import Path @@ -36,6 +36,7 @@ def __init__( conf_directory_root: Optional[str] = None, out_of_band_authentication: Optional[str] = None, path: Optional[Union[Path, str]] = None, + alias: Optional[str] = None, *args, **kwargs, ): @@ -51,6 +52,7 @@ def __init__( custom (dict): a dictionary containing other data default_branch (str): repository's default branch, automatically determined if not specified out_of_band_authentication (str): manually specified initial commit + alias: Repository's alias, which will be used in logging statements to reference it """ super().__init__( library_dir, @@ -60,6 +62,7 @@ def __init__( default_branch, allow_unsafe, path, + alias, *args, **kwargs, ) @@ -142,6 +145,8 @@ def last_validated_commit(self) -> Optional[str]: @property def log_prefix(self) -> str: + if self.alias: + return f"{self.alias}: " return f"Auth repo {self.name}: " def get_target(self, target_name, commit=None, safely=True) -> 
Optional[Dict]: @@ -213,6 +218,61 @@ def set_last_validated_commit(self, commit: str): self._log_debug(f"setting last validated commit to: {commit}") Path(self.conf_dir, self.LAST_VALIDATED_FILENAME).write_text(commit) + def targets_data_by_auth_commits( + self, + commits: List[str], + target_repos: Optional[List[str]] = None, + custom_fns: Optional[Dict[str, Callable]] = None, + default_branch: Optional[str] = None, + excluded_target_globs: Optional[List[str]] = None, + ) -> Dict[str, Dict[str, Dict[str, Any]]]: + """ + Return a dictionary where each target repository has associated authentication commits, + and for each authentication commit, there's a dictionary of the branch, commit and custom data. + + { + 'target_repo1': { + 'auth_commit1': {'branch': 'branch1', 'commit': 'commit1', 'custom': {}}, + 'auth_commit2': {'branch': 'branch1', 'commit': 'commit2', 'custom': {}}, + ... + }, + 'target_repo2': { + ... + }, + ... + } + + """ + repositories_commits: Dict[str, Dict[str, Dict[str, Any]]] = {} + targets = self.targets_at_revisions( + *commits, target_repos=target_repos, default_branch=default_branch + ) + excluded_target_globs = excluded_target_globs or [] + for commit in commits: + for target_path, target_data in targets[commit].items(): + if any( + fnmatch.fnmatch(target_path, excluded_target_glob) + for excluded_target_glob in excluded_target_globs + ): + continue + + target_branch = target_data.get("branch") + target_commit = target_data.get("commit") + target_data.setdefault("custom", {}) + if custom_fns is not None and target_path in custom_fns: + target_data["custom"].update(custom_fns[target_path](target_commit)) + + repositories_commits.setdefault(target_path, {})[commit] = { + "branch": target_branch, + "commit": target_commit, + "custom": target_data.get("custom"), + } + + self._log_debug( + f"new commits per repositories according to target files: {repositories_commits}" + ) + return repositories_commits + def 
sorted_commits_and_branches_per_repositories( self, commits: List[str], @@ -220,7 +280,7 @@ def sorted_commits_and_branches_per_repositories( custom_fns: Optional[Dict[str, Callable]] = None, default_branch: Optional[str] = None, excluded_target_globs: Optional[List[str]] = None, - ) -> Dict[str, Dict[str, List[Dict]]]: + ) -> Dict[str, Dict[str, List[Dict[str, Any]]]]: """Return a dictionary consisting of branches and commits belonging to it for every target repository: { diff --git a/taf/git.py b/taf/git.py index ca554628..49e7364c 100644 --- a/taf/git.py +++ b/taf/git.py @@ -1,4 +1,5 @@ from __future__ import annotations +import datetime import json import itertools import os @@ -36,6 +37,7 @@ def __init__( default_branch: Optional[str] = None, allow_unsafe: Optional[bool] = False, path: Optional[Union[Path, str]] = None, + alias: Optional[str] = None, *args, **kwargs, ): @@ -52,6 +54,7 @@ def __init__( default_branch (str): repository's default branch, automatically determined if not specified allow_unsafe: allow a git's security mechanism which prevents execution of git commands if the containing directory is owned by a different user to be ignored + alias: Repository's alias, which will be used in logging statements to reference it """ if isinstance(library_dir, str): library_dir = Path(library_dir) @@ -62,7 +65,6 @@ def __init__( raise InvalidRepositoryError( "Both library_dir and name need to be specified" ) - if name is not None and library_dir is not None: self.name = self._validate_repo_name(name) self.path = self._validate_repo_path(library_dir, name, path) @@ -88,6 +90,7 @@ def __init__( self.library_dir = self.library_dir.resolve() self.path = self._validate_repo_path(path) + self.alias = alias self.urls = self._validate_urls(urls) self.allow_unsafe = allow_unsafe self.custom = custom or {} @@ -167,6 +170,8 @@ def initial_commit(self) -> str: @property def log_prefix(self) -> str: + if self.alias: + return f"{self.alias}: " return f"Repo {self.name}: 
" @property @@ -241,7 +246,12 @@ def _get_default_branch_from_local(self) -> str: branch = self._git("symbolic-ref HEAD --short", reraise_error=True) return branch - def _get_default_branch_from_remote(self, url: str) -> str: + def _get_default_branch_from_remote(self, url: str) -> Optional[str]: + if not self.is_git_repository: + self._log_debug( + "Repository does not exist. Could not determined default branch from remote" + ) + return None branch = self._git( f"ls-remote --symref {url} HEAD", log_error=True, @@ -283,7 +293,7 @@ def all_commits_on_branch( ) if branch: branch_obj = repo.branches.get(branch) - if branch is None: + if branch_obj is None: raise GitError( self, message=f"Error occurred while getting commits of branch {branch}. Branch does not exist", @@ -307,8 +317,9 @@ def all_commits_since_commit( specified or currently checked out branch Raises: - exceptions.GitError: An error occured with provided commit SHA + exceptions.GitError: An error occurred with provided commit SHA """ + if since_commit is None: return self.all_commits_on_branch(branch=branch, reverse=reverse) @@ -798,6 +809,20 @@ def delete_remote_branch( remote = self.remotes[0] self._git(f"push {remote} --delete {branch_name}", log_error=True) + def get_commit_date(self, commit_sha: str) -> str: + """Returns commit date of the given commit""" + repo = self.pygit_repo + if repo is None: + raise GitError( + "Could not get commit message. pygit repository could not be instantiated." 
+ ) + commit = repo.get(commit_sha) + date = datetime.datetime.utcfromtimestamp( + commit.commit_time + commit.commit_time_offset + ) + formatted_date = date.strftime("%Y-%m-%d") + return formatted_date + def get_commit_message(self, commit_sha: str) -> str: """Returns commit message of the given commit""" repo = self.pygit_repo @@ -812,7 +837,7 @@ def get_commit_sha(self, behind_head: str) -> str: """Get commit sha of HEAD~{behind_head}""" return self._git("rev-parse HEAD~{}", behind_head) - def get_default_branch(self, url: Optional[str] = None) -> str: + def get_default_branch(self, url: Optional[str] = None) -> Optional[str]: """Get the default branch of the repository. If url is provided, return the default branch from the remote. Otherwise, return the default branch from the local repository.""" @@ -1290,7 +1315,9 @@ def _determine_default_branch(self) -> Optional[str]: # try to get the default branch from the local repository errors = [] try: - return self.get_default_branch() + branch = self.get_default_branch() + if branch is not None: + return branch except GitError as e: errors.append(e) pass diff --git a/taf/tests/test_api/test_create_repository.py b/taf/tests/test_api/test_create_repository.py index 326ff700..fbf011c9 100644 --- a/taf/tests/test_api/test_create_repository.py +++ b/taf/tests/test_api/test_create_repository.py @@ -120,4 +120,5 @@ def test_create_repository_when_add_repositories_json( for role in ("targets", "delegated_role", "inner_role"): assert role in targets_roles check_if_targets_signed(auth_repo, "targets", "repositories.json", "mirrors.json") - validate_repository(repo_path) + # we are not validating target repositories, just the authentication repository + validate_repository(repo_path, excluded_target_globs="*") diff --git a/taf/tests/test_updater/test_repo_update/test_updater.py b/taf/tests/test_updater/test_repo_update/test_updater.py index eeeafa7d..d463e7fc 100644 --- a/taf/tests/test_updater/test_repo_update/test_updater.py 
+++ b/taf/tests/test_updater/test_repo_update/test_updater.py @@ -71,9 +71,14 @@ AUTH_REPO_REL_PATH = "organization/auth_repo" TARGET_REPO_REL_PATH = "namespace/TargetRepo1" -TARGET1_SHA_MISMATCH = "Mismatch between target commits specified in authentication repository and target repository namespace/TargetRepo1" -TARGET2_SHA_MISMATCH = "Mismatch between target commits specified in authentication repository and target repository namespace/TargetRepo2" -TARGETS_MISMATCH_ANY = "Mismatch between target commits specified in authentication repository and target repository" + +TARGET_MISSMATCH_PATTERN = r"Update of organization\/auth_repo failed due to error: Failure to validate organization\/auth_repo commit ([0-9a-f]{40}) committed on (\d{4}-\d{2}-\d{2}): data repository ([\w\/-]+) was supposed to be at commit ([0-9a-z]{40}) but repo was at ([0-9a-f]{40})" +TARGET_ADDITIONAL_COMMIT = r"Update of organization\/auth_repo failed due to error: Failure to validate organization\/auth_repo commit ([0-9a-f]{40}) committed on (\d{4}-\d{2}-\d{2}): data repository ([\w\/-]+) was supposed to be at commit ([0-9a-f]{40}) but commit not on branch (\w+)" +TARGET_COMMIT_AFTER_LAST_VALIDATED = r"Update of organization\/auth_repo failed due to error: Target repository ([\w\/-]+) does not allow unauthenticated commits, but contains commit\(s\) ([0-9a-f]{40}(?:, [0-9a-f]{40})*) on branch (\w+)" +TARGET_MISSING_COMMIT = r"Update of organization/auth_repo failed due to error: Failure to validate organization/auth_repo commit ([0-9a-f]{40}) committed on (\d{4}-\d{2}-\d{2}): data repository ([\w\/-]+) was supposed to be at commit ([0-9a-f]{40}) but commit not on branch (\w+)" +INDEX_LOCKED = r"Update of organization/auth_repo failed due to error: Repo ([\w\/-]+): the index is locked; this might be due to a concurrent or crashed process" + + NO_WORKING_MIRRORS = ( f"Validation of authentication repository {AUTH_REPO_REL_PATH} failed at revision" ) @@ -239,26 +244,57 @@ def 
test_no_update_necessary( @pytest.mark.parametrize( - "test_name, expected_error, auth_repo_name_exists", + "test_name, expected_error, auth_repo_name_exists, expect_partial_update, should_last_validated_exist", [ - ("test-updater-invalid-target-sha", TARGET1_SHA_MISMATCH, True), - ("test-updater-additional-target-commit", TARGET1_SHA_MISMATCH, True), - ("test-updater-missing-target-commit", TARGET1_SHA_MISMATCH, True), - ("test-updater-wrong-key", NO_WORKING_MIRRORS, True), - ("test-updater-invalid-version-number", REPLAYED_METADATA, True), - ("test-updater-delegated-roles-wrong-sha", TARGET2_SHA_MISMATCH, True), - ("test-updater-updated-root-n-root-missing", NO_WORKING_MIRRORS, True), - ("test-updater-updated-root-invalid-metadata", NO_WORKING_MIRRORS, True), - ("test-updater-info-missing", NO_REPOSITORY_INFO_JSON, False), + ("test-updater-invalid-target-sha", TARGET_MISSMATCH_PATTERN, True, True, True), + ( + "test-updater-additional-target-commit", + TARGET_COMMIT_AFTER_LAST_VALIDATED, + True, + True, + True, + ), + ( + "test-updater-missing-target-commit", + TARGET_ADDITIONAL_COMMIT, + True, + True, + True, + ), + ("test-updater-wrong-key", NO_WORKING_MIRRORS, True, True, False), + ("test-updater-invalid-version-number", REPLAYED_METADATA, True, True, False), + ( + "test-updater-delegated-roles-wrong-sha", + TARGET_MISSMATCH_PATTERN, + True, + True, + True, + ), + ( + "test-updater-updated-root-invalid-metadata", + NO_WORKING_MIRRORS, + True, + True, + False, + ), + ("test-updater-info-missing", NO_REPOSITORY_INFO_JSON, False, True, False), ( "test-updater-invalid-snapshot-meta-field-missing", METADATA_FIELD_MISSING, False, + True, + False, ), ], ) def test_updater_invalid_update( - test_name, expected_error, auth_repo_name_exists, updater_repositories, client_dir + test_name, + expected_error, + auth_repo_name_exists, + updater_repositories, + client_dir, + expect_partial_update, + should_last_validated_exist, ): repositories = updater_repositories[test_name] 
clients_auth_repo_path = client_dir / AUTH_REPO_REL_PATH @@ -267,17 +303,20 @@ def test_updater_invalid_update( client_dir, repositories, expected_error, + expect_partial_update, auth_repo_name_exists=auth_repo_name_exists, ) # make sure that the last validated commit does not exist - _check_if_last_validated_commit_exists(clients_auth_repo_path) + _check_if_last_validated_commit_exists( + clients_auth_repo_path, should_last_validated_exist + ) @pytest.mark.parametrize( "test_name, expected_error", [ - ("test-updater-invalid-target-sha", TARGET1_SHA_MISMATCH), - ("test-updater-missing-target-commit", TARGET1_SHA_MISMATCH), + ("test-updater-invalid-target-sha", TARGET_MISSMATCH_PATTERN), + ("test-updater-missing-target-commit", TARGET_MISSING_COMMIT), ], ) def test_valid_update_no_auth_repo_one_invalid_target_repo_exists( @@ -288,10 +327,10 @@ def test_valid_update_no_auth_repo_one_invalid_target_repo_exists( origin_dir = origin_dir / test_name _clone_client_repo(TARGET_REPO_REL_PATH, origin_dir, client_dir) _update_invalid_repos_and_check_if_repos_exist( - client_dir, repositories, expected_error + client_dir, repositories, expected_error, True ) # make sure that the last validated commit does not exist - _check_if_last_validated_commit_exists(clients_auth_repo_path) + _check_if_last_validated_commit_exists(clients_auth_repo_path, True) def test_updater_expired_metadata(updater_repositories, origin_dir, client_dir): @@ -303,17 +342,17 @@ def test_updater_expired_metadata(updater_repositories, origin_dir, client_dir): repositories = updater_repositories["test-updater-expired-metadata"] clients_auth_repo_path = client_dir / AUTH_REPO_REL_PATH _update_invalid_repos_and_check_if_repos_exist( - client_dir, repositories, ROOT_EXPIRED, set_time=False, strict=True + client_dir, repositories, ROOT_EXPIRED, False, set_time=False, strict=True ) # make sure that the last validated commit does not exist - _check_if_last_validated_commit_exists(clients_auth_repo_path) + 
_check_if_last_validated_commit_exists(clients_auth_repo_path, False) @pytest.mark.parametrize( "test_name, num_of_commits_to_revert, expected_error", [ - ("test-updater-invalid-target-sha", 1, TARGET1_SHA_MISMATCH), - ("test-updater-delegated-roles-wrong-sha", 4, TARGET2_SHA_MISMATCH), + ("test-updater-invalid-target-sha", 1, TARGET_MISSMATCH_PATTERN), + ("test-updater-delegated-roles-wrong-sha", 4, TARGET_MISSMATCH_PATTERN), ], ) def test_updater_invalid_target_sha_existing_client_repos( @@ -344,15 +383,7 @@ def test_no_target_repositories(updater_repositories, origin_dir, client_dir): origin_dir = origin_dir / "test-updater-valid" client_auth_repo = _clone_client_repo(AUTH_REPO_REL_PATH, origin_dir, client_dir) _create_last_validated_commit(client_dir, client_auth_repo.head_commit_sha()) - client_repos = {AUTH_REPO_REL_PATH: client_auth_repo} - _update_invalid_repos_and_check_if_remained_same( - client_repos, client_dir, repositories, TARGETS_MISMATCH_ANY - ) - # make sure that the target repositories still do not exist - for repository_rel_path in repositories: - if repository_rel_path != AUTH_REPO_REL_PATH: - client_repo_path = client_dir / repository_rel_path - assert client_repo_path.exists() is False + _update_and_check_commit_shas(None, repositories, origin_dir, client_dir, False) def test_no_last_validated_commit(updater_repositories, origin_dir, client_dir): @@ -389,7 +420,7 @@ def test_update_test_repo_no_flag(updater_repositories, origin_dir, client_dir): origin_dir = origin_dir / "test-updater-test-repo" # try to update a test repo, set update type to official _update_invalid_repos_and_check_if_repos_exist( - client_dir, repositories, IS_A_TEST_REPO, UpdateType.OFFICIAL + client_dir, repositories, IS_A_TEST_REPO, False, UpdateType.OFFICIAL ) @@ -398,7 +429,7 @@ def test_update_repo_wrong_flag(updater_repositories, origin_dir, client_dir): origin_dir = origin_dir / "test-updater-valid" # try to update without setting the last validated commit 
_update_invalid_repos_and_check_if_repos_exist( - client_dir, repositories, NOT_A_TEST_REPO, UpdateType.TEST + client_dir, repositories, NOT_A_TEST_REPO, False, UpdateType.TEST ) @@ -421,7 +452,7 @@ def test_update_repo_target_in_indeterminate_state( index_lock.touch() _update_invalid_repos_and_check_if_repos_exist( - client_dir, repositories, UNCOMMITTED_TARGET_CHANGES + client_dir, repositories, INDEX_LOCKED, True ) @@ -433,10 +464,16 @@ def _check_last_validated_commit(clients_auth_repo_path): assert head_sha == last_validated_commit -def _check_if_last_validated_commit_exists(clients_auth_repo_path): +def _check_if_last_validated_commit_exists(clients_auth_repo_path, should_exist): client_auth_repo = AuthenticationRepository(path=clients_auth_repo_path) last_validated_commit = client_auth_repo.last_validated_commit - assert last_validated_commit is None + if not should_exist: + assert last_validated_commit is None + else: + assert ( + client_auth_repo.top_commit_of_branch(client_auth_repo.default_branch) + == last_validated_commit + ) def _check_if_commits_match( @@ -619,6 +656,7 @@ def _update_invalid_repos_and_check_if_repos_exist( client_dir, repositories, expected_error, + expect_partial_update, expected_repo_type=UpdateType.EITHER, set_time=True, auth_repo_name_exists=True, @@ -650,10 +688,11 @@ def _update_expect_error(client_dir, expected_repo_type): else: _update_expect_error(client_dir, expected_repo_type) - # the client repositories should not exits - for repository_rel_path in repositories: - path = client_dir / repository_rel_path - if str(path) in repositories_which_existed: - assert path.exists() - else: - assert not path.exists() + if not expect_partial_update: + # the client repositories should not exits + for repository_rel_path in repositories: + path = client_dir / repository_rel_path + if str(path) in repositories_which_existed: + assert path.exists() + else: + assert not path.exists() diff --git a/taf/tools/repo/__init__.py 
b/taf/tools/repo/__init__.py index 933e07fc..4be515b0 100644 --- a/taf/tools/repo/__init__.py +++ b/taf/tools/repo/__init__.py @@ -1,6 +1,8 @@ import click import json from taf.api.repository import create_repository +from taf.exceptions import TAFError, UpdateFailedError +from taf.tools.cli import catch_cli_exception from taf.updater.updater import update_repository, validate_repository, UpdateType @@ -11,6 +13,7 @@ def repo(): pass @repo.command() + @catch_cli_exception(handle=TAFError) @click.option("--path", default=".", help="Authentication repository's location. If not specified, set to the current directory") @click.option("--keys-description", help="A dictionary containing information about the " "keys or a path to a json file which stores the needed information") @@ -71,6 +74,7 @@ def create(path, keys_description, keystore, no_commit, test): ) @repo.command() + @catch_cli_exception(handle=UpdateFailedError) @click.option("--path", default=".", help="Authentication repository's location. 
If not specified, set to the current directory") @click.option("--url", default=None, help="Authentication repository's url") @click.option("--clients-library-dir", default=None, help="Directory where target repositories and, " diff --git a/taf/updater/handlers.py b/taf/updater/handlers.py index 1c9bb631..fae5812c 100644 --- a/taf/updater/handlers.py +++ b/taf/updater/handlers.py @@ -182,6 +182,20 @@ def _init_commits(self): else: raise e + # check if the last validated commit exists in the remote repository + # last_successful_commit could've been manually update to an invalid value + # or set to a commit that exists in the local authentication repository + # that was not pushed + branches_containing_last_validated_commit = ( + self.validation_auth_repo.branches_containing_commit(last_validated_commit) + ) + default_branch = self.validation_auth_repo.default_branch + if default_branch not in branches_containing_last_validated_commit: + msg = f"""Last validated commit not on the {default_branch} of the authentication repository. 
+This could mean that the a commit was removed from the remote repository or that the last_validated_commit file was manually updated.""" + taf_logger.error(msg) + raise UpdateFailedError(msg) + # Check if the user's head commit matches the saved one # That should always be the case # If it is not, it means that someone, accidentally or maliciously made manual changes diff --git a/taf/updater/lifecycle_handlers.py b/taf/updater/lifecycle_handlers.py index f1a5f625..ba5ec504 100644 --- a/taf/updater/lifecycle_handlers.py +++ b/taf/updater/lifecycle_handlers.py @@ -29,6 +29,7 @@ class Event(enum.Enum): CHANGED = "changed" UNCHANGED = "unchanged" FAILED = "failed" + PARTIAL = "partial" COMPLETED = "completed" @@ -103,9 +104,12 @@ def _execute_scripts(repos_and_data, lifecycle_stage, event): for script_repo, script_data in repos_and_data.items(): data = script_data["data"] last_commit = script_data["commit"] - repos_and_data[script_repo]["data"] = execute_scripts( - script_repo, last_commit, scripts_rel_path, data, scripts_root_dir - ) + # there is no reason to try executing the scripts if last_commit is None + # that means that update was not even starterd + if last_commit is not None: + repos_and_data[script_repo]["data"] = execute_scripts( + script_repo, last_commit, scripts_rel_path, data, scripts_root_dir + ) return repos_and_data if event in (Event.CHANGED, Event.UNCHANGED, Event.SUCCEEDED): @@ -154,7 +158,6 @@ def execute_scripts(auth_repo, last_commit, scripts_rel_path, data, scripts_root # load from filesystem in development mode so that the scripts can be updated without # having to commit and push development_mode = settings.development_mode - if development_mode: if scripts_root_dir is not None: path = Path(scripts_root_dir) / auth_repo.name / scripts_rel_path diff --git a/taf/updater/types/update.py b/taf/updater/types/update.py index fb3a6a28..cdaf22ab 100644 --- a/taf/updater/types/update.py +++ b/taf/updater/types/update.py @@ -1,3 +1,4 @@ +import enum 
from attrs import define, field from typing import Dict @@ -9,3 +10,9 @@ class Update: error_msg: str = field(default="") auth_repos: Dict = field(factory=dict) auth_repo_name: str = field(default="") + + +class UpdateType(enum.Enum): + TEST = "test" + OFFICIAL = "official" + EITHER = "either" diff --git a/taf/updater/updater.py b/taf/updater/updater.py index ec7e7864..688937ee 100644 --- a/taf/updater/updater.py +++ b/taf/updater/updater.py @@ -1,29 +1,25 @@ import json -import shutil -import enum -import tempfile +from logging import ERROR from typing import Dict, Tuple, Any +from logdecorator import log_on_error from taf.git import GitRepository +from taf.updater.types.update import UpdateType +from taf.updater.updater_pipeline import ( + AuthenticationRepositoryUpdatePipeline, + _merge_commit, +) -from tuf.ngclient.updater import Updater -from tuf.repository_tool import TARGETS_DIRECTORY_NAME - -from collections import defaultdict from pathlib import Path from taf.log import taf_logger, disable_tuf_console_logging import taf.repositoriesdb as repositoriesdb -from taf.auth_repo import AuthenticationRepository from taf.utils import timed_run import taf.settings as settings from taf.exceptions import ( ScriptExecutionError, UpdateFailedError, - GitError, ValidationFailedError, ) -from taf.updater.handlers import GitUpdater -from taf.utils import on_rm_error from taf.updater.lifecycle_handlers import ( handle_repo_event, @@ -32,19 +28,10 @@ ) from cattr import unstructure -EXPIRED_METADATA_ERROR = "ExpiredMetadataError" -PROTECTED_DIRECTORY_NAME = "protected" -INFO_JSON_PATH = f"{TARGETS_DIRECTORY_NAME}/{PROTECTED_DIRECTORY_NAME}/info.json" disable_tuf_console_logging() -class UpdateType(enum.Enum): - TEST = "test" - OFFICIAL = "official" - EITHER = "either" - - def _check_update_status(repos_update_data: Dict[str, Any]) -> Tuple[Event, str]: # helper function to set update status of update handler based on repo status. 
# if repo handlers event status changed, @@ -67,39 +54,6 @@ def _check_update_status(repos_update_data: Dict[str, Any]) -> Tuple[Event, str] return update_status, errors -def _clone_validation_repo(url, repository_name): - """ - Clones the authentication repository based on the url specified using the - mirrors parameter. The repository is cloned as a bare repository - to a the temp directory and will be deleted one the update is done. - - If repository_name isn't provided (default value), extract it from info.json. - """ - temp_dir = tempfile.mkdtemp() - path = Path(temp_dir, "auth_repo").absolute() - validation_auth_repo = AuthenticationRepository(path=path, urls=[url]) - validation_auth_repo.clone(bare=True) - validation_auth_repo.fetch(fetch_all=True) - - settings.validation_repo_path = validation_auth_repo.path - - validation_head_sha = validation_auth_repo.top_commit_of_branch( - validation_auth_repo.default_branch - ) - - if repository_name is None: - try: - info = validation_auth_repo.get_json(validation_head_sha, INFO_JSON_PATH) - repository_name = f'{info["namespace"]}/{info["name"]}' - except Exception: - raise UpdateFailedError( - "Error during info.json parse. 
When specifying --clients-library-dir check if info.json metadata exists in targets/protected or provide full path to auth repo" - ) - - validation_auth_repo.cleanup() - return repository_name - - def _execute_repo_handlers( update_status, auth_repo, @@ -170,6 +124,13 @@ def _reset_repository(repo, commits_data): _reset_repository(repo, branch_data) +@log_on_error( + ERROR, + "{e}", + logger=taf_logger, + on_exceptions=UpdateFailedError, + reraise=True, +) @timed_run("Updating repository") def update_repository( url, @@ -241,6 +202,7 @@ def update_repository( else None ) + taf_logger.info(f"Updating repository {auth_repo_name}") clients_auth_library_dir = clients_library_dir repos_update_data = {} transient_data = {} @@ -280,7 +242,7 @@ def update_repository( ) except Exception as e: root_error = UpdateFailedError( - f"Update of {auth_repo_name} failed due to error {e}" + f"Update of {auth_repo_name} failed due to error: {e}" ) update_data = {} @@ -489,10 +451,19 @@ def _update_named_repository( # use last validated commit - if the repository contains it # all repositories that can be updated will be updated - if not only_validate and len(commits) and update_status == Event.CHANGED: + if ( + not only_validate + and len(commits) + and (update_status == Event.CHANGED or update_status == Event.PARTIAL) + ): + # when performing breadth-first update, validation might fail at some point + # but we want to update all repository up to it + # so set last validated commit to this last valid commit last_commit = commits[-1] # if there were no errors, merge the last validated authentication repository commit - _merge_commit(auth_repo, auth_repo.default_branch, last_commit, checkout) + _merge_commit( + auth_repo, auth_repo.default_branch, last_commit, checkout, True + ) # update the last validated commit if not excluded_target_globs: auth_repo.set_last_validated_commit(last_commit) @@ -542,280 +513,31 @@ def _update_current_repository( checkout, excluded_target_globs, ): - 
settings.update_from_filesystem = update_from_filesystem - settings.conf_directory_root = conf_directory_root - - def _commits_ret(commits, existing_repo, update_successful): - if commits is None: - commit_before_pull = None - new_commits = [] - commit_after_pull = None - else: - commit_before_pull = commits[0] if existing_repo and len(commits) else None - commit_after_pull = commits[-1] if update_successful else commits[0] - - if not existing_repo: - new_commits = commits - else: - new_commits = commits[1:] if len(commits) else [] - return { - "before_pull": commit_before_pull, - "new": new_commits, - "after_pull": commit_after_pull, - } - - try: - commits = None - # check whether the directory that runs clone exists or contains additional files. - # we need to check the state of folder before running tuf. Resolves issue #22 - # if auth_repo_name isn't specified then the current directory doesn't contain additional files. - users_repo_existed = ( - Path(clients_auth_library_dir, auth_repo_name).exists() - if auth_repo_name is not None - else True - ) - # first clone the validation repository in temp. this is needed because tuf expects auth_repo_name to be valid (not None) - # and in the right format (seperated by '/'). this approach covers a case where we don't know authentication repo path upfront. 
- auth_repo_name = _clone_validation_repo(url, auth_repo_name) - git_updater = GitUpdater(url, clients_auth_library_dir, auth_repo_name) - _run_tuf_updater(git_updater) - except Exception as e: - # Instantiation of the handler failed - this can happen if the url is not correct - # of if the saved last validated commit does not match the current head commit - # do not return any commits data if that is the case - # TODO in case of last validated issue, think about returning commits up to the last validated one - # the problem is that that could indicate that the history was changed - users_auth_repo = None - - if auth_repo_name is not None: - users_auth_repo = AuthenticationRepository( - clients_auth_library_dir, - auth_repo_name, - urls=[url], - conf_directory_root=conf_directory_root, - ) - # make sure that all update affects are deleted if the repository did not exist - if not users_repo_existed: - shutil.rmtree(users_auth_repo.path, onerror=on_rm_error) - shutil.rmtree(users_auth_repo.conf_dir) - return ( - Event.FAILED, - users_auth_repo, - auth_repo_name, - _commits_ret(None, False, False), - e, - {}, - ) - try: - - users_auth_repo = git_updater.users_auth_repo - existing_repo = users_auth_repo.is_git_repository_root - - ( - commits, - error_msg, - last_validated_commit, - ) = _validate_authentication_repository( - git_updater, - users_auth_repo, - out_of_band_authentication, - auth_repo_name, - expected_repo_type, - ) - - if error_msg is not None: - raise error_msg - - if not only_validate: - # fetch the latest commit or clone the repository without checkout - # do not merge before targets are validated as well - if users_auth_repo.is_git_repository_root: - users_auth_repo.fetch(fetch_all=True) - else: - users_auth_repo.clone() - - # load target repositories and validate them - repositoriesdb.load_repositories( - users_auth_repo, - repo_classes=target_repo_classes, - factory=target_factory, - library_dir=targets_library_dir, - commits=commits, - 
only_load_targets=False, - excluded_target_globs=excluded_target_globs, - ) - repositories = repositoriesdb.get_deduplicated_repositories( - users_auth_repo, commits[-1::] - ) - repositories_branches_and_commits = ( - users_auth_repo.sorted_commits_and_branches_per_repositories( - commits, - default_branch=users_auth_repo.default_branch, - excluded_target_globs=excluded_target_globs, - ) - ) - - targets_data = _update_target_repositories( - repositories, - repositories_branches_and_commits, - last_validated_commit, - only_validate, - checkout, - ) - except Exception as e: - if not existing_repo: - shutil.rmtree(users_auth_repo.path, onerror=on_rm_error) - shutil.rmtree(users_auth_repo.conf_dir) - commits = None - return ( - Event.FAILED, - users_auth_repo, - auth_repo_name, - _commits_ret(commits, existing_repo, False), - e, - {}, - ) - - # commits list will always contain the previous top commit of the repository - event = Event.CHANGED if len(commits) > 1 else Event.UNCHANGED - return ( - event, - users_auth_repo, + updater_pipeline = AuthenticationRepositoryUpdatePipeline( + url, + clients_auth_library_dir, + targets_library_dir, auth_repo_name, - _commits_ret(commits, existing_repo, True), - None, - targets_data, + update_from_filesystem, + expected_repo_type, + target_repo_classes, + target_factory, + only_validate, + validate_from_commit, + conf_directory_root, + out_of_band_authentication, + checkout, + excluded_target_globs, ) - - -def _update_target_repositories( - repositories, - repositories_branches_and_commits, - last_validated_commit, - only_validate, - checkout, -): - taf_logger.info("Validating target repositories") - # keep track of the repositories which were cloned - # so that they can be removed if the update fails - cloned_repositories = [] - allow_unauthenticated = {} - new_commits = defaultdict(dict) - additional_commits_per_repo = {} - top_commits_of_branches_before_pull = {} - for path, repository in repositories.items(): - 
taf_logger.info("Validating repository {}", repository.name) - allow_unauthenticated_for_repo = repository.custom.get( - "allow-unauthenticated-commits", False - ) - allow_unauthenticated[path] = allow_unauthenticated_for_repo - is_git_repository = repository.is_git_repository_root - if not is_git_repository: - if only_validate: - taf_logger.info( - "Target repositories must already exist when only validating repositories" - ) - continue - repository.clone(no_checkout=True) - cloned_repositories.append(repository) - - # if no commits were published, repositories_branches_and_commits will be empty - # if unauthenticared commits are allowed, we also want to check if there are - # new commits which - # only check the default branch - if ( - not len(repositories_branches_and_commits[path]) - and allow_unauthenticated_for_repo - and not only_validate - ): - repositories_branches_and_commits[path][repository.default_branch] = [] - for branch in repositories_branches_and_commits[path]: - taf_logger.info("Validating branch {}", branch) - # if last_validated_commit is None or if the target repository didn't exist prior - # to calling update, start the update from the beginning - # otherwise, for each branch, start with the last validated commit of the local - # branch - branch_exists = repository.branch_exists(branch, include_remotes=False) - if not branch_exists and only_validate: - taf_logger.error( - "{} does not contain a local branch named {} and cannot be validated. 
Please update the repositories", - repository.name, - branch, - ) - return [], {} - repo_branch_commits = repositories_branches_and_commits[path][branch] - repo_branch_commits = [ - commit_info["commit"] for commit_info in repo_branch_commits - ] - - old_head = _set_target_old_head_and_validate( - repository, - branch, - branch_exists, - last_validated_commit, - is_git_repository, - repo_branch_commits, - allow_unauthenticated_for_repo, - ) - - # the repository was cloned if it didn't exist - # if it wasn't cloned, fetch the current branch - new_commits_on_repo_branch = _get_commits( - repository, - is_git_repository, - branch, - only_validate, - old_head, - branch_exists, - allow_unauthenticated_for_repo, - ) - top_commits_of_branches_before_pull.setdefault(path, {})[branch] = old_head - new_commits[path].setdefault(branch, []).extend(new_commits_on_repo_branch) - try: - additional_commits_on_branch = _update_target_repository( - repository, - new_commits_on_repo_branch, - repo_branch_commits, - allow_unauthenticated_for_repo, - branch, - ) - if len(additional_commits_on_branch): - additional_commits_per_repo.setdefault(repository.name, {})[ - branch - ] = additional_commits_on_branch - - except UpdateFailedError as e: - taf_logger.error("Updated failed due to error {}", str(e)) - # delete all repositories that were cloned - for repo in cloned_repositories: - taf_logger.debug("Removing cloned repository {}", repo.path) - shutil.rmtree(repo.path, onerror=on_rm_error) - # TODO is it important to undo a fetch if the repository was not cloned? 
- raise e - - taf_logger.info("Successfully validated all target repositories.") - # do not merge commits if there there are - if not only_validate: - # if update is successful, merge the commits - for path, repository in repositories.items(): - for branch in repositories_branches_and_commits[path]: - branch_commits = repositories_branches_and_commits[path][branch] - if not len(branch_commits): - continue - _merge_branch_commits( - repository, - branch, - branch_commits, - allow_unauthenticated[path], - additional_commits_per_repo.get(path, {}).get(branch), - new_commits[path][branch], - checkout, - ) - return _set_target_repositories_data( - repositories, - repositories_branches_and_commits, - top_commits_of_branches_before_pull, - additional_commits_per_repo, + updater_pipeline.run() + output = updater_pipeline.output + return ( + output.event, + output.users_auth_repo, + output.auth_repo_name, + output.commits_data, + output.error, + output.targets_data, ) @@ -829,363 +551,6 @@ def _update_transient_data( return update_transient_data -def _set_target_old_head_and_validate( - repository, - branch, - branch_exists, - last_validated_commit, - is_git_repository, - repo_branch_commits, - allow_unauthenticated_for_repo, -): - if ( - last_validated_commit is None - or not is_git_repository - or not branch_exists - or not len(repo_branch_commits) - ): - old_head = None - else: - old_head = repo_branch_commits[0] - if not allow_unauthenticated_for_repo: - repo_old_head = repository.top_commit_of_branch(branch) - # do the same as when checking the top and last_validated_commit of the authentication repository - if repo_old_head != old_head: - commits_since = repository.all_commits_since_commit(old_head) - if repo_old_head not in commits_since: - msg = f"Top commit of repository {repository.name} {repo_old_head} and is not equal to or newer than commit defined in auth repo {old_head}" - taf_logger.error(msg) - raise UpdateFailedError(msg) - return old_head - - -def 
_run_tuf_updater(git_updater): - def _init_updater(): - try: - return Updater( - git_updater.metadata_dir, - "metadata/", - git_updater.targets_dir, - "targets/", - fetcher=git_updater, - ) - except Exception as e: - taf_logger.error(f"Failed to instantiate TUF Updater due to error {e}") - raise e - - def _update_tuf_current_revision(): - current_commit = git_updater.current_commit - try: - updater.refresh() - taf_logger.debug("Validated metadata files at revision {}", current_commit) - # using refresh, we have updated all main roles - # we still need to update the delegated roles (if there are any) - # and validate any target files - current_targets = git_updater.get_current_targets() - for target_path in current_targets: - target_filepath = target_path.replace("\\", "/") - - targetinfo = updater.get_targetinfo(target_filepath) - target_data = git_updater.get_current_target_data( - target_filepath, raw=True - ) - targetinfo.verify_length_and_hashes(target_data) - - taf_logger.debug( - "Successfully validated target file {} at {}", - target_filepath, - current_commit, - ) - except Exception as e: - metadata_expired = EXPIRED_METADATA_ERROR in type( - e - ).__name__ or EXPIRED_METADATA_ERROR in str(e) - if not metadata_expired or settings.strict: - taf_logger.error( - "Validation of authentication repository {} failed at revision {} due to error {}", - git_updater.users_auth_repo.name, - current_commit, - e, - ) - raise UpdateFailedError( - f"Validation of authentication repository {git_updater.users_auth_repo.name}" - f" failed at revision {current_commit} due to error: {e}" - ) - taf_logger.warning( - f"WARNING: Could not validate authentication repository {git_updater.users_auth_repo.name} at revision {current_commit} due to error {e}" - ) - - while not git_updater.update_done(): - updater = _init_updater() - _update_tuf_current_revision() - - taf_logger.info( - "Successfully validated authentication repository {}", - git_updater.users_auth_repo.name, - ) - - 
-def _get_commits( - repository, - existing_repository, - branch, - only_validate, - old_head, - branch_exists, - allow_unauthenticated_commits, -): - """Returns a list of newly fetched commits belonging to the specified branch.""" - if existing_repository: - repository.fetch(branch=branch) - - if old_head is not None: - if not only_validate: - fetched_commits = repository.all_commits_on_branch( - branch=f"origin/{branch}" - ) - - # if the local branch does not exist (the branch was not checked out locally) - # fetched commits will include already validated commits - # check which commits are newer that the previous head commit - if old_head in fetched_commits: - new_commits_on_repo_branch = fetched_commits[ - fetched_commits.index(old_head) + 1 : : - ] - else: - new_commits_on_repo_branch = repository.all_commits_since_commit( - old_head, branch - ) - for commit in fetched_commits: - if commit not in new_commits_on_repo_branch: - new_commits_on_repo_branch.append(commit) - else: - new_commits_on_repo_branch = repository.all_commits_since_commit( - old_head, branch - ) - new_commits_on_repo_branch.insert(0, old_head) - else: - if branch_exists: - # this happens in the case when last_validated_commit does not exist - # we want to validate all commits, so combine existing commits and - # fetched commits - new_commits_on_repo_branch = repository.all_commits_on_branch( - branch=branch, reverse=True - ) - else: - new_commits_on_repo_branch = [] - if not only_validate: - try: - fetched_commits = repository.all_commits_on_branch( - branch=f"origin/{branch}" - ) - # if the local branch does not exist (the branch was not checked out locally) - # fetched commits will include already validated commits - # check which commits are newer that the previous head commit - for commit in fetched_commits: - if commit not in new_commits_on_repo_branch: - new_commits_on_repo_branch.append(commit) - except GitError: - pass - return new_commits_on_repo_branch - - -def 
_merge_branch_commits( - repository, - branch, - branch_commits, - allow_unauthenticated, - additional_commits, - new_branch_commits, - checkout=True, -): - """Determines which commits needs to be merged into the specified branch and - merge it. - """ - if additional_commits is not None: - allow_unauthenticated = False - last_commit = branch_commits[-1]["commit"] - - last_validated_commit = last_commit - commit_to_merge = ( - last_validated_commit if not allow_unauthenticated else new_branch_commits[-1] - ) - taf_logger.info("Merging {} into {}", commit_to_merge, repository.name) - _merge_commit(repository, branch, commit_to_merge, checkout) - - -def _merge_commit(repository, branch, commit_to_merge, checkout=True): - """Merge the specified commit into the given branch and check out the branch. - If the repository cannot contain unauthenticated commits, check out the merged commit. - """ - taf_logger.info("Merging commit {} into {}", commit_to_merge, repository.name) - try: - repository.checkout_branch(branch, raise_anyway=True) - except GitError as e: - # two scenarios: - # current git repository is in an inconsistent state: - # - .git/index.lock exists (git partial update got applied) - # should get addressed in https://github.com/openlawlibrary/taf/issues/210 - # current git repository has uncommitted changes: - # we do not want taf to lose any repo data, so we do not reset the repository. - # for now, raise an update error and let the user manually reset the repository - taf_logger.error( - "Could not checkout branch {} during commit merge. Error {}", branch, e - ) - raise UpdateFailedError( - f"Repository {repository.name} should contain only committed changes. \n" - + f"Please update the repository at {repository.path} manually and try again." 
- ) - - repository.merge_commit(commit_to_merge) - if checkout: - taf_logger.info("{}: checking out branch {}", repository.name, branch) - repository.checkout_branch(branch) - - -def _set_target_repositories_data( - repositories, - repositories_branches_and_commits, - top_commits_of_branches_before_pull, - additional_commits_per_repo, -): - targets_data = {} - for repo_name, repo in repositories.items(): - targets_data[repo_name] = {"repo_data": repo.to_json_dict()} - commits_data = {} - for branch, commits_with_custom in repositories_branches_and_commits[ - repo_name - ].items(): - branch_commits_data = {} - previous_top_of_branch = top_commits_of_branches_before_pull[repo_name][ - branch - ] - - branch_commits_data["before_pull"] = None - - if previous_top_of_branch is not None: - # this needs to be the same - implementation error otherwise - branch_commits_data["before_pull"] = ( - commits_with_custom[0] if len(commits_with_custom) else None - ) - - branch_commits_data["after_pull"] = ( - commits_with_custom[-1] if len(commits_with_custom) else None - ) - - if branch_commits_data["before_pull"] is not None: - commits_with_custom.pop(0) - branch_commits_data["new"] = commits_with_custom - additional_commits = ( - additional_commits_per_repo[repo_name].get(branch, []) - if repo_name in additional_commits_per_repo - else [] - ) - branch_commits_data["unauthenticated"] = additional_commits - commits_data[branch] = branch_commits_data - targets_data[repo_name]["commits"] = commits_data - return targets_data - - -def _update_target_repository( - repository, - new_commits, - target_commits, - allow_unauthenticated, - branch, -): - taf_logger.info( - "Validating target repository {} {} branch", repository.name, branch - ) - # if authenticated commits are allowed, return a list of all fetched commits which - # are newer tham the last authenticated commits - additional_commits = [] - # A new commit might have been pushed after the update process - # started and before 
fetch was called - # So, the number of new commits, pushed to the target repository, could - # be greater than the number of these commits according to the authentication - # repository. The opposite cannot be the case. - # In general, if there are additional commits in the target repositories, - # the updater will finish the update successfully, but will only update the - # target repositories until the latest validated commit - if not allow_unauthenticated: - update_successful = len(new_commits) >= len(target_commits) - if update_successful: - for target_commit, repo_commit in zip(target_commits, new_commits): - if target_commit != repo_commit: - taf_logger.error( - "Mismatch between commits {} and {}", target_commit, repo_commit - ) - update_successful = False - break - if len(new_commits) > len(target_commits): - additional_commits = new_commits[len(target_commits) :] - taf_logger.error( - "Found commits {} in repository {} that are not accounted for in the authentication repo. Unauthenticated commits are not allowed in this repo.", - additional_commits, - repository.name, - ) - update_successful = False - else: - taf_logger.info( - "Unauthenticated commits allowed in repository {}", repository.name - ) - update_successful = False - if not len(target_commits): - update_successful = True - additional_commits = new_commits - else: - target_commits_index = 0 - for new_commit_index, commit in enumerate(new_commits): - if commit in target_commits: - if commit != target_commits[target_commits_index]: - taf_logger.error( - "Mismatch between commits {} and {}", - commit, - target_commits[target_commits_index], - ) - break - else: - target_commits_index += 1 - if commit == target_commits[-1]: - update_successful = True - if commit != new_commits[-1]: - additional_commits = new_commits[new_commit_index + 1 :] - break - if len(additional_commits): - taf_logger.warning( - "Found commits {} in repository {} which are newer than the last authenticable commit." 
- "Repository will be updated up to commit {}", - additional_commits, - repository.name, - commit, - ) - - if not update_successful: - taf_logger.error( - "Mismatch between target commits specified in authentication repository and the " - "target repository {}", - repository.name, - ) - raise UpdateFailedError( - "Mismatch between target commits specified in authentication repository" - f" and target repository {repository.name} on branch {branch}" - ) - taf_logger.info("Successfully validated {}", repository.name) - - if len(additional_commits): - # these commits include all commits newer than last authenticated commit (if unauthenticated commits are allowed) - # that does not necessarily mean that the local repository is not up to date with the remote - # pull could've been run manually - # check where the current local head is - branch_current_head = repository.top_commit_of_branch(branch) - if branch_current_head in additional_commits: - additional_commits = additional_commits[ - additional_commits.index(branch_current_head) + 1 : - ] - - return additional_commits - - @timed_run("Validating repository") def validate_repository( clients_auth_path, @@ -1226,61 +591,7 @@ def validate_repository( ) except Exception as e: raise ValidationFailedError( - f"Validation or repository {auth_repo_name} failed due to error {e}" + f"Validation or repository {auth_repo_name} failed due to error: {e}" ) settings.overwrite_last_validated_commit = False settings.last_validated_commit = None - - -def _validate_authentication_repository( - repository_updater, - users_auth_repo, - out_of_band_authentication, - auth_repo_name, - expected_repo_type, -): - error_msg = None - # this is the repository cloned inside the temp directory - # we validate it before updating the actual authentication repository - validation_auth_repo = repository_updater.validation_auth_repo - commits = repository_updater.commits - - if ( - out_of_band_authentication is not None - and 
users_auth_repo.last_validated_commit is None - and commits[0] != out_of_band_authentication - ): - error_msg = UpdateFailedError( - f"First commit of repository {auth_repo_name} does not match " - "out of band authentication commit" - ) - # used for testing purposes - if settings.overwrite_last_validated_commit: - last_validated_commit = settings.last_validated_commit - else: - last_validated_commit = users_auth_repo.last_validated_commit - - if expected_repo_type != UpdateType.EITHER: - # check if the repository being updated is a test repository - if validation_auth_repo.is_test_repo and expected_repo_type != UpdateType.TEST: - error_msg = UpdateFailedError( - f"Repository {users_auth_repo.name} is a test repository. " - 'Call update with "--expected-repo-type" test to update a test ' - "repository" - ) - elif ( - not validation_auth_repo.is_test_repo - and expected_repo_type == UpdateType.TEST - ): - error_msg = UpdateFailedError( - f"Repository {users_auth_repo.name} is not a test repository," - ' but update was called with the "--expected-repo-type" test' - ) - # always cleanup repository updater - repository_updater.cleanup() - - return ( - commits, - error_msg, - last_validated_commit, - ) diff --git a/taf/updater/updater_pipeline.py b/taf/updater/updater_pipeline.py new file mode 100644 index 00000000..e865991a --- /dev/null +++ b/taf/updater/updater_pipeline.py @@ -0,0 +1,1099 @@ +from collections import defaultdict +from enum import Enum +import functools +from logging import DEBUG, INFO +from pathlib import Path +import shutil +import tempfile +from typing import Any, Dict, List, Optional +from attr import attrs, define, field +from git import GitError +from logdecorator import log_on_end, log_on_start +from taf.git import GitRepository + +import taf.settings as settings +import taf.repositoriesdb as repositoriesdb +from taf.auth_repo import AuthenticationRepository +from taf.exceptions import UpdateFailedError +from taf.updater.handlers import 
GitUpdater
# NOTE: the bare name above is the tail of an import statement that begins
# before this section; it is kept verbatim so that statement stays intact.
from taf.updater.lifecycle_handlers import Event
from taf.updater.types.update import UpdateType
from taf.utils import on_rm_error
from taf.log import taf_logger
from tuf.ngclient.updater import Updater
from tuf.repository_tool import TARGETS_DIRECTORY_NAME


# Substring looked for in an exception's type name / message to recognise
# expired-metadata failures raised during TUF validation.
EXPIRED_METADATA_ERROR = "ExpiredMetadataError"
PROTECTED_DIRECTORY_NAME = "protected"
INFO_JSON_PATH = f"{TARGETS_DIRECTORY_NAME}/{PROTECTED_DIRECTORY_NAME}/info.json"


class UpdateStatus(Enum):
    """Outcome reported by every pipeline step."""

    SUCCESS = 1
    PARTIAL = 2
    FAILED = 3


class RunMode(Enum):
    """Mode a step is registered for; ``ALL`` steps run in every mode."""

    UPDATE = 1
    LOCAL_VALIDATION = 2
    ALL = 3


@define
class UpdateState:
    """Mutable state shared by all steps of one pipeline run."""

    # set to None at the start of the first step, then to the list of commits
    auth_commits_since_last_validated: Optional[List[Any]] = field(factory=list)
    existing_repo: bool = field(default=False)
    update_status: Optional[UpdateStatus] = field(default=None)
    update_successful: bool = field(default=False)
    event: Optional[str] = field(default=None)
    users_auth_repo: Optional["AuthenticationRepository"] = field(default=None)
    validation_auth_repo: Optional["AuthenticationRepository"] = field(default=None)
    auth_repo_name: Optional[str] = field(default=None)
    error: Optional[Exception] = field(default=None)
    targets_data: Dict[str, Any] = field(factory=dict)
    # reset to None when the validation has to start from the first commit
    last_validated_commit: Optional[str] = field(factory=str)
    target_repositories: Dict[str, "GitRepository"] = field(factory=dict)
    cloned_target_repositories: List["GitRepository"] = field(factory=list)
    target_branches_data_from_auth_repo: Dict = field(factory=dict)
    targets_data_by_auth_commits: Dict = field(factory=dict)
    old_heads_per_target_repos_branches: Dict[str, Dict[str, str]] = field(factory=dict)
    fetched_commits_per_target_repos_branches: Dict[str, Dict[str, List[str]]] = field(
        factory=dict
    )
    # repository name -> branch -> validated commits, in validation order
    validated_commits_per_target_repos_branches: Dict[
        str, Dict[str, List[str]]
    ] = field(factory=dict)
    additional_commits_per_target_repos_branches: Dict[
        str, Dict[str, List[str]]
    ] = field(factory=dict)
    validated_auth_commits: List[str] = field(factory=list)


@attrs
class UpdateOutput:
    """Result of a pipeline run, built by ``set_output``."""

    event: str = field()
    users_auth_repo: Any = field()
    auth_repo_name: str = field()
    commits_data: Dict[str, Any] = field()
    error: Optional[Exception] = field(default=None)
    targets_data: Dict[str, Any] = field(factory=dict)


def cleanup_decorator(pipeline_function):
    """Wrap a pipeline step so that a failed first-time clone is removed.

    After the wrapped step finishes (normally or with an exception), the
    user's authentication repository and its conf directory are deleted if
    the step recorded ``Event.FAILED`` and the repository did not exist
    before the run.
    """

    @functools.wraps(pipeline_function)
    def wrapper(self, *args, **kwargs):
        try:
            return pipeline_function(self, *args, **kwargs)
        finally:
            users_auth_repo = self.state.users_auth_repo
            # users_auth_repo stays None when the step failed before the
            # repository name could be determined; there is nothing to remove
            # then, and dereferencing it would mask the original error
            if (
                self.state.event == Event.FAILED
                and not self.state.existing_repo
                and users_auth_repo is not None
            ):
                shutil.rmtree(users_auth_repo.path, onerror=on_rm_error)
                shutil.rmtree(users_auth_repo.conf_dir)

    return wrapper


class Pipeline:
    """Runs an ordered list of ``(step, RunMode)`` pairs.

    Subclasses are expected to provide ``self.state`` (with ``error``,
    ``update_status`` and ``auth_repo_name`` attributes) and may override
    ``set_output``.
    """

    def __init__(self, steps, run_mode):
        self.steps = steps
        self.current_step = None
        self.run_mode = run_mode

    def run(self):
        """Execute every step registered for ``RunMode.ALL`` or the current mode.

        Raises:
            Exception: whatever a step raised, or ``UpdateFailedError`` when a
                step returned ``UpdateStatus.FAILED`` (re-raised by
                ``handle_error`` after logging).
        """
        for step, step_run_mode in self.steps:
            try:
                if step_run_mode in (RunMode.ALL, self.run_mode):
                    self.current_step = step
                    update_status = step()
                    if update_status == UpdateStatus.FAILED:
                        raise UpdateFailedError(self.state.error)
                    self.state.update_status = update_status

            except Exception as e:
                # handle_error re-raises, so the break below (and set_output)
                # is only reached if a subclass overrides it not to raise
                self.handle_error(e)
                break

        self.set_output()

    def handle_error(self, e):
        """Log the error together with the failing step's name and re-raise it."""
        taf_logger.error(
            "An error occurred while updating repository {} while running step {}: {}",
            self.state.auth_repo_name,
            self.current_step.__name__,
            str(e),
        )
        raise e

    def set_output(self):
        """Hook for subclasses; the base implementation does nothing."""
        pass


class AuthenticationRepositoryUpdatePipeline(Pipeline):
    """Pipeline which validates and updates an authentication repository and
    the target repositories it references."""

    def __init__(
        self,
        url,
        clients_auth_library_dir,
        targets_library_dir,
        auth_repo_name,
        update_from_filesystem,
        expected_repo_type,
        target_repo_classes,
        target_factory,
        only_validate,
        validate_from_commit,
        conf_directory_root,
        out_of_band_authentication,
        checkout,
        excluded_target_globs,
    ):

        super().__init__(
            steps=[
                (self.clone_remote_and_run_tuf_updater, RunMode.ALL),
                (self.clone_or_fetch_users_auth_repo, RunMode.UPDATE),
                (self.load_target_repositories, RunMode.ALL),
                (self.check_if_repositories_on_disk, RunMode.LOCAL_VALIDATION),
                (self.clone_target_repositories_if_not_on_disk, RunMode.UPDATE),
                (self.determine_start_commits, RunMode.ALL),
                (self.get_targets_data_from_auth_repo, RunMode.ALL),
                (self.get_target_repositories_commits, RunMode.ALL),
                (self.validate_target_repositories, RunMode.ALL),
                (
                    self.validate_and_set_additional_commits_of_target_repositories,
                    RunMode.ALL,
                ),
                (self.merge_commits, RunMode.UPDATE),
                (self.set_target_repositories_data, RunMode.UPDATE),
            ],
            run_mode=RunMode.LOCAL_VALIDATION if only_validate else RunMode.UPDATE,
        )

        self.url = url
        self.clients_auth_library_dir = clients_auth_library_dir
        self.targets_library_dir = targets_library_dir
        self.update_from_filesystem = update_from_filesystem
        self.expected_repo_type = expected_repo_type
        self.target_repo_classes = target_repo_classes
        self.target_factory = target_factory
        self.only_validate = only_validate
        self.validate_from_commit = validate_from_commit
        self.conf_directory_root = conf_directory_root
        self.out_of_band_authentication = out_of_band_authentication
        self.checkout = checkout
        self.excluded_target_globs = excluded_target_globs

        self.state = UpdateState()
        self.state.auth_repo_name = auth_repo_name
        self.state.targets_data = {}
        self._output = None

    @property
    def output(self):
        """The ``UpdateOutput`` of the last run.

        Raises:
            ValueError: if no output has been set yet.
        """
        if self._output is None:
            raise ValueError(
                "Pipeline has not been run yet. Please run the pipeline first."
            )
        return self._output

    @log_on_start(
        INFO, "Cloning repository and running TUF updater...", logger=taf_logger
    )
    @cleanup_decorator
    def clone_remote_and_run_tuf_updater(self):
        """Clone the validation repository, run the TUF updater on it and
        record the commits, event and last validated commit in the state.

        Returns:
            UpdateStatus.SUCCESS, or UpdateStatus.FAILED if anything raised
            (the exception is stored in ``state.error``).
        """
        settings.update_from_filesystem = self.update_from_filesystem
        settings.conf_directory_root = self.conf_directory_root
        git_updater = None
        try:
            self.state.auth_commits_since_last_validated = None
            # When the name is not known yet the flag is set to a truthy
            # value, so a failure before the name is resolved never triggers
            # removal of a directory. It is refreshed after TUF validation.
            self.state.existing_repo = (
                Path(self.clients_auth_library_dir, self.state.auth_repo_name).exists()
                if self.state.auth_repo_name is not None
                else True
            )

            # Clone the validation repository in temp.
            self.state.auth_repo_name = _clone_validation_repo(
                self.url, self.state.auth_repo_name
            )
            git_updater = GitUpdater(
                self.url, self.clients_auth_library_dir, self.state.auth_repo_name
            )
            self.state.users_auth_repo = git_updater.users_auth_repo
            _run_tuf_updater(git_updater)
            self.state.existing_repo = self.state.users_auth_repo.is_git_repository_root
            self.state.validation_auth_repo = git_updater.validation_auth_repo
            self.state.auth_commits_since_last_validated = list(git_updater.commits)
            self._validate_out_of_band_and_update_type()

            self.state.event = (
                Event.CHANGED
                if len(self.state.auth_commits_since_last_validated) > 1
                else Event.UNCHANGED
            )

            # used for testing purposes
            if settings.overwrite_last_validated_commit:
                self.state.last_validated_commit = settings.last_validated_commit
            else:
                self.state.last_validated_commit = (
                    self.state.users_auth_repo.last_validated_commit
                )

            return UpdateStatus.SUCCESS

        except Exception as e:
            self.state.error = e
            self.state.users_auth_repo = None

            if self.state.auth_repo_name is not None:
                self.state.users_auth_repo = AuthenticationRepository(
                    self.clients_auth_library_dir,
                    self.state.auth_repo_name,
                    urls=[self.url],
                    conf_directory_root=self.conf_directory_root,
                )
            self.state.event = Event.FAILED
            return UpdateStatus.FAILED
        finally:
            # always clean up repository updater
            if git_updater is not None:
                git_updater.cleanup()

    @log_on_start(
        INFO, "Validating out of band commit and update type", logger=taf_logger
    )
    def _validate_out_of_band_and_update_type(self):
        """Check the out-of-band commit and the expected repository type.

        Raises:
            UpdateFailedError: if the first commit does not match the
                out-of-band commit on a first update, or if the repository's
                test/non-test kind contradicts ``expected_repo_type``.
        """
        # this is the repository cloned inside the temp directory
        # we validate it before updating the actual authentication repository
        if (
            self.out_of_band_authentication is not None
            and self.state.users_auth_repo.last_validated_commit is None
            and self.state.auth_commits_since_last_validated[0]
            != self.out_of_band_authentication
        ):
            raise UpdateFailedError(
                f"First commit of repository {self.state.auth_repo_name} does not match "
                "out of band authentication commit"
            )

        if self.expected_repo_type != UpdateType.EITHER:
            # check if the repository being updated is a test repository
            is_test_repo = self.state.validation_auth_repo.is_test_repo
            if is_test_repo and self.expected_repo_type != UpdateType.TEST:
                raise UpdateFailedError(
                    f"Repository {self.state.users_auth_repo.name} is a test repository. "
                    'Call update with "--expected-repo-type" test to update a test '
                    "repository"
                )
            elif not is_test_repo and self.expected_repo_type == UpdateType.TEST:
                raise UpdateFailedError(
                    f"Repository {self.state.users_auth_repo.name} is not a test repository,"
                    ' but update was called with the "--expected-repo-type" test'
                )

    @log_on_start(
        INFO,
        "Cloning or updating user's authentication repository...",
        logger=taf_logger,
    )
    def clone_or_fetch_users_auth_repo(self):
        """Fetch the user's authentication repository, or clone it if it is
        not on disk yet. Nothing is done when only validating."""
        if not self.only_validate:
            # fetch the latest commit or clone the repository without checkout
            # do not merge before targets are validated as well
            try:
                if self.state.existing_repo:
                    self.state.users_auth_repo.fetch(fetch_all=True)
                else:
                    self.state.users_auth_repo.clone()
            except Exception as e:
                self.state.error = e
                self.state.event = Event.FAILED
                taf_logger.error(e)
                return UpdateStatus.FAILED
        return UpdateStatus.SUCCESS

    @log_on_start(DEBUG, "Loading target repositories", logger=taf_logger)
    def load_target_repositories(self):
        """Load repositories for all commits being validated and keep the
        deduplicated set for the most recent commit in the state."""
        try:
            repositoriesdb.load_repositories(
                self.state.users_auth_repo,
                repo_classes=self.target_repo_classes,
                factory=self.target_factory,
                library_dir=self.targets_library_dir,
                commits=self.state.auth_commits_since_last_validated,
                only_load_targets=False,
                excluded_target_globs=self.excluded_target_globs,
            )
            self.state.target_repositories = (
                repositoriesdb.get_deduplicated_repositories(
                    self.state.users_auth_repo,
                    self.state.auth_commits_since_last_validated[-1:],
                )
            )
            return UpdateStatus.SUCCESS
        except Exception as e:
            self.state.error = e
            self.state.event = Event.FAILED
            taf_logger.error(e)
            return UpdateStatus.FAILED

    @log_on_start(
        INFO,
        "Checking if all target repositories are already on disk...",
        logger=taf_logger,
    )
    def check_if_repositories_on_disk(self):
        """Fail local validation if a target repository is not on disk.

        Raises:
            UpdateFailedError: for the first missing repository when
                ``only_validate`` is set.
        """
        for repository in self.state.target_repositories.values():
            if not repository.is_git_repository_root and self.only_validate:
                self.state.targets_data = {}
                msg = f"{repository.name} not on disk. Please run update to clone the repositories."
                taf_logger.error(msg)
                raise UpdateFailedError(msg)
        return UpdateStatus.SUCCESS

    @log_on_start(
        INFO, "Cloning target repositories which are not on disk...", logger=taf_logger
    )
    @log_on_end(INFO, "Finished cloning target repositories", logger=taf_logger)
    def clone_target_repositories_if_not_on_disk(self):
        """Clone (without checkout) every target repository missing on disk
        and remember which ones were cloned during this run."""
        try:
            self.state.cloned_target_repositories = []
            for repository in self.state.target_repositories.values():
                if not repository.is_git_repository_root:
                    repository.clone(no_checkout=True)
                    self.state.cloned_target_repositories.append(repository)
            return UpdateStatus.SUCCESS
        except Exception as e:
            self.state.error = e
            self.state.event = Event.FAILED
            taf_logger.error(e)
            return UpdateStatus.FAILED

    @log_on_start(
        INFO, "Validating initial state of target repositories...", logger=taf_logger
    )
    @log_on_end(
        INFO,
        "Checking initial state of repositories",
        logger=taf_logger,
    )
    def determine_start_commits(self):
        """Record the previously validated head of each target branch, or
        restart the validation from the first commit if the local target
        repositories are not in sync with the last validated commit."""
        try:
            self.state.targets_data_by_auth_commits = (
                self.state.users_auth_repo.targets_data_by_auth_commits(
                    self.state.auth_commits_since_last_validated
                )
            )
            self.state.old_heads_per_target_repos_branches = defaultdict(dict)
            is_initial_state_in_sync = True
            # if last validated commit was not manually modified (set to a newer commit)
            # target repositories data that is extracted to them (commit and branch)
            # should be present in the local repository
            # if the local repository was manually modified (say, something was committed)
            # we still expect the last validated target commit to exist
            # and the remaining commits will be validated afterwards
            # if the last validated target commit does not exist, start the validation from scratch
            if self.state.last_validated_commit is not None:
                for repository in self.state.target_repositories.values():
                    self.state.old_heads_per_target_repos_branches[repository.name] = {}
                    last_validated_repository_commits_data = (
                        self.state.targets_data_by_auth_commits[repository.name].get(
                            self.state.last_validated_commit, {}
                        )
                    )

                    if last_validated_repository_commits_data:
                        # if this is not set, it means that the repository did not exist in this revision
                        if repository in self.state.cloned_target_repositories:
                            is_initial_state_in_sync = False
                            break
                        current_branch = last_validated_repository_commits_data.get(
                            "branch", repository.default_branch
                        )
                        last_validated_commit = last_validated_repository_commits_data[
                            "commit"
                        ]

                        branch_exists = repository.branch_exists(
                            current_branch, include_remotes=False
                        )
                        if not branch_exists:
                            is_initial_state_in_sync = False
                            break
                        top_commit_of_branch = repository.top_commit_of_branch(
                            current_branch
                        )
                        if top_commit_of_branch != last_validated_commit:
                            # check if top commit is newer (which is fine, it will be validated)
                            # or older, meaning that the authentication repository contains
                            # additional commits, so it would be necessary to find older auth repo
                            # commit and start the validation from there
                            if (
                                current_branch
                                not in repository.branches_containing_commit(
                                    last_validated_commit
                                )
                            ):
                                is_initial_state_in_sync = False
                                break

                        self.state.old_heads_per_target_repos_branches[repository.name][
                            current_branch
                        ] = last_validated_commit

            if not is_initial_state_in_sync:
                taf_logger.info(
                    f"Repository {self.state.users_auth_repo.name}: states of target repositories are not in sync with last validated commit. Starting the update from the beginning"
                )
                self.state.last_validated_commit = None
                self.state.auth_commits_since_last_validated = (
                    self.state.users_auth_repo.all_commits_on_branch(
                        self.state.users_auth_repo.default_branch
                    )
                )
                self.state.targets_data_by_auth_commits = (
                    self.state.users_auth_repo.targets_data_by_auth_commits(
                        self.state.auth_commits_since_last_validated
                    )
                )

            return UpdateStatus.SUCCESS
        except Exception as e:
            self.state.error = e
            self.state.event = Event.FAILED
            taf_logger.error(e)
            return UpdateStatus.FAILED

    def get_targets_data_from_auth_repo(self):
        """Collect, per target repository, the sorted list of distinct
        branches referenced by the authentication repository's commits."""
        self.state.target_branches_data_from_auth_repo = {
            repo_name: sorted(
                # a set removes duplicate branches
                {commit_data["branch"] for commit_data in commits_data.values()}
            )
            for repo_name, commits_data in self.state.targets_data_by_auth_commits.items()
        }
        return UpdateStatus.SUCCESS

    @log_on_start(DEBUG, "Fetching commits of target repositories", logger=taf_logger)
    def get_target_repositories_commits(self):
        """Fetch target repositories (unless only validating) and store, per
        repository and branch, the list of commits to validate in
        ``state.fetched_commits_per_target_repos_branches``.

        When a previously validated head is known for a branch, the list
        starts with that head.

        Returns:
            UpdateStatus.SUCCESS

        Raises:
            UpdateFailedError: when only validating and a required local
                branch does not exist.
        """
        self.state.fetched_commits_per_target_repos_branches = defaultdict(dict)
        for repository in self.state.target_repositories.values():
            if repository.name not in self.state.target_branches_data_from_auth_repo:
                # exists in repositories.json, not target files
                continue
            for branch in self.state.target_branches_data_from_auth_repo[
                repository.name
            ]:
                if repository not in self.state.cloned_target_repositories:
                    if self.only_validate:
                        branch_exists = repository.branch_exists(
                            branch, include_remotes=False
                        )
                        if not branch_exists:
                            self.state.targets_data = {}
                            msg = f"{repository.name} does not contain a local branch named {branch} and cannot be validated. Please update the repositories."
                            taf_logger.error(msg)
                            raise UpdateFailedError(msg)
                    else:
                        repository.fetch(branch=branch)

                old_head = self.state.old_heads_per_target_repos_branches[
                    repository.name
                ].get(branch)
                if old_head is not None:
                    if not self.only_validate:
                        fetched_commits = repository.all_commits_on_branch(
                            branch=f"origin/{branch}"
                        )

                        # if the local branch does not exist (the branch was not checked out locally)
                        # fetched commits will include already validated commits
                        # check which commits are newer that the previous head commit
                        if old_head in fetched_commits:
                            fetched_commits_on_target_repo_branch = fetched_commits[
                                fetched_commits.index(old_head) + 1 :
                            ]
                        else:
                            fetched_commits_on_target_repo_branch = (
                                repository.all_commits_since_commit(old_head, branch)
                            )
                            for commit in fetched_commits:
                                if commit not in fetched_commits_on_target_repo_branch:
                                    fetched_commits_on_target_repo_branch.append(commit)
                    else:
                        fetched_commits_on_target_repo_branch = (
                            repository.all_commits_since_commit(old_head, branch)
                        )
                    # the old head goes first so that validation can look up
                    # the commit which follows it
                    fetched_commits_on_target_repo_branch.insert(0, old_head)
                else:
                    branch_exists = repository.branch_exists(
                        branch, include_remotes=False
                    )
                    if branch_exists:
                        # this happens in the case when last_validated_commit does not exist
                        # we want to validate all commits, so combine existing commits and
                        # fetched commits
                        fetched_commits_on_target_repo_branch = (
                            repository.all_commits_on_branch(
                                branch=branch, reverse=True
                            )
                        )
                    else:
                        fetched_commits_on_target_repo_branch = []
                    if not self.only_validate:
                        try:
                            fetched_commits = repository.all_commits_on_branch(
                                branch=f"origin/{branch}"
                            )

                            # if the local branch does not exist (the branch was not checked out locally)
                            # fetched commits will include already validated commits
                            # check which commits are newer that the previous head commit
                            for commit in fetched_commits:
                                if commit not in fetched_commits_on_target_repo_branch:
                                    fetched_commits_on_target_repo_branch.append(commit)
                        except GitError:
                            # deliberate best effort: keep whatever local
                            # commits were collected above
                            pass
                self.state.fetched_commits_per_target_repos_branches[repository.name][
                    branch
                ] = fetched_commits_on_target_repo_branch
        return UpdateStatus.SUCCESS

    @log_on_start(INFO, "Validating target repositories...", logger=taf_logger)
    @log_on_end(INFO, "Validation of target repositories finished", logger=taf_logger)
    def validate_target_repositories(self):
        """
        Breadth-first update of target repositories
        Merge last valid commits at the end of the update

        For each authentication commit, every target repository listed in it
        is validated before moving on to the next authentication commit.

        Returns:
            UpdateStatus.SUCCESS if all commits were validated,
            UpdateStatus.PARTIAL if an error occurred after at least one
            authentication commit was validated, UpdateStatus.FAILED otherwise.
        """
        try:
            # need to be set to old head since that is the last validated target
            self.state.validated_commits_per_target_repos_branches = defaultdict(dict)

            last_validated_data_per_repositories = defaultdict(dict)
            self.state.validated_auth_commits = []
            for auth_commit in self.state.auth_commits_since_last_validated:
                for repository in self.state.target_repositories.values():
                    if repository.name not in self.state.targets_data_by_auth_commits:
                        continue
                    if (
                        auth_commit
                        not in self.state.targets_data_by_auth_commits[repository.name]
                    ):
                        continue
                    current_targets_data = self.state.targets_data_by_auth_commits[
                        repository.name
                    ][auth_commit]

                    current_branch = current_targets_data.get(
                        "branch", repository.default_branch
                    )
                    current_commit = current_targets_data["commit"]

                    if not len(last_validated_data_per_repositories[repository.name]):
                        # first time this repository is seen in this run:
                        # start from the data of the last validated commit
                        current_head_commit_and_branch = (
                            self.state.targets_data_by_auth_commits[
                                repository.name
                            ].get(self.state.last_validated_commit, {})
                        )
                        previous_branch = current_head_commit_and_branch.get("branch")
                        previous_commit = current_head_commit_and_branch.get("commit")
                        if previous_commit is not None and previous_branch is None:
                            previous_branch = repository.default_branch
                    else:
                        previous_branch = last_validated_data_per_repositories[
                            repository.name
                        ].get("branch")
                        previous_commit = last_validated_data_per_repositories[
                            repository.name
                        ]["commit"]

                    target_commits_from_target_repo = (
                        self.state.fetched_commits_per_target_repos_branches[
                            repository.name
                        ]
                    )
                    validated_commit = self._validate_current_repo_commit(
                        repository,
                        self.state.users_auth_repo,
                        previous_branch,
                        previous_commit,
                        current_branch,
                        current_commit,
                        target_commits_from_target_repo,
                        auth_commit,
                    )

                    last_validated_data_per_repositories[repository.name] = {
                        "commit": validated_commit,
                        "branch": current_branch,
                    }

                    self.state.validated_commits_per_target_repos_branches[
                        repository.name
                    ].setdefault(current_branch, []).append(validated_commit)

                # commit processed without an error
                self.state.validated_auth_commits.append(auth_commit)
            return UpdateStatus.SUCCESS
        except Exception as e:
            self.state.error = e
            taf_logger.error(e)
            if len(self.state.validated_auth_commits):
                self.state.event = Event.PARTIAL
                return UpdateStatus.PARTIAL
            self.state.event = Event.FAILED
            return UpdateStatus.FAILED

    def _validate_current_repo_commit(
        self,
        repository,
        users_auth_repo,
        previous_branch,
        previous_commit,
        current_branch,
        current_commit,
        target_commits_from_target_repo,
        current_auth_commit,
    ):
        """Check that the target repository's history contains the commit the
        authentication repository expects next.

        Parameters:
        - repository: target repository being validated.
        - users_auth_repo: authentication repository (used for error details).
        - previous_branch / previous_commit: last validated branch and commit
          of the target repository (may be None).
        - current_branch / current_commit: branch and commit listed in the
          authentication commit being validated.
        - target_commits_from_target_repo: mapping of branch to the list of
          that branch's commits.
        - current_auth_commit: authentication commit being validated.

        Returns:
        - The validated target commit.

        Raises:
        - UpdateFailedError if the expected commit cannot be matched.
        """

        def _validation_error(reason):
            # the commit date is looked up only when an error is reported
            commit_date = users_auth_repo.get_commit_date(current_auth_commit)
            return UpdateFailedError(
                f"Failure to validate {users_auth_repo.name} commit {current_auth_commit} committed on {commit_date}: "
                f"data repository {repository.name} was supposed to be at commit {current_commit} "
                f"{reason}"
            )

        target_commits_from_target_repos_on_branch = target_commits_from_target_repo[
            current_branch
        ]
        if previous_commit == current_commit:
            # target not updated in this revision
            return current_commit
        if previous_branch == current_branch:
            # same branch
            current_target_commit = _find_next_value(
                previous_commit, target_commits_from_target_repos_on_branch
            )
        elif target_commits_from_target_repos_on_branch:
            # next branch
            current_target_commit = target_commits_from_target_repos_on_branch[0]
        else:
            # next branch without any commits: report it as missing commits
            # instead of failing with an IndexError
            current_target_commit = None

        if current_target_commit is None:
            # there are commits missing from the target repository
            raise _validation_error(f"but commit not on branch {current_branch}")

        if current_commit == current_target_commit:
            return current_target_commit
        if not _is_unauthenticated_allowed(repository):
            raise _validation_error(f"but repo was at {current_target_commit}")
        # unauthenticated commits are allowed, try to skip them
        # if commits of the target repositories were swapped, commit which is expected to be found
        # after the current one will be skipped and it won't be found later, so validation will fail
        remaining_commits = target_commits_from_target_repos_on_branch[
            target_commits_from_target_repos_on_branch.index(current_target_commit) :
        ]
        for target_commit in remaining_commits:
            if current_commit == target_commit:
                return target_commit
            taf_logger.debug(
                f"{repository.name}: skipping target commit {target_commit}. Looking for commit {current_commit}"
            )
        raise _validation_error(f"but commit not on branch {current_branch}")

    @log_on_start(
        DEBUG,
        "Validating and setting additional commits of target repositories",
        logger=taf_logger,
    )
    def validate_and_set_additional_commits_of_target_repositories(self):
        """
        For target repository and for each branch, extract commits following the last validated commit
        These commits are not invalid. In case of repositories which cannot contain unauthenticated commits
        all of these commits will have to get signed if at least one commits on that branch needs to get signed
        In case of repositories which can contain unauthenticated commits, they do not even need to get signed

        As implemented below, trailing commits in a repository which does not
        allow unauthenticated commits raise UpdateFailedError, which this
        method turns into a PARTIAL status.
        """
        # only get additional commits if the validation was complete (not partial, up to a commit)
        self.state.additional_commits_per_target_repos_branches = defaultdict(dict)
        if self.state.update_status != UpdateStatus.SUCCESS:
            return self.state.update_status
        try:
            for repository in self.state.target_repositories.values():
                # this will only include branches that were, at least partially, validated (up until a certain point)
                for (
                    branch,
                    validated_commits,
                ) in self.state.validated_commits_per_target_repos_branches[
                    repository.name
                ].items():
                    last_validated_commit = validated_commits[-1]
                    branch_commits = (
                        self.state.fetched_commits_per_target_repos_branches[
                            repository.name
                        ][branch]
                    )
                    additional_commits = branch_commits[
                        branch_commits.index(last_validated_commit) + 1 :
                    ]
                    if len(additional_commits):
                        if not _is_unauthenticated_allowed(repository):
                            raise UpdateFailedError(
                                f"Target repository {repository.name} does not allow unauthenticated commits, but contains commit(s) {', '.join(additional_commits)} on branch {branch}"
                            )

                        taf_logger.info(
                            f"Repository {repository.name}: found commits succeeding the last authenticated commit on branch {branch}: {','.join(additional_commits)}"
                        )

                        # these commits include all commits newer than last authenticated commit (if unauthenticated commits are allowed)
                        # that does not necessarily mean that the local repository is not up to date with the remote
                        # pull could've been run manually
                        # check where the current local head is
                        branch_current_head = repository.top_commit_of_branch(branch)
                        if branch_current_head in additional_commits:
                            additional_commits = additional_commits[
                                additional_commits.index(branch_current_head) + 1 :
                            ]
                    self.state.additional_commits_per_target_repos_branches[
                        repository.name
                    ][branch] = additional_commits
            return self.state.update_status
        except UpdateFailedError as e:
            self.state.error = e
            taf_logger.error(e)
            self.state.event = Event.PARTIAL
            return UpdateStatus.PARTIAL
        except Exception as e:
            self.state.error = e
            taf_logger.error(e)
            self.state.event = Event.FAILED
            return UpdateStatus.FAILED

    @log_on_start(
        INFO, "Merging commits into target repositories...", logger=taf_logger
    )
    def merge_commits(self):
        """Determines which commits needs to be merged into the specified branch and
        merge it.

        For repositories which allow unauthenticated commits the newest
        fetched commit of the branch is merged; otherwise the last validated
        commit is merged. Nothing is merged when only validating.
        """
        try:
            if self.only_validate:
                return self.state.update_status

            if self.state.update_status == UpdateStatus.FAILED:
                # couldn't validate the first new commit
                # there is nothing to merge
                # remove cloned repositories if the initial commit was incorrect
                for repository in self.state.cloned_target_repositories:
                    taf_logger.debug("Removing cloned repository {}", repository.path)
                    shutil.rmtree(repository.path, onerror=on_rm_error)
            else:
                for repository in self.state.target_repositories.values():
                    # this will only include branches that were, at least partially, validated (up until a certain point)
                    for (
                        branch,
                        validated_commits,
                    ) in self.state.validated_commits_per_target_repos_branches[
                        repository.name
                    ].items():
                        last_validated_commit = validated_commits[-1]
                        commit_to_merge = (
                            last_validated_commit
                            if not _is_unauthenticated_allowed(repository)
                            else self.state.fetched_commits_per_target_repos_branches[
                                repository.name
                            ][branch][-1]
                        )
                        taf_logger.info(
                            "Repository {}: merging {} into branch {}",
                            repository.name,
                            commit_to_merge,
                            branch,
                        )
                        _merge_commit(repository, branch, commit_to_merge)
            return self.state.update_status
        except Exception as e:
            self.state.error = e
            taf_logger.error(e)
            self.state.event = Event.FAILED
            return UpdateStatus.FAILED

    def set_target_repositories_data(self):
        """Build ``state.targets_data``: for each target repository its
        ``repo_data`` and, per branch, the ``before_pull``, ``new``,
        ``unauthenticated`` and ``after_pull`` entries."""
        try:
            targets_data = {}
            for repo_name, repo in self.state.target_repositories.items():
                targets_data[repo_name] = {"repo_data": repo.to_json_dict()}
                if repo_name not in self.state.targets_data_by_auth_commits:
                    continue
                commits_data = self.state.targets_data_by_auth_commits[repo_name]

                branch_data = defaultdict(dict)

                # Iterate through auth_commits in the specified order
                for auth_commit in self.state.validated_auth_commits:
                    commit_info = commits_data.get(auth_commit)
                    if not commit_info or "branch" not in commit_info:
                        continue

                    # NOTE: pop removes "branch" from the dictionary stored in
                    # state.targets_data_by_auth_commits as well
                    branch = commit_info.pop("branch")

                    # Update the before_pull, after_pull, and new values
                    if branch not in branch_data:
                        old_head = self.state.old_heads_per_target_repos_branches.get(
                            repo_name, {}
                        ).get(branch)
                        if old_head is not None:
                            branch_data[branch]["before_pull"] = old_head
                        branch_data[branch]["new"] = []
                        branch_data[branch][
                            "unauthenticated"
                        ] = self.state.additional_commits_per_target_repos_branches.get(
                            repo_name, {}
                        ).get(
                            branch, []
                        )
                    else:
                        branch_data[branch]["new"].append(commit_info)
                    branch_data[branch]["after_pull"] = commit_info

                targets_data[repo_name]["commits"] = branch_data
            self.state.targets_data = targets_data
            return self.state.update_status
        except Exception as e:
            self.state.error = e
            taf_logger.error(e)
            self.state.event = Event.FAILED
            return UpdateStatus.FAILED

    def set_output(self):
        """Summarise the run in an ``UpdateOutput`` stored in ``self._output``."""
        validated_auth_commits = self.state.validated_auth_commits
        if self.state.auth_commits_since_last_validated is None:
            commit_before_pull = None
            new_commits = []
            commit_after_pull = None
        else:
            # for an existing repository the first validated commit is the
            # state before the pull and the rest are new commits
            commit_before_pull = (
                validated_auth_commits[0]
                if self.state.existing_repo and len(validated_auth_commits)
                else None
            )

            if len(validated_auth_commits):
                commit_after_pull = validated_auth_commits[-1]
            else:
                commit_after_pull = None

            if not self.state.existing_repo:
                new_commits = validated_auth_commits
            else:
                new_commits = (
                    validated_auth_commits[1:] if len(validated_auth_commits) else []
                )
        commits_data = {
            "before_pull": commit_before_pull,
            "new": new_commits,
            "after_pull": commit_after_pull,
        }
        self._output = UpdateOutput(
            event=self.state.event,
            users_auth_repo=self.state.users_auth_repo,
            auth_repo_name=self.state.auth_repo_name,
            commits_data=commits_data,
            error=self.state.error,
            targets_data=self.state.targets_data,
        )


def _clone_validation_repo(url, repository_name):
    """
    Clones the authentication repository based on the url specified using the
    mirrors parameter. The repository is cloned as a bare repository
    to the temp directory and will be deleted once the update is done.

    If repository_name isn't provided (default value), extract it from info.json.

    Returns:
    - The repository name (the given one, or "namespace/name" from info.json).

    Raises:
    - UpdateFailedError if the name has to be read from info.json and that fails.
    """
    temp_dir = tempfile.mkdtemp()
    path = Path(temp_dir, "auth_repo").absolute()
    validation_auth_repo = AuthenticationRepository(
        path=path, urls=[url], alias="Validation repository"
    )
    validation_auth_repo.clone(bare=True)
    validation_auth_repo.fetch(fetch_all=True)

    settings.validation_repo_path = validation_auth_repo.path

    validation_head_sha = validation_auth_repo.top_commit_of_branch(
        validation_auth_repo.default_branch
    )

    if repository_name is None:
        try:
            info = validation_auth_repo.get_json(validation_head_sha, INFO_JSON_PATH)
            repository_name = f'{info["namespace"]}/{info["name"]}'
        except Exception as e:
            # keep the original exception as the cause for easier debugging
            raise UpdateFailedError(
                "Error during info.json parse. When specifying --clients-library-dir check if info.json metadata exists in targets/protected or provide full path to auth repo"
            ) from e

    validation_auth_repo.cleanup()
    return repository_name


def _is_unauthenticated_allowed(repository):
    """Return the repository's "allow-unauthenticated-commits" custom setting
    (False when it is not set)."""
    return repository.custom.get("allow-unauthenticated-commits", False)


@log_on_start(
    INFO,
    "Running TUF validation of the authentication repository...",
    logger=taf_logger,
)
def _run_tuf_updater(git_updater):
    """Validate metadata and target files at every revision provided by
    ``git_updater``, creating a fresh TUF ``Updater`` for each revision.

    Raises:
    - UpdateFailedError if validation of a revision fails. Expired-metadata
      errors are only logged as warnings unless ``settings.strict`` is set.
    """

    def _init_updater():
        try:
            return Updater(
                git_updater.metadata_dir,
                "metadata/",
                git_updater.targets_dir,
                "targets/",
                fetcher=git_updater,
            )
        except Exception as e:
            taf_logger.error(f"Failed to instantiate TUF Updater due to error: {e}")
            raise e

    def _update_tuf_current_revision(updater):
        current_commit = git_updater.current_commit
        try:
            updater.refresh()
            taf_logger.debug("Validated metadata files at revision {}", current_commit)
            # using refresh, we have updated all main roles
            # we still need to update the delegated roles (if there are any)
            # and validate any target files
            current_targets = git_updater.get_current_targets()
            for target_path in current_targets:
                target_filepath = target_path.replace("\\", "/")

                targetinfo = updater.get_targetinfo(target_filepath)
                target_data = git_updater.get_current_target_data(
                    target_filepath, raw=True
                )
                targetinfo.verify_length_and_hashes(target_data)

                taf_logger.debug(
                    "Successfully validated target file {} at {}",
                    target_filepath,
                    current_commit,
                )
        except Exception as e:
            metadata_expired = EXPIRED_METADATA_ERROR in type(
                e
            ).__name__ or EXPIRED_METADATA_ERROR in str(e)
            if not metadata_expired or settings.strict:
                taf_logger.error(
                    "Validation of authentication repository {} failed at revision {} due to error: {}",
                    git_updater.users_auth_repo.name,
                    current_commit,
                    e,
                )
                raise UpdateFailedError(
                    f"Validation of authentication repository {git_updater.users_auth_repo.name}"
                    f" failed at revision {current_commit} due to error: {e}"
                ) from e
            taf_logger.warning(
                f"WARNING: Could not validate authentication repository {git_updater.users_auth_repo.name} at revision {current_commit} due to error: {e}"
            )

    while not git_updater.update_done():
        # the updater is passed explicitly rather than read from this scope
        _update_tuf_current_revision(_init_updater())

    taf_logger.info(
        "Successfully validated authentication repository {}",
        git_updater.users_auth_repo.name,
    )


def _find_next_value(value, values_list):
    """
    Find the next value in the list after the given value.

    Parameters:
    - value: The value to look for.
    - values_list: The list of values.

    Returns:
    - The next value in the list after the given value, or None if there isn't one.
    """
    try:
        index = values_list.index(value)
    except ValueError:
        return None  # value not in list
    if index < len(values_list) - 1:  # check if there are remaining values
        return values_list[index + 1]
    return None


def _merge_commit(
    repository, branch, commit_to_merge, checkout=True, force_revert=False
):
    """Merge the specified commit into the given branch and check out the branch.
    If the repository cannot contain unauthenticated commits, check out the merged commit.

    Parameters:
    - repository: target repository to update.
    - branch: branch to check out and merge into.
    - commit_to_merge: commit which the branch should be brought to.
    - checkout: check the branch out again after merging.
    - force_revert: reset the branch to the commit if the repository already
      contains commits following it.

    Raises:
    - UpdateFailedError if the branch cannot be checked out.
    """
    taf_logger.info(
        "{} Merging commit {} into branch {}", repository.name, commit_to_merge, branch
    )
    try:
        repository.checkout_branch(branch, raise_anyway=True)
    except GitError as e:
        # two scenarios:
        # current git repository is in an inconsistent state:
        # - .git/index.lock exists (git partial update got applied)
        # should get addressed in https://github.com/openlawlibrary/taf/issues/210
        # current git repository has uncommitted changes:
        # we do not want taf to lose any repo data, so we do not reset the repository.
        # for now, raise an update error and let the user manually reset the repository
        taf_logger.error(
            "Could not checkout branch {} during commit merge. Error {}", branch, e
        )
        raise UpdateFailedError(
            f"Repository {repository.name} should contain only committed changes. \n"
            f"Please update the repository at {repository.path} manually and try again."
        )

    commit_merged = False
    if force_revert:
        # check if repository already contains this commit that needs to be merged
        # and commits following it
        commits_since_last_validated = repository.all_commits_since_commit(
            commit_to_merge
        )
        if len(commits_since_last_validated):
            repository.reset_to_commit(commit_to_merge)
            commit_merged = True

    if not commit_merged:
        repository.merge_commit(commit_to_merge)
    if checkout:
        taf_logger.info("{}: checking out branch {}", repository.name, branch)
        repository.checkout_branch(branch)