From f928959b7595f0dc27d402e159c5a4ac77947904 Mon Sep 17 00:00:00 2001 From: Benoit Perigaud <8754100+b-per@users.noreply.github.com> Date: Mon, 18 Nov 2024 19:28:18 +0100 Subject: [PATCH 1/5] Rename changeset to a more generic name --- .../{changeset => cloud_yaml_mapping}/change_set.py | 0 tests/{changeset => cloud_yaml_mapping}/test_filter_config.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename src/dbt_jobs_as_code/{changeset => cloud_yaml_mapping}/change_set.py (100%) rename tests/{changeset => cloud_yaml_mapping}/test_filter_config.py (97%) diff --git a/src/dbt_jobs_as_code/changeset/change_set.py b/src/dbt_jobs_as_code/cloud_yaml_mapping/change_set.py similarity index 100% rename from src/dbt_jobs_as_code/changeset/change_set.py rename to src/dbt_jobs_as_code/cloud_yaml_mapping/change_set.py diff --git a/tests/changeset/test_filter_config.py b/tests/cloud_yaml_mapping/test_filter_config.py similarity index 97% rename from tests/changeset/test_filter_config.py rename to tests/cloud_yaml_mapping/test_filter_config.py index 2723ed3..b187bd4 100644 --- a/tests/changeset/test_filter_config.py +++ b/tests/cloud_yaml_mapping/test_filter_config.py @@ -2,7 +2,7 @@ from io import StringIO import pytest -from dbt_jobs_as_code.changeset.change_set import filter_config +from dbt_jobs_as_code.cloud_yaml_mapping.change_set import filter_config from loguru import logger From c1a1731ba5949c2106ff9075b40fce39cda3d067 Mon Sep 17 00:00:00 2001 From: Benoit Perigaud <8754100+b-per@users.noreply.github.com> Date: Mon, 18 Nov 2024 19:28:40 +0100 Subject: [PATCH 2/5] Return is not Optional --- src/dbt_jobs_as_code/client/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dbt_jobs_as_code/client/__init__.py b/src/dbt_jobs_as_code/client/__init__.py index 6de739c..05c7974 100644 --- a/src/dbt_jobs_as_code/client/__init__.py +++ b/src/dbt_jobs_as_code/client/__init__.py @@ -141,7 +141,7 @@ def delete_job(self, job: JobDefinition) -> None: else: logger.success("Job deleted successfully.") - def get_job(self, job_id: int) -> Optional[JobDefinition]: + def get_job(self, job_id: int) -> JobDefinition: """Generate a Job based on a dbt Cloud job.""" self._check_for_creds() From 7d75948b8cc7054caac4a80c3fc7c7f6d61fd432 Mon Sep 17 00:00:00 2001 From: Benoit Perigaud <8754100+b-per@users.noreply.github.com> Date: Mon, 18 Nov 2024 19:28:56 +0100 Subject: [PATCH 3/5] Fix pylance issue --- src/dbt_jobs_as_code/schemas/custom_environment_variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dbt_jobs_as_code/schemas/custom_environment_variable.py b/src/dbt_jobs_as_code/schemas/custom_environment_variable.py index 6f3c68b..847df13 100644 --- a/src/dbt_jobs_as_code/schemas/custom_environment_variable.py +++ b/src/dbt_jobs_as_code/schemas/custom_environment_variable.py @@ -26,7 +26,7 @@ class CustomEnvironmentVariablePayload(CustomEnvironmentVariable): project_id: int account_id: int raw_value: Optional[str] = None - value: Optional[str] = Field(None, exclude=True) + value: Optional[str] = Field(default=None, exclude=True) def __init__(self, **data: Any): data["raw_value"] = data["value"] if "value" in data else data["display_value"] From 566df490f52ff2e8874ad673be45882b9e22df05 Mon Sep 17 00:00:00 2001 From: Benoit Perigaud <8754100+b-per@users.noreply.github.com> Date: Mon, 18 Nov 2024 19:30:48 +0100 Subject: [PATCH 4/5] Add link subcommand and new tests --- .../cloud_yaml_mapping/validate_link.py | 37 +++++++++ src/dbt_jobs_as_code/exporter/export.py 
| 4 +- src/dbt_jobs_as_code/main.py | 79 +++++++++++++++--- src/dbt_jobs_as_code/schemas/__init__.py | 1 + src/dbt_jobs_as_code/schemas/job.py | 42 ++++++---- .../schemas/load_job_schema.json | 14 ++++ .../cloud_yaml_mapping/test_validate_link.py | 80 +++++++++++++++++++ tests/loader/test_loader.py | 2 + 8 files changed, 233 insertions(+), 26 deletions(-) create mode 100644 src/dbt_jobs_as_code/cloud_yaml_mapping/validate_link.py create mode 100644 tests/cloud_yaml_mapping/test_validate_link.py diff --git a/src/dbt_jobs_as_code/cloud_yaml_mapping/validate_link.py b/src/dbt_jobs_as_code/cloud_yaml_mapping/validate_link.py new file mode 100644 index 0000000..045c38a --- /dev/null +++ b/src/dbt_jobs_as_code/cloud_yaml_mapping/validate_link.py @@ -0,0 +1,37 @@ +from dataclasses import dataclass +from typing import Optional + +from dbt_jobs_as_code.client import DBTCloud, DBTCloudException +from dbt_jobs_as_code.schemas.job import JobDefinition + + +@dataclass +class LinkableCheck: + can_be_linked: bool + message: str + linked_job: Optional[JobDefinition] = None + + +def can_be_linked( + job_identifier: str, job_definition: JobDefinition, dbt_cloud: DBTCloud +) -> LinkableCheck: + if job_definition.linked_id is None: + return LinkableCheck( + False, f"Job '{job_identifier}' doesn't have an ID in YAML. It cannot be linked" + ) + + try: + cloud_job = dbt_cloud.get_job(job_id=job_definition.linked_id) + except DBTCloudException as e: + return LinkableCheck( + False, + f"Job {job_definition.linked_id} doesn't exist in dbt Cloud. It cannot be linked", + ) + + if cloud_job.identifier is not None: + return LinkableCheck( + False, + f"Job {job_definition.linked_id} is already linked with the identifier {cloud_job.identifier}. You should unlink it before if you want to link it to a new identifier.", + ) + + return LinkableCheck(True, "", cloud_job) diff --git a/src/dbt_jobs_as_code/exporter/export.py b/src/dbt_jobs_as_code/exporter/export.py index 68b1ade..50d5887 100644 --- a/src/dbt_jobs_as_code/exporter/export.py +++ b/src/dbt_jobs_as_code/exporter/export.py @@ -5,12 +5,12 @@ from dbt_jobs_as_code.schemas.job import JobDefinition -def export_jobs_yml(jobs: list[JobDefinition]): +def export_jobs_yml(jobs: list[JobDefinition], include_linked_id: bool = False): """Export a list of job definitions to YML""" export_yml = {"jobs": {}} for id, cloud_job in enumerate(jobs): - export_yml["jobs"][f"import_{id + 1}"] = cloud_job.to_load_format() + export_yml["jobs"][f"import_{id + 1}"] = cloud_job.to_load_format(include_linked_id) print( "# yaml-language-server: $schema=https://raw.githubusercontent.com/dbt-labs/dbt-jobs-as-code/main/src/dbt_jobs_as_code/schemas/load_job_schema.json" diff --git a/src/dbt_jobs_as_code/main.py b/src/dbt_jobs_as_code/main.py index 33123c2..b5ce49d 100644 --- a/src/dbt_jobs_as_code/main.py +++ b/src/dbt_jobs_as_code/main.py @@ -1,17 +1,21 @@ import os import sys +from importlib.metadata import version from pathlib import Path import click from loguru import logger from rich.console import Console -from dbt_jobs_as_code.changeset.change_set import build_change_set -from dbt_jobs_as_code.client import DBTCloud +from dbt_jobs_as_code.client import DBTCloud, DBTCloudException +from dbt_jobs_as_code.cloud_yaml_mapping.change_set import build_change_set +from dbt_jobs_as_code.cloud_yaml_mapping.validate_link import LinkableCheck, can_be_linked from dbt_jobs_as_code.exporter.export import export_jobs_yml from dbt_jobs_as_code.loader.load import load_job_configuration from 
dbt_jobs_as_code.schemas.config import generate_config_schema +VERSION = version("dbt-jobs-as-code") + # adding the ability to disable ssl verification, useful for self-signed certificates and local testing option_disable_ssl_verification = click.option( "--disable-ssl-verification", @@ -52,7 +56,11 @@ ) -@click.group() +@click.group( + help=f"dbt-jobs-as-code {VERSION}\n\nA CLI to allow defining dbt Cloud jobs as code", + context_settings={"max_content_width": 120}, +) +@click.version_option(version=VERSION) def cli() -> None: pass @@ -277,6 +285,11 @@ def validate(config, vars_yml, online, disable_ssl_verification): help="Check if the job model has missing fields.", hidden=True, ) +@click.option( + "--include-linked-id", + is_flag=True, + help="Include the job ID when exporting jobs.", +) def import_jobs( config, account_id, @@ -285,6 +298,7 @@ def import_jobs( job_id, disable_ssl_verification, check_missing_fields=False, + include_linked_id=False, ): """ Generate YML file for import. @@ -354,7 +368,57 @@ def import_jobs( cloud_job.custom_environment_variables.append(env_var) logger.success(f"YML file for the current dbt Cloud jobs") - export_jobs_yml(cloud_jobs) + export_jobs_yml(cloud_jobs, include_linked_id) + + +@cli.command() +@option_disable_ssl_verification +@click.argument("config", type=click.File("r")) +@click.option("--dry-run", is_flag=True, help="In dry run mode we don't update dbt Cloud.") +def link(config, dry_run, disable_ssl_verification): + """ + Link the YML file to dbt Cloud by adding the identifier to the job name. + All relevant jobs get the part [[...]] added to their name + """ + + yaml_jobs = load_job_configuration(config, None).jobs + account_id = list(yaml_jobs.values())[0].account_id + + dbt_cloud = DBTCloud( + account_id=account_id, + api_key=os.environ.get("DBT_API_KEY"), + base_url=os.environ.get("DBT_BASE_URL", "https://cloud.getdbt.com"), + disable_ssl_verification=disable_ssl_verification, + ) + + some_jobs_updated = False + for current_identifier, job_details in yaml_jobs.items(): + linkable_check = can_be_linked(current_identifier, job_details, dbt_cloud) + if not linkable_check.can_be_linked: + logger.error(linkable_check.message) + continue + + # impossible according to the check but needed to fix type checking + assert linkable_check.linked_job is not None + + cloud_job = linkable_check.linked_job + cloud_job.identifier = current_identifier + if dry_run: + logger.info( + f"Would link/rename the job {cloud_job.id}:{cloud_job.name} [[{current_identifier}]]" + ) + else: + logger.info( + f"Linking/Renaming the job {cloud_job.id}:{cloud_job.name} [[{current_identifier}]]" + ) + dbt_cloud.update_job(job=cloud_job) + some_jobs_updated = True + + if not dry_run: + if some_jobs_updated: + logger.success(f"Updated all jobs!") + else: + logger.info(f"No jobs to link") @cli.command() @@ -379,15 +443,12 @@ def unlink(config, account_id, dry_run, identifier, disable_ssl_verification): if account_id: cloud_account_id = account_id elif config: + # we get the account id from the config file defined_jobs = load_job_configuration(config, None).jobs.values() cloud_account_id = list(defined_jobs)[0].account_id else: raise click.BadParameter("Either --config or --account-id must be provided") - # we get the account id from the config file - defined_jobs = load_job_configuration(config, None).jobs.values() - cloud_account_id = list(defined_jobs)[0].account_id - dbt_cloud = DBTCloud( account_id=cloud_account_id, api_key=os.environ.get("DBT_API_KEY"), @@ -407,7 +468,7 @@ 
def unlink(config, account_id, dry_run, identifier, disable_ssl_verification): cloud_job.identifier = None if dry_run: logger.info( - f"Would unlink/rename the job {cloud_job.id}:{cloud_job.name} [[{current_identifier}]]" + f"Would link/rename the job {cloud_job.id}:{cloud_job.name} [[{current_identifier}]]" ) else: logger.info( diff --git a/src/dbt_jobs_as_code/schemas/__init__.py b/src/dbt_jobs_as_code/schemas/__init__.py index 641559f..a28aa6b 100644 --- a/src/dbt_jobs_as_code/schemas/__init__.py +++ b/src/dbt_jobs_as_code/schemas/__init__.py @@ -22,6 +22,7 @@ def _job_to_dict(job: JobDefinition): exclude={ "id", # we want to exclude id because our YAML file will not have it "custom_environment_variables", # TODO: Add this back in. Requires extra API calls. + "linked_id", # we want to exclude linked_id because dbt Cloud doesn't save it } ) return dict_vals diff --git a/src/dbt_jobs_as_code/schemas/job.py b/src/dbt_jobs_as_code/schemas/job.py index a480c08..31e82a9 100644 --- a/src/dbt_jobs_as_code/schemas/job.py +++ b/src/dbt_jobs_as_code/schemas/job.py @@ -19,8 +19,15 @@ class JobDefinition(BaseModel): """A definition for a dbt Cloud job.""" + linked_id: Optional[int] = Field( + default=None, + description="The ID of the job in dbt Cloud that we want to link. Only used for the 'link' command.", + ) id: Optional[int] = None - identifier: Optional[str] = None + identifier: Optional[str] = Field( + default=None, + description="The internal job identifier for the job for dbt-jobs-as-code. Will be added at the end of the job name.", + ) account_id: int = field_mandatory_int_allowed_as_string_in_schema project_id: int = field_mandatory_int_allowed_as_string_in_schema environment_id: int = field_mandatory_int_allowed_as_string_in_schema @@ -100,23 +107,28 @@ def to_payload(self): # otherwise, it means that we are "unlinking" the job from the job.yml if self.identifier: payload.name = f"{self.name} [[{self.identifier}]]" - return payload.model_dump_json(exclude={"identifier", "custom_environment_variables"}) + return payload.model_dump_json( + exclude={"linked_id", "identifier", "custom_environment_variables"} + ) - def to_load_format(self): + def to_load_format(self, include_linked_id: bool = False): """Generate a dict following our YML format to dump as YML later.""" - data = self.model_dump( - exclude={ - "identifier": True, - "schedule": { - "date": True, - "time": True, - }, - "custom_environment_variables": True, - "id": True, - "state": True, - } - ) + self.linked_id = self.id + exclude_dict = { + "identifier": True, + "schedule": { + "date": True, + "time": True, + }, + "id": True, + "custom_environment_variables": True, + "state": True, + } + if not include_linked_id: + exclude_dict["linked_id"] = True + + data = self.model_dump(exclude=exclude_dict) data["custom_environment_variables"] = [] for env_var in self.custom_environment_variables: data["custom_environment_variables"].append({env_var.name: env_var.value}) diff --git a/src/dbt_jobs_as_code/schemas/load_job_schema.json b/src/dbt_jobs_as_code/schemas/load_job_schema.json index 19b5648..60a5ca8 100644 --- a/src/dbt_jobs_as_code/schemas/load_job_schema.json +++ b/src/dbt_jobs_as_code/schemas/load_job_schema.json @@ -101,6 +101,19 @@ "JobDefinition": { "description": "A definition for a dbt Cloud job.", "properties": { + "linked_id": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The ID of the job in dbt Cloud that we want to link. 
Only used for the 'link' command.", + "title": "Linked Id" + }, "id": { "anyOf": [ { @@ -123,6 +136,7 @@ } ], "default": null, + "description": "The internal job identifier for the job for dbt-jobs-as-code. Will be added at the end of the job name.", "title": "Identifier" }, "account_id": { diff --git a/tests/cloud_yaml_mapping/test_validate_link.py b/tests/cloud_yaml_mapping/test_validate_link.py new file mode 100644 index 0000000..d491627 --- /dev/null +++ b/tests/cloud_yaml_mapping/test_validate_link.py @@ -0,0 +1,80 @@ +import copy +from typing import Optional +from unittest.mock import Mock + +import pytest +from dbt_jobs_as_code.client import DBTCloud, DBTCloudException +from dbt_jobs_as_code.cloud_yaml_mapping.validate_link import LinkableCheck, can_be_linked +from dbt_jobs_as_code.schemas.job import JobDefinition + + +@pytest.fixture +def mock_dbt_cloud(): + return Mock(spec=DBTCloud) + + +@pytest.fixture +def base_job_definition(): + return JobDefinition( + id=None, + linked_id=123, + identifier=None, + account_id=1, + project_id=1, + environment_id=1, + name="Test Job", + settings={}, + run_generate_sources=False, + execute_steps=[], + generate_docs=False, + schedule={"cron": "0 14 * * 0,1,2,3,4,5,6"}, + triggers={}, + ) + + +def test_cant_be_linked_no_id_in_yaml(mock_dbt_cloud, base_job_definition): + base_job_definition.linked_id = None + + result = can_be_linked("test_job", base_job_definition, mock_dbt_cloud) + + assert result.can_be_linked is False + assert "doesn't have an ID in YAML" in result.message + assert result.linked_job is None + + +def test_cant_be_linked_job_not_exist(mock_dbt_cloud, base_job_definition): + mock_dbt_cloud.get_job.side_effect = DBTCloudException("Job not found") + + result = can_be_linked("test_job", base_job_definition, mock_dbt_cloud) + + assert result.can_be_linked is False + assert "doesn't exist in dbt Cloud" in result.message + assert result.linked_job is None + mock_dbt_cloud.get_job.assert_called_once_with(job_id=123) + + +def test_cant_be_linked_already_linked(mock_dbt_cloud, base_job_definition): + cloud_job = copy.deepcopy(base_job_definition) + cloud_job.identifier = "existing_identifier" + + mock_dbt_cloud.get_job.return_value = cloud_job + + result = can_be_linked("test_job", base_job_definition, mock_dbt_cloud) + + assert result.can_be_linked is False + assert "already linked" in result.message + assert result.linked_job is None + mock_dbt_cloud.get_job.assert_called_once_with(job_id=123) + + +def test_can_be_linked_success(mock_dbt_cloud, base_job_definition): + cloud_job = copy.deepcopy(base_job_definition) + cloud_job.id = 123 + mock_dbt_cloud.get_job.return_value = cloud_job + + result = can_be_linked("test_job", base_job_definition, mock_dbt_cloud) + + assert result.can_be_linked is True + assert result.message == "" + assert result.linked_job == cloud_job + mock_dbt_cloud.get_job.assert_called_once_with(job_id=123) diff --git a/tests/loader/test_loader.py b/tests/loader/test_loader.py index 8ab07b8..09d67d9 100644 --- a/tests/loader/test_loader.py +++ b/tests/loader/test_loader.py @@ -25,6 +25,7 @@ "identifier": "job1", "job_completion_trigger_condition": None, "job_type": "scheduled", + "linked_id": None, "name": "My Job 1 with a new name", "project_id": 176941, "run_generate_sources": True, @@ -77,6 +78,7 @@ "condition": {"job_id": 123, "project_id": 234, "statuses": [10, 20]} }, "job_type": "other", + "linked_id": None, "name": "CI/CD run", "project_id": 176941, "run_generate_sources": True, From 
5be6edbc3fc5d6b314576643364f219056f7521b Mon Sep 17 00:00:00 2001
From: Benoit Perigaud <8754100+b-per@users.noreply.github.com>
Date: Mon, 18 Nov 2024 19:31:24 +0100
Subject: [PATCH 5/5] Bump version and update docs

---
 README.md      | 35 +++++++++++++++++++++--------------
 pyproject.toml |  2 +-
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 1342c79..681924c 100644
--- a/README.md
+++ b/README.md
@@ -92,15 +92,21 @@ Queries dbt Cloud and provide the YAML definition for those jobs. It includes th
 
 - it is possible to restrict the list of dbt Cloud Job IDs by adding `... -j 101 -j 123 -j 234`
 - this command also accepts a list of project IDs or environment IDs to limit the command to: `dbt-jobs-as-code sync -p 1234 -p 2345 -e 4567 -e 5678`
+- this command accepts a `--include-linked-id` parameter to export the current dbt Cloud job ID as `linked_id`, so that the jobs in the YAML can later be linked to the existing jobs in dbt Cloud with the `link` command described below
 - once the YAML has been retrieved, it is possible to copy/paste it in a local YAML file to create/update the local jobs definition.
 
-To move some ui-jobs to jobs-as-code, perform the following steps:
+Once the configuration is imported, it is possible to "link" existing jobs by using the `link` command explained below.
 
-- run the command to import the jobs
-- copy paste the job/jobs into a YAML file
-- change the `import_` id of the job in the YAML file to another unique identifier
-- rename the job in the UI to end with `[[new_job_identifier]]`
-- run a `plan` command to verify that no changes are required for the given job
+#### `link`
+
+Command: `dbt-jobs-as-code link <config_file>`
+
+Links dbt Cloud jobs with the corresponding identifier from the YAML file by renaming the jobs, adding the `[[ ... ]]` part to the job name.
+
+To do so, the tool looks for the `linked_id` config of each job in the YAML file.
+`linked_id` can be added manually or automatically when calling `dbt-jobs-as-code import-jobs` with the `--include-linked-id` parameter.
+
+Accepts a `--dry-run` flag to see which jobs would be changed, without actually changing them.
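+
+For example, assuming a local `jobs.yml` file that already contains `linked_id` values (the file name is only an illustration), a dry run followed by the actual linking might look like this:
+
+```sh
+# DBT_API_KEY must be set in the environment; DBT_BASE_URL is optional and defaults to https://cloud.getdbt.com
+dbt-jobs-as-code link jobs.yml --dry-run
+dbt-jobs-as-code link jobs.yml
+```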
 
 #### `unlink`
@@ -161,14 +167,15 @@ The tool will raise errors if:
 
 ### Summary of parameters
 
-| Command         | `--project-id` / `-p` | `--environment-id` / `-e` | `--limit-projects-envs-to-yml` / `-l` | `--vars-yml` / `-v` | `--online` | `--job-id` / `-j` | `--identifier` / `-i` | `--dry-run` |
-| --------------- | :-------------------: | :-----------------------: | :-----------------------------------: | :-----------------: | :--------: | :---------------: | :-------------------: | :---------: |
-| plan            | ✅ | ✅ | ✅ | ✅ |  |  |  |  |
-| sync            | ✅ | ✅ | ✅ | ✅ |  |  |  |  |
-| validate        |  |  |  | ✅ | ✅ |  |  |  |
-| import-jobs     | ✅ | ✅ |  |  |  | ✅ |  |  |
-| unlink          |  |  |  |  |  |  | ✅ | ✅ |
-| deactivate-jobs |  |  |  |  |  | ✅ |  |  |
+| Command         | `--project-id` / `-p` | `--environment-id` / `-e` | `--limit-projects-envs-to-yml` / `-l` | `--vars-yml` / `-v` | `--online` | `--job-id` / `-j` | `--identifier` / `-i` | `--dry-run` | `--include-linked-id` |
+| --------------- | :-------------------: | :-----------------------: | :-----------------------------------: | :-----------------: | :--------: | :---------------: | :-------------------: | :---------: | :-------------------: |
+| plan            | ✅ | ✅ | ✅ | ✅ |  |  |  |  |  |
+| sync            | ✅ | ✅ | ✅ | ✅ |  |  |  |  |  |
+| validate        |  |  |  | ✅ | ✅ |  |  |  |  |
+| import-jobs     | ✅ | ✅ |  |  |  | ✅ |  |  | ✅ |
+| link            |  |  |  |  |  |  |  | ✅ |  |
+| unlink          |  |  |  |  |  |  | ✅ | ✅ |  |
+| deactivate-jobs |  |  |  |  |  | ✅ |  |  |  |
 
 As a reminder, using `--project-id` and/or `--environment-id` is not compatible with using `--limit-projects-envs-to-yml`. We can only restrict by providing the IDs or by restricting to the environments and projects in the YML file.
 
diff --git a/pyproject.toml b/pyproject.toml
index 5e4d025..b222f76 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ dependencies = [
     "importlib-metadata<7,>=6.0",
 ]
 name = "dbt-jobs-as-code"
-version = "0.10.0"
+version = "0.11.0"
 description = "A CLI to allow defining dbt Cloud jobs as code"
 readme = "README.md"
 keywords = [