From 882fd5570585a4821c3246d82b8af42315abbf05 Mon Sep 17 00:00:00 2001 From: Benjamin Smith Date: Wed, 27 Nov 2024 13:42:07 +0100 Subject: [PATCH 1/5] Single or Specific Job Runner as Runtime Argument --- Makefile | 1 + src/args.py | 42 ++++++++++++++++++++++++++++++++++++++++++ src/config.py | 9 +++++++++ src/main.py | 26 ++++++++++---------------- 4 files changed, 62 insertions(+), 16 deletions(-) create mode 100644 src/args.py diff --git a/Makefile b/Makefile index ba74820..09bf1f7 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,7 @@ clean: fmt: black ./ + ruff check --fix . lint: ruff check . diff --git a/src/args.py b/src/args.py new file mode 100644 index 0000000..dc5f989 --- /dev/null +++ b/src/args.py @@ -0,0 +1,42 @@ +"""Command line argument parser for dune-sync application.""" + +from __future__ import annotations + +import argparse +from dataclasses import dataclass +from pathlib import Path + +from src import root_path + + +@dataclass +class Args: + """Command line argument parser for dune-sync application.""" + + config: Path + jobs: list[str] | None + + @classmethod + def from_command_line(cls) -> Args: + """Create Args instance from command line arguments.""" + parser = argparse.ArgumentParser( + description="Dune Sync - Data synchronization tool" + ) + parser.add_argument( + "--config", + type=Path, + default=root_path.parent / "config.yaml", + help="Path to configuration file (default: config.yaml)", + ) + parser.add_argument( + "--jobs", + type=str, + nargs="*", # accepts zero or more arguments + default=None, + help="Names of specific jobs to run (default: run all jobs)", + ) + args = parser.parse_args() + return cls( + config=args.config, + jobs=args.jobs if args.jobs else None, # Convert empty list to None + ) diff --git a/src/config.py b/src/config.py index e0198cb..e357ae1 100644 --- a/src/config.py +++ b/src/config.py @@ -172,6 +172,15 @@ class RuntimeConfig: jobs: list[Job] + def __post_init__(self) -> None: + """Validate that unique job names are used.""" + job_names = [job.name for job in self.jobs] + if len(job_names) != len(set(job_names)): + duplicates = {name for name in job_names if job_names.count(name) > 1} + raise ValueError( + f"Duplicate job names found in configuration: {', '.join(duplicates)}" + ) + @classmethod def load_from_yaml(cls, file_path: Path | str = "config.yaml") -> RuntimeConfig: """Load and parse a YAML configuration file. diff --git a/src/main.py b/src/main.py index a2ae196..d9752d9 100644 --- a/src/main.py +++ b/src/main.py @@ -20,12 +20,9 @@ Typically includes database connection strings and API keys. """ - -import argparse import asyncio -from pathlib import Path -from src import root_path +from src.args import Args from src.config import RuntimeConfig from src.logger import log @@ -45,20 +42,17 @@ async def main() -> None: Various exceptions depending on job configuration and execution """ - parser = argparse.ArgumentParser( - description="Dune Sync - Data synchronization tool" - ) - parser.add_argument( - "--config", - type=Path, - default=root_path.parent / "config.yaml", - help="Path to configuration file (default: config.yaml)", - ) - args = parser.parse_args() - + args = Args.from_command_line() config = RuntimeConfig.load_from_yaml(args.config) - tasks = [job.run() for job in config.jobs] + # Filter jobs if specific ones were requested + jobs_to_run = ( + [job for job in config.jobs if job.name in args.jobs] + if args.jobs is not None + else config.jobs + ) + + tasks = [job.run() for job in jobs_to_run] for job, completed_task in zip( config.jobs, asyncio.as_completed(tasks), strict=False ): From 049ba93bc48518d11d2d17289e95f38d29233b88 Mon Sep 17 00:00:00 2001 From: Benjamin Smith Date: Wed, 27 Nov 2024 13:46:16 +0100 Subject: [PATCH 2/5] Update Readme --- README.md | 4 +++- config.yaml | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 18974d1..db6b260 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,8 @@ docker run --rm \ # - Mount queries directory (if using SQL file paths) -v "$(pwd)/queries:/app/queries" \ --config /app/my-config.yaml +# - Specify jobs to run (if not specified, all jobs will be run) + --jobs job1 job2 ``` Note that postgres queries can also be file paths (they would also need to be mounted into the container). @@ -99,7 +101,7 @@ Fill out the empty fields in [Sample Env](.env.sample) (e.g. `DUNE_API_KEY` and ```shell docker-compose up -d # Starts postgres container (in the background) -python -m src.main --config config.yaml +python -m src.main [--config config.yaml] [--jobs d2p-test-1 p2d-test] ``` ### Development Commands diff --git a/config.yaml b/config.yaml index eb090e0..c185aa5 100644 --- a/config.yaml +++ b/config.yaml @@ -7,7 +7,7 @@ data_sources: key: ${DB_URL} jobs: - - name: Sync Parameterized Dune Query with Multiple Types to Postgres + - name: d2p-test-1 source: ref: Dune query_id: 4238114 @@ -30,7 +30,7 @@ jobs: index_columns: - hash - - name: Table Independent Query to Dune + - name: p2d-test source: ref: PG query_string: "SELECT 1 as number, '\\x1234'::bytea as my_bytes;" @@ -38,7 +38,7 @@ jobs: ref: Dune table_name: dune_sync_test_table - - name: Sync Parameterized Dune Query with Multiple Types to Postgres + - name: p2d-test-2 source: ref: Dune query_id: 4273244 From b4a3c16f0e778bb07e560690f0cbbfdb20a42001 Mon Sep 17 00:00:00 2001 From: Benjamin Smith Date: Wed, 27 Nov 2024 13:53:26 +0100 Subject: [PATCH 3/5] Add Test for Unique Job Names --- src/config.py | 2 +- tests/fixtures/config/invalid_names.yaml | 24 ++++++++++++++++++++++++ tests/unit/config_test.py | 6 ++++++ 3 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/config/invalid_names.yaml diff --git a/src/config.py b/src/config.py index e357ae1..4c5f9a0 100644 --- a/src/config.py +++ b/src/config.py @@ -303,7 +303,7 @@ def _build_destination( return PostgresDestination( db_url=dest.key, table_name=dest_config["table_name"], - if_exists=dest_config["if_exists"], + if_exists=dest_config.get("if_exists"), index_columns=dest_config.get("index_columns", []), ) raise ValueError(f"Unsupported destination_db type: {dest}") diff --git a/tests/fixtures/config/invalid_names.yaml b/tests/fixtures/config/invalid_names.yaml new file mode 100644 index 0000000..fff3dcd --- /dev/null +++ b/tests/fixtures/config/invalid_names.yaml @@ -0,0 +1,24 @@ +--- +data_sources: + - name: postgres + type: postgres + key: ${DB_URL} + +jobs: + - name: jobName + source: + ref: postgres + table_name: table1 + query_string: SELECT 1; + destination: + ref: postgres + table_name: table2 + + - name: jobName + source: + ref: postgres + table_name: table1 + query_string: SELECT 2; + destination: + ref: postgres + table_name: table2 \ No newline at end of file diff --git a/tests/unit/config_test.py b/tests/unit/config_test.py index acf5bff..35256c0 100644 --- a/tests/unit/config_test.py +++ b/tests/unit/config_test.py @@ -62,6 +62,12 @@ def test_load_basic_conf(self): self.assertEqual(2, len(conf.jobs)) # TODO: come up with more explicit assertions. + def test_load_invalid_names(self): + config_file = config_root / "invalid_names.yaml" + with self.assertRaises(ValueError) as context: + RuntimeConfig.load_from_yaml(config_file.absolute()) + self.assertIn("Duplicate job names found in configuration: jobName", str(context.exception)) + def test_load_unsupported_conf(self): with self.assertRaises(ValueError) as context: RuntimeConfig.load_from_yaml(config_root / "unsupported_source.yaml") From c9d97c5fb034749394967b8b5c7a061ad6d281de Mon Sep 17 00:00:00 2001 From: Benjamin Smith Date: Wed, 27 Nov 2024 14:09:05 +0100 Subject: [PATCH 4/5] test args --- src/config.py | 2 +- tests/unit/args_test.py | 55 +++++++++++++++++++++++++++++++++++++++ tests/unit/config_test.py | 5 +++- 3 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 tests/unit/args_test.py diff --git a/src/config.py b/src/config.py index 4c5f9a0..a4c8405 100644 --- a/src/config.py +++ b/src/config.py @@ -303,7 +303,7 @@ def _build_destination( return PostgresDestination( db_url=dest.key, table_name=dest_config["table_name"], - if_exists=dest_config.get("if_exists"), + if_exists=dest_config.get("if_exists", "append"), index_columns=dest_config.get("index_columns", []), ) raise ValueError(f"Unsupported destination_db type: {dest}") diff --git a/tests/unit/args_test.py b/tests/unit/args_test.py new file mode 100644 index 0000000..c33cd35 --- /dev/null +++ b/tests/unit/args_test.py @@ -0,0 +1,55 @@ +from pathlib import Path +from unittest.mock import patch + +from src import root_path +from src.args import Args + + +def test_args_default_values(): + """Test Args parser with default values (no command line arguments).""" + with patch("sys.argv", ["script.py"]): + args = Args.from_command_line() + + assert args.config == root_path.parent / "config.yaml" + assert args.jobs is None + + +def test_args_custom_config(): + """Test Args parser with custom config path.""" + test_config = Path("/custom/path/config.yaml") + with patch("sys.argv", ["script.py", "--config", str(test_config)]): + args = Args.from_command_line() + + assert args.config == test_config + assert args.jobs is None + + +def test_args_with_jobs(): + """Test Args parser with specific jobs.""" + with patch("sys.argv", ["script.py", "--jobs", "job1", "job2"]): + args = Args.from_command_line() + + assert args.config == root_path.parent / "config.yaml" + assert args.jobs == ["job1", "job2"] + + +def test_args_with_empty_jobs(): + """Test Args parser with empty jobs list.""" + with patch("sys.argv", ["script.py", "--jobs"]): + args = Args.from_command_line() + + assert args.config == root_path.parent / "config.yaml" + assert args.jobs is None + + +def test_args_with_all_options(): + """Test Args parser with both config and jobs specified.""" + test_config = Path("/custom/path/config.yaml") + with patch( + "sys.argv", + ["script.py", "--config", str(test_config), "--jobs", "job1", "job2"], + ): + args = Args.from_command_line() + + assert args.config == test_config + assert args.jobs == ["job1", "job2"] diff --git a/tests/unit/config_test.py b/tests/unit/config_test.py index 35256c0..61dfc4c 100644 --- a/tests/unit/config_test.py +++ b/tests/unit/config_test.py @@ -66,7 +66,10 @@ def test_load_invalid_names(self): config_file = config_root / "invalid_names.yaml" with self.assertRaises(ValueError) as context: RuntimeConfig.load_from_yaml(config_file.absolute()) - self.assertIn("Duplicate job names found in configuration: jobName", str(context.exception)) + self.assertIn( + "Duplicate job names found in configuration: jobName", + str(context.exception), + ) def test_load_unsupported_conf(self): with self.assertRaises(ValueError) as context: From 6660f420a5323d3a746a1b0582f8cefea637a747 Mon Sep 17 00:00:00 2001 From: Benjamin Smith Date: Wed, 27 Nov 2024 14:29:01 +0100 Subject: [PATCH 5/5] Update tests/fixtures/config/invalid_names.yaml --- tests/fixtures/config/invalid_names.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/fixtures/config/invalid_names.yaml b/tests/fixtures/config/invalid_names.yaml index fff3dcd..3c87609 100644 --- a/tests/fixtures/config/invalid_names.yaml +++ b/tests/fixtures/config/invalid_names.yaml @@ -21,4 +21,4 @@ jobs: query_string: SELECT 2; destination: ref: postgres - table_name: table2 \ No newline at end of file + table_name: table2