From 0f7004f86826c252919780c388436a29be444e27 Mon Sep 17 00:00:00 2001 From: mooster531 <15d1dc293d2e@proton.me> Date: Sun, 8 Dec 2024 18:19:27 +0100 Subject: [PATCH] Fetch Config from URL (#116) Closes #113 --- README.md | 9 +++++- poetry.lock | 2 +- src/args.py | 7 ++--- src/config.py | 66 +++++++++++++++++++++++++++++++++++++-- tests/e2e_test.py | 15 +++++++++ tests/unit/args_test.py | 5 ++- tests/unit/config_test.py | 59 ++++++++++++++++++++++++++++++++-- 7 files changed, 150 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 7b80386..bb548bb 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,14 @@ The config file may contain environment variable placeholders in - `$varname` **Note**: Every variable referenced this way __must__ be defined at runtime, -otherwise the program will exit with an error. +otherwise the program exits with an error. + +#### Specifying configuration at runtime +- By default, the program looks for a file called `config.yaml` next to `main.py` +- You may pass a configuration file by using the `--config` parameter + - The argument to `--config` may be a filename, a file path, or a URL starting with `http://` or `https://` + - If a URL is passed, it's downloaded and its contents are assumed to be the configuration for the program +- File or content served at the given URL must be valid YAML and encoded in UTF-8 #### Data Source Definitions diff --git a/poetry.lock b/poetry.lock index 7190c94..66540f0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" diff --git a/src/args.py b/src/args.py index dc5f989..565f252 100644 --- a/src/args.py +++ b/src/args.py @@ -4,7 +4,6 @@ import argparse from dataclasses import dataclass -from pathlib import Path from src import root_path @@ -13,7 +12,7 @@ class Args: """Command line argument parser for dune-sync application.""" - config: Path + config: str jobs: list[str] | None @classmethod @@ -24,9 +23,9 @@ def from_command_line(cls) -> Args: ) parser.add_argument( "--config", - type=Path, + type=str, default=root_path.parent / "config.yaml", - help="Path to configuration file (default: config.yaml)", + help="Path/URL with scheme to configuration file (default: config.yaml)", ) parser.add_argument( "--jobs", diff --git a/src/config.py b/src/config.py index ddb87c3..424a339 100644 --- a/src/config.py +++ b/src/config.py @@ -2,13 +2,18 @@ from __future__ import annotations +import asyncio import os from dataclasses import dataclass +from io import StringIO from pathlib import Path from string import Template from typing import Any, TextIO +from urllib.parse import urlsplit, urlunsplit import yaml +from aiohttp import ClientError, ClientResponseError +from aiohttp.client import ClientSession from dotenv import load_dotenv from dune_client.query import QueryBase @@ -16,6 +21,7 @@ from src.destinations.postgres import PostgresDestination from src.interfaces import Destination, Source from src.job import Database, Job +from src.logger import log from src.sources.dune import DuneSource, parse_query_parameters from src.sources.postgres import PostgresSource @@ -139,6 +145,60 @@ def __post_init__(self) -> None: f"Duplicate job names found in configuration: {', '.join(duplicates)}" ) + @classmethod + def _is_url(cls, path: str) -> bool: + """Perform a basic check if given string looks like a URL. + + :param path: arbitrary string + """ + try: + result = urlsplit(path) + urlunsplit(result) + if result.scheme and result.netloc: + return True + except ( + ValueError, + TypeError, + ): # raised when not enough parts were given to unsplit -> not a URL probably + return False + + return False + + @classmethod + def _load_config_file(cls, file_path: Path | str) -> Any: + with open(file_path, encoding="utf-8") as _handle: + return cls.read_yaml(_handle) + + @classmethod + async def _download_config(cls, url: str) -> str | None: + try: + async with ClientSession() as session: + async with session.get(url) as response: + try: + response.raise_for_status() + except ClientResponseError as e: + log.error( + "Error fetching config from URL: %s", + e, + ) + return None + + return await response.text() + + except ClientError as e: + log.error("Request failed: %s", e) + return None + + @classmethod + def _load_config_url(cls, url: str) -> Any: + loop = asyncio.get_event_loop() + config_data = loop.run_until_complete(cls._download_config(url)) + if not config_data: + raise SystemExit("Could not download config") + + pseudofile = StringIO(config_data) + return cls.read_yaml(pseudofile) + @classmethod def read_yaml(cls, file_handle: TextIO) -> Any: """Load YAML from text, substituting any environment variables referenced.""" @@ -163,8 +223,10 @@ def load(cls, file_path: Path | str = "config.yaml") -> RuntimeConfig: ValueError: If the configuration contains invalid database types """ - with open(file_path, encoding="utf-8") as _handle: - data = cls.read_yaml(_handle) + if cls._is_url(str(file_path)): + data = cls._load_config_url(url=str(file_path)) + else: + data = cls._load_config_file(file_path) # Load data sources map sources = {} diff --git a/tests/e2e_test.py b/tests/e2e_test.py index 2821b5e..6baa848 100644 --- a/tests/e2e_test.py +++ b/tests/e2e_test.py @@ -4,6 +4,7 @@ import unittest from logging import WARNING from os import getenv +from unittest import skipIf from unittest.mock import AsyncMock, patch import pandas.testing @@ -207,3 +208,17 @@ async def test_dune_to_local_job_run(self, mock_env, mock_dune_client): await conf.jobs[0].run() self.assertIn("No Query results found! Skipping write", logs.output[0]) + + @patch("src.config.load_dotenv") + @patch.dict(os.environ, {"DUNE_API_KEY": "test_key", "DB_URL": DB_URL}) + @skipIf(not os.getenv("CI"), "this test only runs in CI") + def test_dune_to_local_job_run_remote_config(self, *_): + cfg_url = "https://raw.githubusercontent.com/bh2smith/dune-sync/refs/heads/main/tests/fixtures/config/basic.yaml" + + conf = RuntimeConfig.load(cfg_url) + self.assertIsNotNone(conf) + self.assertEqual(2, len(conf.jobs)) + self.assertEqual( + "Download simple test query to local postgres", conf.jobs[0].name + ) + self.assertEqual("Some other job", conf.jobs[1].name) diff --git a/tests/unit/args_test.py b/tests/unit/args_test.py index c33cd35..f22f3fc 100644 --- a/tests/unit/args_test.py +++ b/tests/unit/args_test.py @@ -1,4 +1,3 @@ -from pathlib import Path from unittest.mock import patch from src import root_path @@ -16,7 +15,7 @@ def test_args_default_values(): def test_args_custom_config(): """Test Args parser with custom config path.""" - test_config = Path("/custom/path/config.yaml") + test_config = "/custom/path/config.yaml" with patch("sys.argv", ["script.py", "--config", str(test_config)]): args = Args.from_command_line() @@ -44,7 +43,7 @@ def test_args_with_empty_jobs(): def test_args_with_all_options(): """Test Args parser with both config and jobs specified.""" - test_config = Path("/custom/path/config.yaml") + test_config = "/custom/path/config.yaml" with patch( "sys.argv", ["script.py", "--config", str(test_config), "--jobs", "job1", "job2"], diff --git a/tests/unit/config_test.py b/tests/unit/config_test.py index a5a7ba0..c331517 100644 --- a/tests/unit/config_test.py +++ b/tests/unit/config_test.py @@ -1,8 +1,10 @@ import os import unittest from datetime import datetime -from unittest.mock import patch +from unittest.mock import AsyncMock, MagicMock, patch +import pytest +from aiohttp import ClientError, ClientResponseError from dune_client.types import QueryParameter from src.config import Env, RuntimeConfig @@ -38,7 +40,7 @@ def test_env_interpolate(self, mock_load_dotenv): ) -class TestRuntimeConfig(unittest.TestCase): +class TestRuntimeConfig(unittest.IsolatedAsyncioTestCase): maxDiff = None @classmethod @@ -122,6 +124,59 @@ def test_load_buggy_conf(self): with self.assertRaises(SystemExit): RuntimeConfig.load(config_root / "no_data_sources.yaml") + @pytest.mark.asyncio + async def test_successful_download(self): + mock_response = AsyncMock(name="Mock GET Response") + mock_response.text = AsyncMock(return_value="test_config_content") + mock_response.raise_for_status.return_value = True + mock_get = AsyncMock() + mock_get.__aenter__.return_value = mock_response + + with patch("src.config.ClientSession.get", return_value=mock_get): + result = await RuntimeConfig._download_config("http://test.xyz") + + self.assertEqual("test_config_content", result) + mock_response.raise_for_status.assert_called_once() + mock_response.text.assert_called_once() + + @pytest.mark.asyncio + async def test_http_error_response(self): + error_response = ClientResponseError( + request_info=None, history=None, status=404, message="Not Found" + ) + mock_response = AsyncMock(name="Mock GET Response") + mock_response.raise_for_status = MagicMock( + side_effect=error_response, name="mock raise for status" + ) + mock_get = AsyncMock() + mock_get.__aenter__.return_value = mock_response + + with ( + patch("src.config.log") as mock_logger, + patch("src.config.ClientSession.get", return_value=mock_get), + ): + result = await RuntimeConfig._download_config( + "http://test.thistldbetternotexist" + ) + + self.assertIsNone(result) + mock_logger.error.assert_called_once_with( + "Error fetching config from URL: %s", error_response + ) + + @pytest.mark.asyncio + async def test_client_connection_error(self): + with ( + patch("aiohttp.ClientSession", side_effect=ClientError("Connection error")), + patch("src.config.log") as mock_logger, + ): + result = await RuntimeConfig._download_config( + "http://test.thistldbetternotexist" + ) + + assert result is None + mock_logger.error.assert_called_once() + class TestParseQueryParameters(unittest.TestCase): def test_parse_query_parameters(self):