From 492f54c97e89ae74063be1fa1a9734380e5eea61 Mon Sep 17 00:00:00 2001 From: Jasper Ginn Date: Fri, 5 Jan 2024 22:12:50 +0100 Subject: [PATCH] chore: format and add pyproject --- .justfile | 2 +- .pre-commit-config.yaml | 16 ++++----- dags/luchtmeetnet_ingestion/BUILD | 7 ++-- .../luchtmeetnet_ingestion/IO/resources.py | 1 - .../src/luchtmeetnet_ingestion/__init__.py | 6 ++-- .../luchtmeetnet_ingestion/assets/__init__.py | 11 +++--- .../src/luchtmeetnet_ingestion/jobs.py | 1 - .../luchtmeetnet/api.py | 1 - .../src/luchtmeetnet_ingestion/partitions.py | 4 +-- pyproject.toml | 35 +++++++++++++++++++ 10 files changed, 60 insertions(+), 24 deletions(-) create mode 100644 pyproject.toml diff --git a/.justfile b/.justfile index 9070bcb..27b2395 100644 --- a/.justfile +++ b/.justfile @@ -19,7 +19,7 @@ fmt: pants fmt :: lint: - pants lint check :: + pants lint :: # check test: pants test :: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ba3eaf9..8fb87b0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,11 +27,11 @@ repos: language: system types: [python] pass_filenames: false - - repo: local - hooks: - - id: pants-check - name: pants-check - entry: "pants check --changed-since=HEAD" - language: system - types: [python] - pass_filenames: false + # - repo: local + # hooks: + # - id: pants-check + # name: pants-check + # entry: "pants check --changed-since=HEAD" + # language: system + # types: [python] + # pass_filenames: false diff --git a/dags/luchtmeetnet_ingestion/BUILD b/dags/luchtmeetnet_ingestion/BUILD index b8fc50e..6cce18d 100644 --- a/dags/luchtmeetnet_ingestion/BUILD +++ b/dags/luchtmeetnet_ingestion/BUILD @@ -26,7 +26,10 @@ pex_binary( layout="packed", include_requirements=False, include_tools=True, - dependencies=[":pyproject", "dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion:luchtmeetnet_ingestion"], + dependencies=[ + ":pyproject", + 
"dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion:luchtmeetnet_ingestion", + ], environment="py39_slim", ) @@ -70,5 +73,5 @@ docker_image( "COPY dags.luchtmeetnet_ingestion/binary-srcs.pex/pyproject.toml /pyproject.toml", 'ENTRYPOINT ["/bin/app/pex"]', ], - image_tags=["latest"] + image_tags=["latest"], ) diff --git a/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/IO/resources.py b/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/IO/resources.py index 45028d6..1f6de06 100644 --- a/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/IO/resources.py +++ b/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/IO/resources.py @@ -1,7 +1,6 @@ from typing import Any, Dict, List, Optional from dagster import ConfigurableResource - from luchtmeetnet_ingestion.luchtmeetnet.api import get_results_luchtmeetnet_endpoint diff --git a/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/__init__.py b/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/__init__.py index 397c7e0..44b3756 100644 --- a/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/__init__.py +++ b/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/__init__.py @@ -1,9 +1,7 @@ from importlib import metadata from dagster import Definitions, FilesystemIOManager - from luchtmeetnet_ingestion.assets import air_quality_data -from luchtmeetnet_ingestion.IO.duckdb_io_manager import duckdb_parquet_io_manager from luchtmeetnet_ingestion.IO.resources import LuchtMeetNetResource from luchtmeetnet_ingestion.jobs import ingestion_job @@ -17,7 +15,9 @@ env_resources = { "dev": shared_resources - | {"landing_zone": FilesystemIOManager()} #{"landing_zone": duckdb_parquet_io_manager.configured({"path": ".tmp/landing_zone"})} + | { + "landing_zone": FilesystemIOManager() + } # {"landing_zone": duckdb_parquet_io_manager.configured({"path": ".tmp/landing_zone"})} } definition = Definitions( diff --git a/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/assets/__init__.py 
b/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/assets/__init__.py index 92206ba..ce80cd8 100644 --- a/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/assets/__init__.py +++ b/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/assets/__init__.py @@ -1,6 +1,5 @@ import pandas as pd -from dagster import AutoMaterializePolicy, asset - +from dagster import asset from luchtmeetnet_ingestion.IO.resources import LuchtMeetNetResource from luchtmeetnet_ingestion.partitions import daily_partition @@ -10,12 +9,16 @@ compute_kind="duckdb", io_manager_key="landing_zone", partitions_def=daily_partition, - #auto_materialize_policy=AutoMaterializePolicy.eager()#max_materializations_per_minute=None), + # auto_materialize_policy=AutoMaterializePolicy.eager()#max_materializations_per_minute=None), ) def air_quality_data(context, luchtmeetnet_api: LuchtMeetNetResource): date = context.partition_key context.log.debug(f"Fetching data for {date}") - rp = {"start": f"{date}T00:00:00", "end": f"{date}T23:59:59", "station_number": "NL01494"} + rp = { + "start": f"{date}T00:00:00", + "end": f"{date}T23:59:59", + "station_number": "NL01494", + } df = pd.DataFrame(luchtmeetnet_api.request("measurements", request_params=rp)) return df.to_json() diff --git a/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/jobs.py b/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/jobs.py index 541c874..00c03b1 100644 --- a/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/jobs.py +++ b/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/jobs.py @@ -1,5 +1,4 @@ from dagster import define_asset_job - from luchtmeetnet_ingestion.assets import air_quality_data from luchtmeetnet_ingestion.partitions import daily_partition diff --git a/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/luchtmeetnet/api.py b/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/luchtmeetnet/api.py index 8f7b0d1..0e4c8fb 100644 --- 
a/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/luchtmeetnet/api.py +++ b/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/luchtmeetnet/api.py @@ -3,7 +3,6 @@ from typing import Any, Dict, List, Optional import requests # type: ignore - from luchtmeetnet_ingestion.luchtmeetnet.const import LUCHTMEETNET_BASE_URL logger = logging.getLogger("dagster_orchestrators.IO.api") diff --git a/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/partitions.py b/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/partitions.py index 481ca91..5d30a7a 100644 --- a/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/partitions.py +++ b/dags/luchtmeetnet_ingestion/src/luchtmeetnet_ingestion/partitions.py @@ -1,6 +1,4 @@ -from dagster import ( - DailyPartitionsDefinition -) +from dagster import DailyPartitionsDefinition daily_partition = DailyPartitionsDefinition( start_date="2023-12-20", end_offset=0, timezone="Europe/Amsterdam", fmt="%Y-%m-%d" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..a39aaa3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,35 @@ +[tool.black] +line-length = 100 +exclude = ''' +^/( + ( + \.eggs # exclude a few common directories in the + | \.git # root of the project + | \.hg + | \.mypy_cache + | \.venv + | _build + | build + | dist + ) +) +''' + +[tool.isort] +profile = "black" +#extend_skip = [".md", ".json"] +#extend_skip = [".tmp", ".venv"] + +[tool.mypy] +exclude = "^tests/" +ignore_missing_imports = true + +[tool.ruff] +ignore = ["E501"] +extend-exclude = [ + "__pycache__", + "docs", + "site", + "src/dagster_orchestrators/__init__.py", +] +line-length = 100