From 6fcb2fdaeb2d25449bd725595d5d49181e837f63 Mon Sep 17 00:00:00 2001 From: Henry Rodman Date: Fri, 16 Aug 2024 15:50:53 -0500 Subject: [PATCH] add collection_id to items (#12) * optionally use `collection` when creating an item * update ruff --- .gitignore | 2 +- .pre-commit-config.yaml | 2 +- CHANGELOG.md | 2 +- pyproject.toml | 4 ++-- scripts/update-examples | 3 ++- src/stactools/noaa_hrrr/commands.py | 7 ++---- src/stactools/noaa_hrrr/inventory.py | 8 ++++--- src/stactools/noaa_hrrr/metadata.py | 1 + src/stactools/noaa_hrrr/stac.py | 33 +++++++++++++++++++--------- tests/test_commands.py | 1 + tests/test_inventory.py | 1 + tests/test_stac.py | 24 ++++++++++++++++++++ 12 files changed, 64 insertions(+), 24 deletions(-) diff --git a/.gitignore b/.gitignore index b4b7095..6c0d036 100644 --- a/.gitignore +++ b/.gitignore @@ -142,4 +142,4 @@ cython_debug/ scratch .Trash-0 .virtual_documents - +.envrc diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 91b9f8d..90e785c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -25,6 +25,6 @@ repos: - click != 8.1.0 - stactools - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.288 + rev: v0.6.0 hooks: - id: ruff diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b99a14..1a3d58f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ number as needed. ## [Unreleased] -- Nothing. +- Add `collection_id` to items [Unreleased]: diff --git a/pyproject.toml b/pyproject.toml index cf7ff02..4d5b518 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dev = [ "pre-commit~=3.4", "pytest-cov~=4.1", "pytest~=7.4", - "ruff==0.0.288", + "ruff==0.6.0", ] docs = ["pystac~=1.8", "ipykernel~=6.25", "jinja2~=3.1"] @@ -60,7 +60,7 @@ strict = true mypy_path = "src" [tool.ruff] -select = ["E", "F", "I"] +lint.select = ["E", "F", "I"] [tool.setuptools.package-data] "stactools.noaa_hrrr.data" = ["*.csv.gz"] diff --git a/scripts/update-examples b/scripts/update-examples index be11fac..baa30f6 100755 --- a/scripts/update-examples +++ b/scripts/update-examples @@ -4,8 +4,9 @@ import shutil from datetime import datetime from pathlib import Path -import stactools.noaa_hrrr.stac from pystac import CatalogType + +import stactools.noaa_hrrr.stac from stactools.noaa_hrrr.metadata import ( CloudProvider, Product, diff --git a/src/stactools/noaa_hrrr/commands.py b/src/stactools/noaa_hrrr/commands.py index 98fa716..20d0c86 100644 --- a/src/stactools/noaa_hrrr/commands.py +++ b/src/stactools/noaa_hrrr/commands.py @@ -4,16 +4,13 @@ import click from click import Command, Group + from stactools.noaa_hrrr import stac from stactools.noaa_hrrr.constants import ( COLLECTION_ID_FORMAT, EXTENDED_FORECAST_MAX_HOUR, ) -from stactools.noaa_hrrr.metadata import ( - CloudProvider, - Product, - Region, -) +from stactools.noaa_hrrr.metadata import CloudProvider, Product, Region logger = logging.getLogger(__name__) diff --git a/src/stactools/noaa_hrrr/inventory.py b/src/stactools/noaa_hrrr/inventory.py index 541a838..d0cc81c 100644 --- a/src/stactools/noaa_hrrr/inventory.py +++ b/src/stactools/noaa_hrrr/inventory.py @@ -1,6 +1,6 @@ """Each .grib file in the HRRR dataset contains dozens or hundreds of distinct variables that represent data along several dimensions. The inventory files published by NOAA are -useful for the human-readable descriptions, but more reliable inventory dataframes can +useful for the human-readable descriptions, but more reliable inventory dataframes can be generated by reading the sidecar .grib2.idx files. The functions in this module generate the metadata required to define the coordinates @@ -8,12 +8,13 @@ dataframes are used to populate the datacube extension metadata for each collection. The dimensions of interest are: -1. forecast_valid: either the average, minimum, maximum, or accumulated value for a +1. forecast_valid: either the average, minimum, maximum, or accumulated value for a specific time range, e.g. 3-4 hours, 0-1 day, etc. For forecast hour 0, the level is "analysis" -2. level: the models generate predictions of many of the variables for various levels +2. level: the models generate predictions of many of the variables for various levels in the atmosphere, e.g. 0-9000 ft, cloud surface, top of atmosphere, etc. """ + import logging import multiprocessing as mp from datetime import datetime, timedelta @@ -23,6 +24,7 @@ import httpx import pandas as pd + from stactools.noaa_hrrr.constants import ( BYTE_SIZE, DESCRIPTION, diff --git a/src/stactools/noaa_hrrr/metadata.py b/src/stactools/noaa_hrrr/metadata.py index c083044..60723b9 100644 --- a/src/stactools/noaa_hrrr/metadata.py +++ b/src/stactools/noaa_hrrr/metadata.py @@ -8,6 +8,7 @@ from parse import Result, parse from rasterio.crs import CRS from rasterio.warp import transform_bounds + from stactools.noaa_hrrr.constants import ( EXTENDED_FORECAST_MAX_HOUR, STANDARD_FORECAST_MAX_HOUR, diff --git a/src/stactools/noaa_hrrr/stac.py b/src/stactools/noaa_hrrr/stac.py index 838ea4f..6b3e30c 100644 --- a/src/stactools/noaa_hrrr/stac.py +++ b/src/stactools/noaa_hrrr/stac.py @@ -1,17 +1,11 @@ import logging import multiprocessing as mp from datetime import datetime, timedelta -from typing import Union +from typing import Optional, Union import pandas as pd import pystac -from pystac import ( - Collection, - Extent, - Item, - SpatialExtent, - TemporalExtent, -) +from pystac import Collection, Extent, Item, SpatialExtent, TemporalExtent from pystac.catalog import CatalogType from pystac.extensions.datacube import ( DatacubeExtension, @@ -23,6 +17,7 @@ from pystac.extensions.item_assets import AssetDefinition, ItemAssetsExtension from pystac.item_collection import ItemCollection from pystac.provider import Provider, ProviderRole + from stactools.noaa_hrrr.constants import ( BYTE_SIZE, COLLECTION_ID_FORMAT, @@ -438,6 +433,7 @@ def create_item( cloud_provider: CloudProvider, reference_datetime: datetime, forecast_hour: int, + collection: Optional[Collection] = None, ) -> Item: """Creates a STAC item for a region x product x cloud provider x reference_datetime (cycle run hour) combination. @@ -487,6 +483,7 @@ def create_item( cloud_provider=cloud_provider, reference_datetime=reference_datetime, forecast_hour=forecast_hour, + collection=collection, ) @@ -497,6 +494,7 @@ def create_item_from_idx_df( cloud_provider: CloudProvider, reference_datetime: datetime, forecast_hour: int, + collection: Optional[Collection] = None, ) -> Item: """Creates a STAC item for a region x product x cloud provider x reference_datetime (cycle run hour) combination and a provided idx dataframe. @@ -550,6 +548,7 @@ def create_item_from_idx_df( geometry=region_config.geometry_4326, bbox=region_config.bbox_4326, datetime=forecast_datetime, + collection=collection, properties={ "forecast:reference_time": reference_datetime.strftime( "%Y-%m-%dT%H:%M:%SZ" @@ -613,11 +612,17 @@ def create_item_safe( cloud_provider: CloudProvider, reference_datetime: datetime, forecast_hour: int, + collection: Optional[Collection], ) -> Union[Item, None]: """Try to create an item and raise a warning if it fails""" try: return create_item( - region, product, cloud_provider, reference_datetime, forecast_hour + region, + product, + cloud_provider, + reference_datetime, + forecast_hour, + collection, ) except NotFoundError as e: logging.warning(e) @@ -630,6 +635,7 @@ def create_item_collection( cloud_provider: CloudProvider, start_date: datetime, end_date: datetime, + collection: Optional[Collection] = None, ) -> pystac.ItemCollection: """Create an item collection containing all items for a date range""" @@ -644,7 +650,14 @@ def create_item_collection( forecast_cycle_type = ForecastCycleType.from_timestamp(reference_datetime) for forecast_hour in forecast_cycle_type.generate_forecast_hours(): tasks.append( - (region, product, cloud_provider, reference_datetime, forecast_hour) + ( + region, + product, + cloud_provider, + reference_datetime, + forecast_hour, + collection, + ) ) reference_date += one_day diff --git a/tests/test_commands.py b/tests/test_commands.py index 1f5f2d2..050beb2 100644 --- a/tests/test_commands.py +++ b/tests/test_commands.py @@ -4,6 +4,7 @@ from click import Group from click.testing import CliRunner from pystac import Collection, Item + from stactools.noaa_hrrr.commands import create_noaahrrr_command from stactools.noaa_hrrr.metadata import ( CloudProvider, diff --git a/tests/test_inventory.py b/tests/test_inventory.py index a54003f..8809359 100644 --- a/tests/test_inventory.py +++ b/tests/test_inventory.py @@ -1,5 +1,6 @@ import pandas as pd import pytest + from stactools.noaa_hrrr.inventory import ( DESCRIPTION_COLS, INVENTORY_COLS, diff --git a/tests/test_stac.py b/tests/test_stac.py index b261ac9..a2a55d5 100644 --- a/tests/test_stac.py +++ b/tests/test_stac.py @@ -2,6 +2,7 @@ from datetime import datetime, timedelta import pytest + from stactools.noaa_hrrr import stac from stactools.noaa_hrrr.constants import ( COLLECTION_ID_FORMAT, @@ -81,6 +82,29 @@ def test_create_item( _ = json.dumps(item.to_dict()) +def test_create_item_with_collection() -> None: + region = Region.conus + product = Product.sfc + cloud_provider = CloudProvider.aws + item = stac.create_item( + region=region, + product=product, + cloud_provider=cloud_provider, + reference_datetime=datetime(year=2024, month=1, day=1, hour=6), + forecast_hour=12, + collection=stac.create_collection( + region=region, + product=product, + cloud_provider=cloud_provider, + ), + ) + assert item.collection_id == COLLECTION_ID_FORMAT.format( + region=region.value, + product=product.value, + cloud_provider=cloud_provider.value, + ) + + def test_create_item_collection() -> None: start_date = datetime(year=2024, month=5, day=1) item_collection = stac.create_item_collection(