Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly set default values, fix start/end/datetime field validators #135

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions .github/workflows/cicd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v4
Expand All @@ -27,11 +27,14 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install tox pre-commit
python -m pip install '.[lint]'
pre-commit install

# Run tox using the version of Python in `PATH`
- name: Run Tox
- name: Lint
run: pre-commit run --all

# Run tox using the version of Python in `PATH`
- name: Test
run: tox -e py

- name: Upload Results
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
- Require `type` property to be set for Catalog and Collections
- Fix validator for Item `datetime` and Common MetaData `start_datetime` and `end_datetime`
- Add `datetime` and `license` to Common MetaData
- Make sure default values for required but unset fields are correctly parsed
- Add support for Python 3.12
- Lint all files
- Increase test coverage

3.0.0 (2024-01-25)
------------------
- Support pydantic>2.0 (@huard)
Expand Down
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@ classifiers=[
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"License :: OSI Approved :: MIT License",
]
keywords=["stac", "pydantic", "validation"]
authors=[{ name = "Arturo Engineering", email = "[email protected]"}]
license= { text = "MIT" }
requires-python=">=3.8"
dependencies = ["click>=8.1.7", "pydantic>=2.4.1", "geojson-pydantic>=1.0.0", "ciso8601~=2.3"]
dependencies = ["click>=8.1.7", "pydantic>=2.4.1", "geojson-pydantic>=1.0.0", "ciso8601~=2.3","python-dateutil>=2.7.0"]
dynamic = ["version", "readme"]

[project.scripts]
Expand All @@ -42,11 +43,12 @@ dev = ["arrow>=1.2.3",
lint = ["types-requests>=2.31.0.5",
"types-jsonschema>=4.19.0.3",
"types-PyYAML>=6.0.12.12",
"types-python-dateutil>=2.7.0",
"black>=23.9.1",
"isort>=5.12.0",
"flake8>=6.1.0",
"Flake8-pyproject>=1.2.3",
"mypy>=1.5.1",
"mypy==1.4.1",
"pre-commit>=3.4.0",
"tox>=4.11.3"]

Expand Down
82 changes: 43 additions & 39 deletions stac_pydantic/api/search.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from datetime import datetime as dt
from typing import Any, Dict, List, Optional, Tuple, Union, cast

from ciso8601 import parse_rfc3339
from geojson_pydantic.geometries import ( # type: ignore
from geojson_pydantic.geometries import (
GeometryCollection,
LineString,
MultiLineString,
Expand All @@ -11,12 +10,12 @@
Point,
Polygon,
)
from pydantic import BaseModel, Field, field_validator, model_validator
from pydantic import BaseModel, Field, TypeAdapter, field_validator, model_validator

from stac_pydantic.api.extensions.fields import FieldsExtension
from stac_pydantic.api.extensions.query import Operator
from stac_pydantic.api.extensions.sort import SortExtension
from stac_pydantic.shared import BBox
from stac_pydantic.shared import BBox, UtcDatetime

Intersection = Union[
Point,
Expand All @@ -28,6 +27,8 @@
GeometryCollection,
]

SearchDatetime = TypeAdapter(Optional[UtcDatetime])


class Search(BaseModel):
"""
Expand All @@ -43,23 +44,18 @@ class Search(BaseModel):
datetime: Optional[str] = None
limit: int = 10

# Private properties to store the parsed datetime values. Not part of the model schema.
_start_date: Optional[dt] = None
_end_date: Optional[dt] = None

# Properties to return the private values
@property
def start_date(self) -> Optional[dt]:
values = (self.datetime or "").split("/")
if len(values) == 1:
return None
if values[0] == ".." or values[0] == "":
return None
return parse_rfc3339(values[0])
return self._start_date

@property
def end_date(self) -> Optional[dt]:
values = (self.datetime or "").split("/")
if len(values) == 1:
return parse_rfc3339(values[0])
if values[1] == ".." or values[1] == "":
return None
return parse_rfc3339(values[1])
return self._end_date

# Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-validators for more information.
@model_validator(mode="before")
Expand Down Expand Up @@ -102,30 +98,38 @@ def validate_bbox(cls, v: BBox) -> BBox:

@field_validator("datetime")
@classmethod
def validate_datetime(cls, v: str) -> str:
if "/" in v:
values = v.split("/")
else:
# Single date is interpreted as end date
values = ["..", v]

dates: List[dt] = []
for value in values:
if value == ".." or value == "":
continue

dates.append(parse_rfc3339(value))

def validate_datetime(cls, value: str) -> str:
# Split on "/" and replace no value or ".." with None
values = [v if v and v != ".." else None for v in value.split("/")]
# If there are more than 2 dates, it's invalid
if len(values) > 2:
raise ValueError("Invalid datetime range, must match format (begin_date, end_date)")

if not {"..", ""}.intersection(set(values)):
if dates[0] > dates[1]:
raise ValueError(
"Invalid datetime range, must match format (begin_date, end_date)"
)

return v
raise ValueError(
"Invalid datetime range. Too many values. Must match format: {begin_date}/{end_date}"
)
# If there is only one date, insert a None for the start date
if len(values) == 1:
values.insert(0, None)
Comment on lines +109 to +111
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ran into this MR after I created #145. I see that the above matches the current behavior, nice.

But I also think the start and end should be the same if a single value is given. (Otherwise one cannot tell the difference between a single value and an open-ended range without looking at the original datetime again.) So I guess this could be changed to:

# If there is only one date, duplicate to use for both start and end dates
if len(values) == 1:
    values.insert(0, values[0])

Or, perhaps easier to read:

    values.append(values[0])

# Cast because pylance gets confused by the type adapter and annotated type
dates = cast(
List[Optional[dt]],
[
# Use the type adapter to validate the datetime strings, strict is necessary
# due to pydantic issues #8736 and #8762
SearchDatetime.validate_strings(v, strict=True) if v else None
for v in values
],
)
# If there is a start and end date, check that the start date is before the end date
if dates[0] and dates[1] and dates[0] > dates[1]:
raise ValueError(
"Invalid datetime range. Begin date after end date. "
"Must match format: {begin_date}/{end_date}"
)
# Store the parsed dates
cls._start_date = dates[0]
cls._end_date = dates[1]
# Return the original string value
return value

@property
def spatial_filter(self) -> Optional[Intersection]:
Expand Down
22 changes: 17 additions & 5 deletions stac_pydantic/catalog.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List, Literal, Optional
from typing import Any, List, Literal, Optional

from pydantic import AnyUrl, ConfigDict, Field
from pydantic import AnyUrl, ConfigDict, Field, model_validator

from stac_pydantic.links import Links
from stac_pydantic.shared import SEMVER_REGEX, StacBaseModel
Expand All @@ -14,13 +14,25 @@ class _Catalog(StacBaseModel):

id: str = Field(..., alias="id", min_length=1)
description: str = Field(..., alias="description", min_length=1)
stac_version: str = Field(STAC_VERSION, pattern=SEMVER_REGEX)
stac_version: str = Field(..., pattern=SEMVER_REGEX)
links: Links
stac_extensions: Optional[List[AnyUrl]] = []
stac_extensions: Optional[List[AnyUrl]] = None
title: Optional[str] = None
type: str
model_config = ConfigDict(use_enum_values=True, extra="allow")

@model_validator(mode="before")
@classmethod
def set_default_links(cls, data: Any) -> Any:
"""Make sure default values are properly set,
so that they are always present in the output JSON."""
if isinstance(data, dict):
if data.get("links") is None:
data["links"] = []
if data.get("stac_version") is None:
data["stac_version"] = STAC_VERSION
return data


class Catalog(_Catalog):
type: Literal["Catalog"] = "Catalog"
type: Literal["Catalog"]
2 changes: 1 addition & 1 deletion stac_pydantic/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,4 @@ class Collection(_Catalog):
keywords: Optional[List[str]] = None
providers: Optional[List[Provider]] = None
summaries: Optional[Dict[str, Union[Range, List[Any], Dict[str, Any]]]] = None
type: Literal["Collection"] = "Collection"
type: Literal["Collection"]
84 changes: 34 additions & 50 deletions stac_pydantic/item.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,11 @@
from datetime import datetime as dt
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional

from ciso8601 import parse_rfc3339
from geojson_pydantic import Feature
from pydantic import (
AnyUrl,
ConfigDict,
Field,
field_serializer,
model_serializer,
model_validator,
)
from pydantic import AnyUrl, ConfigDict, Field, model_serializer, model_validator
from typing_extensions import Annotated

from stac_pydantic.links import Links
from stac_pydantic.shared import (
DATETIME_RFC339,
SEMVER_REGEX,
Asset,
StacBaseModel,
StacCommonMetadata,
)
from stac_pydantic.shared import SEMVER_REGEX, Asset, StacBaseModel, StacCommonMetadata
from stac_pydantic.version import STAC_VERSION


Expand All @@ -28,60 +14,58 @@ class ItemProperties(StacCommonMetadata):
https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/item-spec.md#properties-object
"""

datetime: Union[dt, str] = Field(..., alias="datetime")

# Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-config for more information.
model_config = ConfigDict(extra="allow")

@model_validator(mode="before")
@classmethod
def validate_datetime(cls, data: Dict[str, Any]) -> Dict[str, Any]:
datetime = data.get("datetime")
start_datetime = data.get("start_datetime")
end_datetime = data.get("end_datetime")

if not datetime or datetime == "null":
if not start_datetime and not end_datetime:
raise ValueError(
"start_datetime and end_datetime must be specified when datetime is null"
)

if isinstance(datetime, str):
data["datetime"] = parse_rfc3339(datetime)

if isinstance(start_datetime, str):
data["start_datetime"] = parse_rfc3339(start_datetime)

if isinstance(end_datetime, str):
data["end_datetime"] = parse_rfc3339(end_datetime)
def validate_datetime(cls, data: Any) -> Any:
if isinstance(data, dict):

datetime = data.get("datetime")
start_datetime = data.get("start_datetime")
end_datetime = data.get("end_datetime")

if datetime is None or datetime == "null":
if not start_datetime and not end_datetime:
raise ValueError(
"start_datetime and end_datetime must be specified when datetime is null"
)
# Make sure datetime is properly set to None
# so that it is not present in the output JSON.
data["datetime"] = None

return data

@field_serializer("datetime")
def serialize_datetime(self, v: dt, _info: Any) -> str:
return v.strftime(DATETIME_RFC339)


class Item(Feature, StacBaseModel):
"""
https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/item-spec.md
"""

id: str = Field(..., alias="id", min_length=1)
stac_version: str = Field(STAC_VERSION, pattern=SEMVER_REGEX)
id: Annotated[str, Field(min_length=1)]
stac_version: Annotated[str, Field(pattern=SEMVER_REGEX)]
properties: ItemProperties
assets: Dict[str, Asset]
links: Links
stac_extensions: Optional[List[AnyUrl]] = []
stac_extensions: Optional[List[AnyUrl]] = None
collection: Optional[str] = None

@model_validator(mode="before")
@classmethod
def validate_bbox(cls, values: Dict[str, Any]) -> Dict[str, Any]:
if isinstance(values, dict):
if values.get("geometry") and values.get("bbox") is None:
def validate_defaults(cls, data: Any) -> Any:
"""Make sure default values are properly set,
so that they are always present in the output JSON."""
if isinstance(data, dict):
if data.get("geometry") and data.get("bbox") is None:
raise ValueError("bbox is required if geometry is not null")
return values
if data.get("stac_version") is None:
data["stac_version"] = STAC_VERSION
if data.get("assets") is None:
data["assets"] = {}
if data.get("links") is None:
data["links"] = []
return data

# https://github.com/developmentseed/geojson-pydantic/issues/147
@model_serializer(mode="wrap")
Expand Down
Loading
Loading