Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Unify entity version and name validation #91

Merged
merged 7 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 7 additions & 36 deletions entities_service/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
from fastapi import FastAPI, HTTPException, Path, status

from entities_service import __version__
from entities_service.models import Entity
from entities_service.models import (
Entity,
EntityNameType,
EntityVersionType,
)
from entities_service.service.backend import get_backend
from entities_service.service.config import CONFIG
from entities_service.service.logger import setup_logger
Expand Down Expand Up @@ -55,48 +59,15 @@ async def lifespan(_: FastAPI):
APP.include_router(router)


SEMVER_REGEX = (
r"^(?P<major>0|[1-9]\d*)(?:\.(?P<minor>0|[1-9]\d*))?(?:\.(?P<patch>0|[1-9]\d*))?"
r"(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)"
r"(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?"
r"(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$"
)
"""Semantic Versioning regular expression.

Slightly changed version of the one found at https://semver.org.
The changed bits pertain to `minor` and `patch`, which are now both optional.
"""


@APP.get(
"/{version}/{name}",
response_model=Entity,
response_model_by_alias=True,
response_model_exclude_unset=True,
)
async def get_entity(
version: Annotated[
str,
Path(
title="Entity version",
pattern=SEMVER_REGEX,
description=(
"The version part must be a semantic version, following the schema "
"laid out by SemVer.org."
),
),
],
name: Annotated[
str,
Path(
title="Entity name",
pattern=r"(?i)^[A-Z]+$",
description=(
"The name part is without any white space. It is conventionally "
"written in PascalCase."
),
),
],
version: Annotated[EntityVersionType, Path(title="Entity version")],
name: Annotated[EntityNameType, Path(title="Entity name")],
) -> dict[str, Any]:
"""Get an entity."""
uri = f"{str(CONFIG.base_url).rstrip('/')}/{version}/{name}"
Expand Down
20 changes: 19 additions & 1 deletion entities_service/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@

from .dlite_soft5 import DLiteSOFT5Entity
from .dlite_soft7 import DLiteSOFT7Entity
from .soft import URI_REGEX
from .soft import (
SEMVER_REGEX,
URI_REGEX,
EntityNameType,
EntityVersionType,
)
from .soft5 import SOFT5Entity
from .soft7 import SOFT7Entity

Expand All @@ -18,6 +23,19 @@
# Union of the four entity model classes imported above.
Entity = SOFT7Entity | SOFT5Entity | DLiteSOFT7Entity | DLiteSOFT5Entity
# The individual members of the `Entity` union, as returned by `get_args`.
EntityType = get_args(Entity)

# Names exported as this package's public API. The regexes and the two
# `Entity*Type` aliases are re-exported from `.soft`; the function names listed
# here are presumably defined further down in this module (not visible in this
# hunk) — verify when editing.
__all__ = (
"Entity",
"EntityType",
"soft_entity",
"get_uri",
"get_version",
"get_updated_version",
"URI_REGEX",
"SEMVER_REGEX",
"EntityNameType",
"EntityVersionType",
)


@overload
def soft_entity(
Expand Down
105 changes: 95 additions & 10 deletions entities_service/models/soft.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,94 @@
import difflib
import re
from typing import Annotated, Any
from urllib.parse import quote

from pydantic import (
AliasChoices,
BaseModel,
ConfigDict,
Field,
TypeAdapter,
ValidationError,
field_validator,
model_validator,
)
from pydantic.functional_validators import AfterValidator
from pydantic.networks import AnyHttpUrl

from entities_service.service.config import CONFIG

# SemVer pattern with named groups: major, minor, patch, prerelease and
# buildmetadata. The pattern itself carries no `^`/`$` anchors, so any caller
# that needs a full-string match has to add them around it.
SEMVER_REGEX = (
r"(?P<major>0|[1-9]\d*)(?:\.(?P<minor>0|[1-9]\d*))?(?:\.(?P<patch>0|[1-9]\d*))?"
r"(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)"
r"(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?"
r"(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?"
)
"""Semantic Versioning regular expression.

Slightly changed version of the one found at https://semver.org.
The changed bits pertain to `minor` and `patch`, which are now both optional.
"""

# Same pattern as `SEMVER_REGEX`, but every named group is replaced by a
# non-capturing group. Like `SEMVER_REGEX`, it is unanchored, which allows it to
# be embedded inside a larger expression.
NO_GROUPS_SEMVER_REGEX = (
r"(?:0|[1-9]\d*)(?:\.(?:0|[1-9]\d*))?(?:\.(?:0|[1-9]\d*))?"
r"(?:-(?:(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)"
r"(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?"
r"(?:\+(?:[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?"
)
"""Semantic Versioning regular expression.

Slightly changed version of the one found at https://semver.org.
The changed bits pertain to `minor` and `patch`, which are now both optional.

This is the same as `SEMVER_REGEX`, but without the named groups.
"""

URI_REGEX = re.compile(
r"^(?P<namespace>https?://.+)/(?P<version>\d(?:\.\d+){0,2})/(?P<name>[^/#?]+)$"
rf"^(?P<namespace>https?://.+)/(?P<version>{NO_GROUPS_SEMVER_REGEX})/(?P<name>[^/#?]+)$"
)
"""Regular expression to parse a SOFT entity URI."""


def _disallowed_characters(value: str) -> str:
"""Check that the value does not contain disallowed characters."""
special_url_characters = ["/", "?", "#", "@", ":"]
if any(char in value for char in special_url_characters):
raise ValueError(

Check warning on line 61 in entities_service/models/soft.py

View check run for this annotation

Codecov / codecov/patch

entities_service/models/soft.py#L61

Added line #L61 was not covered by tests
f"The value must not contain any of {special_url_characters} characters."
)
if " " in value:
raise ValueError("The value must not contain any spaces.")

Check warning on line 65 in entities_service/models/soft.py

View check run for this annotation

Codecov / codecov/patch

entities_service/models/soft.py#L65

Added line #L65 was not covered by tests
return value


def _ensure_url_encodeable(value: str) -> str:
"""Ensure that the value is URL encodeable."""
try:
quote(value)
except Exception as error: # noqa: BLE001
raise ValueError(f"The value is not URL encodeable: {error}") from error

Check warning on line 74 in entities_service/models/soft.py

View check run for this annotation

Codecov / codecov/patch

entities_service/models/soft.py#L73-L74

Added lines #L73 - L74 were not covered by tests
return value


# Reusable annotated type for an entity version: a string whose whole content
# must match `SEMVER_REGEX` (the unanchored pattern is wrapped in `^...$` here).
EntityVersionType = Annotated[
str,
Field(
description=(
"The version of the entity. It must be a semantic version, following the "
"schema laid out by SemVer.org."
),
pattern=rf"^{SEMVER_REGEX}$",
),
]
# Reusable annotated type for an entity name: a string that is additionally
# passed through `_disallowed_characters` and `_ensure_url_encodeable` as
# after-validators, each of which raises `ValueError` on a rejected value.
EntityNameType = Annotated[
str,
Field(description="The name of the entity."),
AfterValidator(_disallowed_characters),
AfterValidator(_ensure_url_encodeable),
]


class SOFTProperty(BaseModel):
"""The minimum set of defining metadata for a SOFT Entity's property."""

Expand Down Expand Up @@ -61,10 +130,8 @@

model_config = ConfigDict(extra="forbid")

name: Annotated[str | None, Field(description="The name of the entity.")] = None
version: Annotated[str | None, Field(description="The version of the entity.")] = (
None
)
name: EntityNameType | None = None
version: EntityVersionType | None = None
namespace: Annotated[
AnyHttpUrl | None, Field(description="The namespace of the entity.")
] = None
Expand All @@ -86,21 +153,39 @@
service."""
if not str(value).startswith(str(CONFIG.base_url)):
error_message = (
"This service only works with SOFT entities at " f"{CONFIG.base_url}.\n"
f"This service only works with SOFT entities at {CONFIG.base_url}.\n"
)
raise ValueError(error_message)
return value

@field_validator("uri", mode="after")
@classmethod
def _validate_uri(cls, value: AnyHttpUrl) -> AnyHttpUrl:
"""Validate `uri` is consistent with `name`, `version`, and `namespace`."""
if URI_REGEX.match(str(value)) is None:
"""Validate all parts of the `uri`."""
try:
uri_deconstructed = URI_REGEX.match(str(value))
except Exception as error: # noqa: BLE001
error_message = f"The URI is invalid: {error}\n"
raise ValueError(error_message) from error

Check warning on line 169 in entities_service/models/soft.py

View check run for this annotation

Codecov / codecov/patch

entities_service/models/soft.py#L167-L169

Added lines #L167 - L169 were not covered by tests

if uri_deconstructed is None:
# The URI does not match the expected pattern.
# This will validate that the namespace starts with 'http(s)://' and the
# version is a semantic version.
error_message = (
"The 'uri' is not a valid SOFT7 entity URI. It must be of the form "
f"{str(CONFIG.base_url).rstrip('/')}/{{version}}/{{name}}.\n"
"The URI does not match the expected pattern. The URI must be of the "
"form `{namespace}/{version}/{name}`, where the 'version' must adhere "
"to the SemVer specification.\n"
)
raise ValueError(error_message)

try:
# This validates the name part of the URI.
TypeAdapter(EntityNameType).validate_python(uri_deconstructed.group("name"))
except (ValueError, ValidationError) as error:
error_message = f"The name part of the URI is invalid: {error}\n"
raise ValueError(error_message) from error

Check warning on line 187 in entities_service/models/soft.py

View check run for this annotation

Codecov / codecov/patch

entities_service/models/soft.py#L185-L187

Added lines #L185 - L187 were not covered by tests

return value

@model_validator(mode="before")
Expand Down
Loading