diff --git a/entities_service/main.py b/entities_service/main.py index 31b3c75..54d2646 100644 --- a/entities_service/main.py +++ b/entities_service/main.py @@ -10,7 +10,11 @@ from fastapi import FastAPI, HTTPException, Path, status from entities_service import __version__ -from entities_service.models import Entity +from entities_service.models import ( + Entity, + EntityNameType, + EntityVersionType, +) from entities_service.service.backend import get_backend from entities_service.service.config import CONFIG from entities_service.service.logger import setup_logger @@ -55,19 +59,6 @@ async def lifespan(_: FastAPI): APP.include_router(router) -SEMVER_REGEX = ( - r"^(?P<major>0|[1-9]\d*)(?:\.(?P<minor>0|[1-9]\d*))?(?:\.(?P<patch>0|[1-9]\d*))?" - r"(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)" - r"(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?" - r"(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" -) -"""Semantic Versioning regular expression. - -Slightly changed version of the one found at https://semver.org. -The changed bits pertain to `minor` and `patch`, which are now both optional. -""" - - @APP.get( "/{version}/{name}", response_model=Entity, @@ -75,28 +66,8 @@ async def lifespan(_: FastAPI): response_model_exclude_unset=True, ) async def get_entity( - version: Annotated[ - str, - Path( - title="Entity version", - pattern=SEMVER_REGEX, - description=( - "The version part must be a semantic version, following the schema " - "laid out by SemVer.org." - ), - ), - ], - name: Annotated[ - str, - Path( - title="Entity name", - pattern=r"(?i)^[A-Z]+$", - description=( - "The name part is without any white space. It is conventionally " - "written in PascalCase." 
- ), - ), - ], + version: Annotated[EntityVersionType, Path(title="Entity version")], + name: Annotated[EntityNameType, Path(title="Entity name")], ) -> dict[str, Any]: """Get an entity.""" uri = f"{str(CONFIG.base_url).rstrip('/')}/{version}/{name}" diff --git a/entities_service/models/__init__.py b/entities_service/models/__init__.py index 62b9802..028898e 100644 --- a/entities_service/models/__init__.py +++ b/entities_service/models/__init__.py @@ -8,7 +8,12 @@ from .dlite_soft5 import DLiteSOFT5Entity from .dlite_soft7 import DLiteSOFT7Entity -from .soft import URI_REGEX +from .soft import ( + SEMVER_REGEX, + URI_REGEX, + EntityNameType, + EntityVersionType, +) from .soft5 import SOFT5Entity from .soft7 import SOFT7Entity @@ -18,6 +23,19 @@ Entity = SOFT7Entity | SOFT5Entity | DLiteSOFT7Entity | DLiteSOFT5Entity EntityType = get_args(Entity) +__all__ = ( + "Entity", + "EntityType", + "soft_entity", + "get_uri", + "get_version", + "get_updated_version", + "URI_REGEX", + "SEMVER_REGEX", + "EntityNameType", + "EntityVersionType", +) + @overload def soft_entity( diff --git a/entities_service/models/soft.py b/entities_service/models/soft.py index 8cc47c8..2b5cbbc 100644 --- a/entities_service/models/soft.py +++ b/entities_service/models/soft.py @@ -5,25 +5,94 @@ import difflib import re from typing import Annotated, Any +from urllib.parse import quote from pydantic import ( AliasChoices, BaseModel, ConfigDict, Field, + TypeAdapter, + ValidationError, field_validator, model_validator, ) +from pydantic.functional_validators import AfterValidator from pydantic.networks import AnyHttpUrl from entities_service.service.config import CONFIG +SEMVER_REGEX = ( + r"(?P<major>0|[1-9]\d*)(?:\.(?P<minor>0|[1-9]\d*))?(?:\.(?P<patch>0|[1-9]\d*))?" + r"(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)" + r"(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?" + r"(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?" +) +"""Semantic Versioning regular expression. 
+ +Slightly changed version of the one found at https://semver.org. +The changed bits pertain to `minor` and `patch`, which are now both optional. +""" + +NO_GROUPS_SEMVER_REGEX = ( + r"(?:0|[1-9]\d*)(?:\.(?:0|[1-9]\d*))?(?:\.(?:0|[1-9]\d*))?" + r"(?:-(?:(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)" + r"(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?" + r"(?:\+(?:[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?" +) +"""Semantic Versioning regular expression. + +Slightly changed version of the one found at https://semver.org. +The changed bits pertain to `minor` and `patch`, which are now both optional. + +This is the same as `SEMVER_REGEX`, but without the named groups. +""" + URI_REGEX = re.compile( - r"^(?P<namespace>https?://.+)/(?P<version>\d(?:\.\d+){0,2})/(?P<name>[^/#?]+)$" + rf"^(?P<namespace>https?://.+)/(?P<version>{NO_GROUPS_SEMVER_REGEX})/(?P<name>[^/#?]+)$" ) """Regular expression to parse a SOFT entity URI.""" +def _disallowed_characters(value: str) -> str: + """Check that the value does not contain disallowed characters.""" + special_url_characters = ["/", "?", "#", "@", ":"] + if any(char in value for char in special_url_characters): + raise ValueError( + f"The value must not contain any of {special_url_characters} characters." + ) + if " " in value: + raise ValueError("The value must not contain any spaces.") + return value + + +def _ensure_url_encodeable(value: str) -> str: + """Ensure that the value is URL encodeable.""" + try: + quote(value) + except Exception as error: # noqa: BLE001 + raise ValueError(f"The value is not URL encodeable: {error}") from error + return value + + +EntityVersionType = Annotated[ + str, + Field( + description=( + "The version of the entity. It must be a semantic version, following the " + "schema laid out by SemVer.org." 
+ ), + pattern=rf"^{SEMVER_REGEX}$", + ), +] +EntityNameType = Annotated[ + str, + Field(description="The name of the entity."), + AfterValidator(_disallowed_characters), + AfterValidator(_ensure_url_encodeable), +] + + class SOFTProperty(BaseModel): """The minimum set of defining metadata for a SOFT Entity's property.""" @@ -61,10 +130,8 @@ class SOFTEntity(BaseModel): model_config = ConfigDict(extra="forbid") - name: Annotated[str | None, Field(description="The name of the entity.")] = None - version: Annotated[str | None, Field(description="The version of the entity.")] = ( - None - ) + name: EntityNameType | None = None + version: EntityVersionType | None = None namespace: Annotated[ AnyHttpUrl | None, Field(description="The namespace of the entity.") ] = None @@ -86,7 +153,7 @@ def _validate_base_url(cls, value: AnyHttpUrl) -> AnyHttpUrl: service.""" if not str(value).startswith(str(CONFIG.base_url)): error_message = ( - "This service only works with SOFT entities at " f"{CONFIG.base_url}.\n" + f"This service only works with SOFT entities at {CONFIG.base_url}.\n" ) raise ValueError(error_message) return value @@ -94,13 +161,31 @@ def _validate_base_url(cls, value: AnyHttpUrl) -> AnyHttpUrl: @field_validator("uri", mode="after") @classmethod def _validate_uri(cls, value: AnyHttpUrl) -> AnyHttpUrl: - """Validate `uri` is consistent with `name`, `version`, and `namespace`.""" - if URI_REGEX.match(str(value)) is None: + """Validate all parts of the `uri`.""" + try: + uri_deconstructed = URI_REGEX.match(str(value)) + except Exception as error: # noqa: BLE001 + error_message = f"The URI is invalid: {error}\n" + raise ValueError(error_message) from error + + if uri_deconstructed is None: + # The URI does not match the expected pattern. + # This will validate that the namespace starts with 'http(s)://' and the + # version is a semantic version. error_message = ( - "The 'uri' is not a valid SOFT7 entity URI. 
It must be of the form " - f"{str(CONFIG.base_url).rstrip('/')}/{{version}}/{{name}}.\n" + "The URI does not match the expected pattern. The URI must be of the " + "form `{namespace}/{version}/{name}`, where the 'version' must adhere " + "to the SemVer specification.\n" ) raise ValueError(error_message) + + try: + # This validates the name part of the URI. + TypeAdapter(EntityNameType).validate_python(uri_deconstructed.group("name")) + except (ValueError, ValidationError) as error: + error_message = f"The name part of the URI is invalid: {error}\n" + raise ValueError(error_message) from error + return value @model_validator(mode="before")