Skip to content

Commit

Permalink
[NHUB-540] Add async ResourceModel for ContentAPI items (superdesk#2740)
Browse files Browse the repository at this point in the history
* Add async resource for contentapi items

* Add projection arg to service find_one funcs, and other improvements

* Support `include_in_parent` elastic mapping config

* New Pydantic BaseModel (for non-resource usage)

* Elastic cursor: Support async next function. Also reset elastic index once looping completed

* Elastic fields: Support more elastic mapping configs

* Resource service: Fix cache issue with thread locks

* Add comment: Improve validators later

* Improve types module/file layout

* Add is_json_request to Request instance

* Add GUID_FIELD to resource fields

* Fix tests

* fix lint issues
  • Loading branch information
MarkLark86 authored Oct 30, 2024
1 parent 57ba31f commit beb6af3
Show file tree
Hide file tree
Showing 27 changed files with 1,104 additions and 384 deletions.
4 changes: 4 additions & 0 deletions content_api/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@
"content_api.api_audit",
]

MODULES = [
"content_api.items.module",
]

CONTENTAPI_DOMAIN = {}

# NOTE: no trailing slash for the CONTENTAPI_URL setting!
Expand Down
7 changes: 7 additions & 0 deletions content_api/items/async_service.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from superdesk.core.resources import AsyncResourceService

from .model import ContentAPIItem


class ContentAPIItemService(AsyncResourceService[ContentAPIItem]):
pass
180 changes: 180 additions & 0 deletions content_api/items/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
from typing import Annotated, Any
from datetime import datetime
from enum import Enum, unique

from pydantic import Field, field_validator

from superdesk.core.resources import ResourceModel, dataclass, fields, validators, ModelWithVersions


ContentAssociation = Annotated[
dict[str, Any],
fields.elastic_mapping(
{
"dynamic": False,
"properties": {
"featuremedia": {
"dynamic": False,
"properties": {
"_id": {"type": "keyword"},
"guid": {"type": "keyword"},
"unique_id": {"type": "integer"},
},
},
},
}
),
]


@dataclass
class CVItem:
qcode: fields.Keyword
name: fields.Keyword
schema: fields.Keyword | None = None


@dataclass
class CVItemWithCode:
code: fields.Keyword
name: fields.Keyword
schema: fields.Keyword | None = None
scheme: fields.Keyword | None = None


@dataclass
class Place:
scheme: fields.Keyword | None = None
qcode: fields.Keyword | None = None
code: fields.Keyword | None = None
name: fields.Keyword | None = None
locality: fields.Keyword | None = None
state: fields.Keyword | None = None
country: fields.Keyword | None = None
world_region: fields.Keyword | None = None
locality_code: fields.Keyword | None = None
state_code: fields.Keyword | None = None
country_code: fields.Keyword | None = None
world_region_code: fields.Keyword | None = None
feature_class: fields.Keyword | None = None
location: fields.Geopoint | None = None
rel: fields.Keyword | None = None


@dataclass
class Annotation:
id: int
type: fields.Keyword
body: fields.Keyword


@dataclass
class ContentAuthor:
uri: fields.Keyword | None = None
parent: fields.Keyword | None = None
name: fields.TextWithKeyword | None = None
role: fields.Keyword | None = None
jobtitle: dict | None = None
sub_label: fields.TextWithKeyword | None = None
biography: str | None = None
code: fields.Keyword | None = None


@dataclass
class ContentReference:
id: Annotated[fields.Keyword, Field(alias="_id")]
key: fields.Keyword | None = None
uri: fields.Keyword | None = None
guid: fields.Keyword | None = None
type: fields.Keyword | None = None
source: fields.Keyword | None = None


@unique
class PubStatusType(str, Enum):
USABLE = "usable"
WITHHELD = "withheld"
CANCELLED = "canceled"


@unique
class ContentType(str, Enum):
TEXT = "text"
PREFORMATTED = "preformatted"
AUDIO = "audio"
VIDEO = "video"
PICTURE = "picture"
GRAPHIC = "graphic"
COMPOSITE = "composite"


class ContentAPIItem(ResourceModel, ModelWithVersions):
id: Annotated[str, Field(alias="_id")]
associations: ContentAssociation | None = None
anpa_category: list[CVItem] = Field(default_factory=list)
body_html: fields.HTML | None = None
body_text: str | None = None
byline: str | None = None
copyrightnotice: Annotated[str | None, fields.not_indexed()] = None
copyrightholder: str | None = None
description_html: str | None = None
description_text: str | None = None
headline: fields.HTML | None = None
language: fields.Keyword | None = None
located: str | None = None
mimetype: fields.Keyword | None = None
organization: list[dict] = Field(default_factory=list)
person: list[dict] = Field(default_factory=list)
place: list[Place] = Field(default_factory=list)
profile: str | None = None
pubstatus: Annotated[PubStatusType | None, fields.keyword_mapping()] = None
renditions: dict | None = None
service: list[CVItemWithCode] = Field(default_factory=list)
slugline: str | None = None
source: fields.Keyword | None = None
subject: list[CVItemWithCode] = Field(default_factory=list)
keywords: list[fields.HTML] = Field(default_factory=list)
anpa_take_key: str | None = None

content_type: Annotated[ContentType, fields.keyword_mapping(), Field(alias="type")] = ContentType.TEXT

urgency: int | None = None
priority: int | None = None
uri: Annotated[fields.Keyword | None, validators.validate_iunique_value_async("items", "uri")] = None
usageterms: str | None = None
version: str | None = None
versioncreated: datetime = Field(default_factory=datetime.now)
firstcreated: datetime = Field(default_factory=datetime.now)
firstpublished: datetime = Field(default_factory=datetime.now)
embargoed: datetime | None = None
evolvedfrom: fields.Keyword | None = None
nextversion: fields.Keyword | None = None
original_id: fields.Keyword | None = None
subscribers: Annotated[list[fields.Keyword], fields.keyword_mapping(), Field(default_factory=list)]
ednote: str | None = None
signal: list[CVItemWithCode] = Field(default_factory=list)
genre: list[CVItemWithCode] = Field(default_factory=list)
ancestors: Annotated[list[fields.Keyword], fields.keyword_mapping(), Field(default_factory=list)]
attachments: list[dict] = Field(default_factory=list)
annotations: list[Annotation] = Field(default_factory=list)

extra: dict | None = None
extra_items: dict | None = None
authors: list[ContentAuthor] = Field(default_factory=list)
wordcount: int | None = None
charcount: int | None = None
readtime: int | None = None

# These are for linking to Planning module resources
event_id: fields.Keyword | None = None
planning_id: fields.Keyword | None = None
coverage_id: fields.Keyword | None = None
agenda_id: fields.Keyword | None = None
agenda_href: fields.Keyword | None = None

refs: list[ContentReference] = Field(default_factory=list)
expiry: datetime = Field(default_factory=datetime.now)

@field_validator("version", mode="before")
def parse_version(cls, value: int | str | None) -> str | None:
return str(value) if value is not None else None
44 changes: 44 additions & 0 deletions content_api/items/module.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from superdesk.core.module import Module
from superdesk.core.resources import (
ResourceConfig,
MongoResourceConfig,
MongoIndexOptions,
ElasticResourceConfig,
)
from content_api import MONGO_PREFIX, ELASTIC_PREFIX

from .model import ContentAPIItem
from .async_service import ContentAPIItemService


content_api_item_resource_config = ResourceConfig(
name="items",
data_class=ContentAPIItem,
service=ContentAPIItemService,
default_sort=[("versioncreated", -1)],
versioning=True,
mongo=MongoResourceConfig(
prefix=MONGO_PREFIX,
indexes=[
MongoIndexOptions(
name="_ancestors_",
keys=[("ancestors", 1)],
),
MongoIndexOptions(
name="expiry_1",
keys=[("expiry", 1)],
),
],
),
elastic=ElasticResourceConfig(
prefix=ELASTIC_PREFIX,
filter={"bool": {"must_not": {"term": {"type": "composite"}}}},
),
# TODO-ASYNC: Implement the GET & Search endpoints for this resource
)


module = Module(
"content_api.items",
resources=[content_api_item_resource_config],
)
103 changes: 103 additions & 0 deletions content_api/publish/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
from superdesk import get_resource_service
from superdesk.resource_fields import ID_FIELD, GUID_FIELD

from content_api.items.model import ContentAPIItem, PubStatusType
from content_api.items.async_service import ContentAPIItemService


async def publish_docs_to_content_api(docs: list[dict]) -> list[str]:
ids = []
for doc in docs:
item_id = doc.pop(GUID_FIELD)
doc[ID_FIELD] = item_id
ids.append(await publish_doc_to_content_api(doc))
return ids


async def publish_doc_to_content_api(item_dict: dict) -> str:
item = ContentAPIItem.from_dict(item_dict)
service = ContentAPIItemService()

original = await service.find_by_id(item.id)
if original:
item.subscribers = list(set(original.subscribers or []) | set(item.subscribers or []))

process_associations(item, original)
create_version_doc(item)

if original:
await service.update(original.id, item.to_dict(context={"use_objectid": True}))
return original.id
else:
return (await service.create([item]))[0]


def process_associations(updates: ContentAPIItem, original: ContentAPIItem | None) -> None:
"""Update associations using existing published item and ensure that associated item subscribers
are equal or subset of the parent subscribers.
:param updates:
:param original:
:return:
"""

subscribers = updates.subscribers or []
for assoc, update_assoc in (updates.associations or {}).items():
if not update_assoc:
continue

if original:
original_assoc = (original.associations or {}).get(assoc)

if original_assoc:
if original_assoc.get(ID_FIELD) == update_assoc.get(ID_FIELD):
update_assoc["subscribers"] = list(
set(original_assoc.get("subscribers") or []) | set(update_assoc.get("subscribers") or [])
)

if original_assoc.get("renditions"):
update_assoc.setdefault("renditions", {})
for rend in original_assoc["renditions"]:
update_assoc["renditions"].setdefault(rend, None)

update_assoc["subscribers"] = list(set(update_assoc["subscribers"]) & set(subscribers))

# remove associations which were there previously
# but are missing now
if original and original.associations:
if not updates.associations:
updates.associations = {}
for assoc in original.associations:
updates.associations.setdefault(assoc, None)

# If there are no associations anymore, then set the entire associations field to None
if updates.associations is not None and not any([assoc for assoc in updates.associations.values()]):
updates.associations = None


# TODO-ASYNC: Use new versioning system
def create_version_doc(item: ContentAPIItem) -> None:
"""
Store the item in the item version collection
:param item:
:return:
"""
version_item = item.to_dict(context={"use_objectid": True})
version_item["_id_document"] = version_item.pop("_id")
get_resource_service("items_versions").create([version_item])
# if the update is a cancel we need to cancel all versions
if item.pubstatus == PubStatusType.CANCELLED:
_cancel_versions(item.id)


# TODO-ASYNC: Use new versioning system
def _cancel_versions(doc_id: str) -> None:
"""
Given an id of a document set the pubstatus to canceled for all versions
:param doc_id:
:return:
"""
query = {"_id_document": doc_id}
update = {"pubstatus": "canceled"}
for item in get_resource_service("items_versions").get_from_mongo(req=None, lookup=query):
if item.get("pubstatus") != "canceled":
get_resource_service("items_versions").update(item["_id"], update, item)
Loading

0 comments on commit beb6af3

Please sign in to comment.