Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle custom attributes received in the API response. #213

Merged
merged 13 commits into from
Sep 19, 2024
24 changes: 21 additions & 3 deletions scrapy_zyte_api/_annotations.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from enum import Enum
from typing import Iterable, List, Optional, TypedDict
from typing import Any, Dict, FrozenSet, Iterable, List, Optional, Tuple, TypedDict


class ExtractFrom(str, Enum):
Expand Down Expand Up @@ -56,7 +56,7 @@
error: Optional[str]


def make_hashable(obj):
def make_hashable(obj: Any) -> Any:
if isinstance(obj, (tuple, list)):
return tuple((make_hashable(e) for e in obj))

Expand All @@ -66,7 +66,25 @@
return obj


def actions(value: Iterable[Action]):
def _from_hashable(obj: Any) -> Any:
if isinstance(obj, tuple):
return [_from_hashable(o) for o in obj]

if isinstance(obj, frozenset):
return {_from_hashable(k): _from_hashable(v) for k, v in obj}

return obj

Check warning on line 76 in scrapy_zyte_api/_annotations.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/_annotations.py#L76

Added line #L76 was not covered by tests


def actions(value: Iterable[Action]) -> Tuple[Any, ...]:
    """Return the given :class:`~scrapy_zyte_api.Action` dicts as a hashable tuple."""
    # Dependency types must be hashable, and neither lists nor dicts are, so
    # each action goes through make_hashable() before being put in a tuple.
    return tuple(map(make_hashable, value))


def custom_attrs(
    input: Dict[str, Any], options: Optional[Dict[str, Any]] = None
) -> Tuple[FrozenSet[Any], Optional[FrozenSet[Any]]]:
    """Convert custom attributes parameters into a hashable pair.

    The first item of the returned tuple is *input* passed through
    ``make_hashable()``. The second item is *options* passed through
    ``make_hashable()``, or ``None`` when *options* is falsy (``None`` or an
    empty dict).
    """
    hashable_input = make_hashable(input)
    if not options:
        return hashable_input, None
    return hashable_input, make_hashable(options)

Check warning on line 90 in scrapy_zyte_api/_annotations.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/_annotations.py#L88-L90

Added lines #L88 - L90 were not covered by tests
45 changes: 35 additions & 10 deletions scrapy_zyte_api/providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
AutoProductListPage,
AutoProductNavigationPage,
AutoProductPage,
CustomAttributes,
CustomAttributesMetadata,
CustomAttributesValues,
Item,
JobPosting,
Product,
Expand All @@ -35,7 +38,7 @@
from zyte_common_items.fields import is_auto_field

from scrapy_zyte_api import Actions, ExtractFrom, Geolocation, Screenshot
from scrapy_zyte_api._annotations import _ActionResult
from scrapy_zyte_api._annotations import _ActionResult, _from_hashable

Check warning on line 41 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L41

Added line #L41 was not covered by tests
from scrapy_zyte_api.responses import ZyteAPITextResponse

try:
Expand Down Expand Up @@ -76,6 +79,8 @@
ArticleNavigation,
BrowserHtml,
BrowserResponse,
CustomAttributes,
CustomAttributesValues,
Geolocation,
JobPosting,
Product,
Expand Down Expand Up @@ -175,15 +180,14 @@
)
zyte_api_meta["actions"] = []
for action in cls.__metadata__[0]: # type: ignore[attr-defined]
zyte_api_meta["actions"].append(
{
k: (
dict(v)
if isinstance(v, frozenset)
else list(v) if isinstance(v, tuple) else v
)
for k, v in action
}
zyte_api_meta["actions"].append(_from_hashable(action))
continue

Check warning on line 184 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L183-L184

Added lines #L183 - L184 were not covered by tests
if cls_stripped in {CustomAttributes, CustomAttributesValues}:
custom_attrs_input, custom_attrs_options = cls.__metadata__[0] # type: ignore[attr-defined]
zyte_api_meta["customAttributes"] = _from_hashable(custom_attrs_input)

Check warning on line 187 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L186-L187

Added lines #L186 - L187 were not covered by tests
if custom_attrs_options:
zyte_api_meta["customAttributesOptions"] = _from_hashable(

Check warning on line 189 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L189

Added line #L189 was not covered by tests
custom_attrs_options
)
continue
kw = _ITEM_KEYWORDS.get(cls_stripped)
Expand Down Expand Up @@ -322,6 +326,27 @@
result = AnnotatedInstance(Actions(actions_result), cls.__metadata__) # type: ignore[attr-defined]
results.append(result)
continue
if cls_stripped is CustomAttributes and is_typing_annotated(cls):
custom_attrs_result = api_response.raw_api_response["customAttributes"]
result = AnnotatedInstance(

Check warning on line 331 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L330-L331

Added lines #L330 - L331 were not covered by tests
CustomAttributes(
CustomAttributesValues(custom_attrs_result["values"]),
CustomAttributesMetadata.from_dict(
custom_attrs_result["metadata"]
),
),
cls.__metadata__, # type: ignore[attr-defined]
)
results.append(result)
continue

Check warning on line 341 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L340-L341

Added lines #L340 - L341 were not covered by tests
if cls_stripped is CustomAttributesValues and is_typing_annotated(cls):
custom_attrs_result = api_response.raw_api_response["customAttributes"]
result = AnnotatedInstance(

Check warning on line 344 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L343-L344

Added lines #L343 - L344 were not covered by tests
CustomAttributesValues(custom_attrs_result["values"]),
cls.__metadata__, # type: ignore[attr-defined]
)
results.append(result)
continue

Check warning on line 349 in scrapy_zyte_api/providers.py

View check run for this annotation

Codecov / codecov/patch

scrapy_zyte_api/providers.py#L348-L349

Added lines #L348 - L349 were not covered by tests
kw = _ITEM_KEYWORDS.get(cls_stripped)
if not kw:
continue
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ def get_version():
"andi>=0.6.0",
"scrapy-poet>=0.22.3",
"web-poet>=0.17.0",
"zyte-common-items>=0.20.0",
# https://github.com/zytedata/zyte-common-items/pull/106
"zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs-dep",
]
},
classifiers=[
Expand Down
11 changes: 11 additions & 0 deletions tests/mockserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,17 @@ def render_POST(self, request):
"name"
] += f" (country {request_data['geolocation']})"

if "customAttributes" in request_data:
response_data["customAttributes"] = {
"metadata": {
"textInputTokens": 1000,
},
"values": {
"attr1": "foo",
"attr2": 42,
},
}

return json.dumps(response_data).encode()


Expand Down
114 changes: 113 additions & 1 deletion tests/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import pytest

from scrapy_zyte_api._annotations import custom_attrs

pytest.importorskip("scrapy_poet")

import attrs
Expand All @@ -24,7 +26,14 @@
handle_urls,
)
from web_poet.pages import get_item_cls
from zyte_common_items import AutoProductPage, BasePage, BaseProductPage, Product
from zyte_common_items import (
AutoProductPage,
BasePage,
BaseProductPage,
CustomAttributes,
CustomAttributesValues,
Product,
)
from zyte_common_items.fields import auto_field

from scrapy_zyte_api import Actions, ExtractFrom, Geolocation, Screenshot, actions
Expand Down Expand Up @@ -394,6 +403,109 @@ def parse_(self, response: DummyResponse, page: GeoProductPage): # type: ignore
assert "Geolocation dependencies must be annotated" in caplog.text


# Custom attributes definition shared by the custom attributes provider tests.
custom_attrs_input = {
    "attr1": dict(type="string", description="descr1"),
    "attr2": dict(type="number", description="descr2"),
}


@pytest.mark.skipif(
    sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9"
)
@pytest.mark.parametrize(
    "annotation",
    [
        custom_attrs(custom_attrs_input),
        custom_attrs(custom_attrs_input, None),
        custom_attrs(custom_attrs_input, {}),
        custom_attrs(custom_attrs_input, {"foo": "bar"}),
    ],
)
@ensureDeferred
async def test_provider_custom_attrs(mockserver, annotation):
    """Check that an annotated ``CustomAttributes`` dependency is provided.

    The page object asks for a ``Product`` and for ``CustomAttributes``
    annotated with each parametrized ``custom_attrs()`` result; the crawled
    item must carry both the product and the custom attributes (values and
    metadata) returned by the mock server.
    """
    from typing import Annotated

    @attrs.define
    class CustomAttrsPage(BasePage):
        product: Product
        custom_attrs: Annotated[CustomAttributes, annotation]

    class CustomAttrsZyteAPISpider(ZyteAPISpider):
        def parse_(self, response: DummyResponse, page: CustomAttrsPage):  # type: ignore[override]
            yield dict(product=page.product, custom_attrs=page.custom_attrs)

    crawl_settings = create_scrapy_settings()
    crawl_settings["ZYTE_API_URL"] = mockserver.urljoin("/")
    crawl_settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}

    scraped, page_url, _ = await crawl_single_item(
        CustomAttrsZyteAPISpider, HtmlResource, crawl_settings
    )

    expected_product = Product.from_dict(
        {
            "url": page_url,
            "name": "Product name",
            "price": "10",
            "currency": "USD",
        }
    )
    assert scraped["product"] == expected_product

    expected_custom_attrs = CustomAttributes.from_dict(
        dict(
            values=dict(attr1="foo", attr2=42),
            metadata=dict(textInputTokens=1000),
        )
    )
    assert scraped["custom_attrs"] == expected_custom_attrs


@pytest.mark.skipif(
    sys.version_info < (3, 9), reason="No Annotated support in Python < 3.9"
)
@ensureDeferred
async def test_provider_custom_attrs_values(mockserver):
    """Check that an annotated ``CustomAttributesValues`` dependency is provided.

    Unlike ``CustomAttributes``, only the extracted values are expected in the
    crawled item, without the metadata.
    """
    from typing import Annotated

    @attrs.define
    class CustomAttrsPage(BasePage):
        product: Product
        custom_attrs: Annotated[
            CustomAttributesValues,
            custom_attrs(custom_attrs_input),
        ]

    class CustomAttrsZyteAPISpider(ZyteAPISpider):
        def parse_(self, response: DummyResponse, page: CustomAttrsPage):  # type: ignore[override]
            yield dict(product=page.product, custom_attrs=page.custom_attrs)

    crawl_settings = create_scrapy_settings()
    crawl_settings["ZYTE_API_URL"] = mockserver.urljoin("/")
    crawl_settings["SCRAPY_POET_PROVIDERS"] = {ZyteApiProvider: 0}

    scraped, page_url, _ = await crawl_single_item(
        CustomAttrsZyteAPISpider, HtmlResource, crawl_settings
    )

    expected_product = Product.from_dict(
        {
            "url": page_url,
            "name": "Product name",
            "price": "10",
            "currency": "USD",
        }
    )
    assert scraped["product"] == expected_product

    expected_values = dict(attr1="foo", attr2=42)
    assert scraped["custom_attrs"] == expected_values


class RecordingHandler(ScrapyZyteAPIDownloadHandler):
"""Subclasses the original handler in order to record the Zyte API parameters
used for each downloading request.
Expand Down
2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ deps =
andi==0.6.0
scrapy-poet==0.22.3
web-poet==0.17.0
zyte-common-items==0.20.0
zyte-common-items @ git+https://github.com/zytedata/zyte-common-items.git@custom-attrs-dep

[testenv:pinned-extra]
basepython=python3.8
Expand Down
Loading