Skip to content

Commit

Permalink
Merge pull request #127 from Gallaecio/add-on
Browse files Browse the repository at this point in the history
Add an add-on, cover ZyteLogFormatter in the docs
  • Loading branch information
kmike authored Jan 14, 2025
2 parents 5fb57aa + 4bfa519 commit ca375d0
Show file tree
Hide file tree
Showing 10 changed files with 143 additions and 14 deletions.
25 changes: 25 additions & 0 deletions docs/_ext/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
def setup(app):
# https://stackoverflow.com/a/13663325
#
# Scrapy’s
# https://github.com/scrapy/scrapy/blob/dba37674e6eaa6c2030c8eb35ebf8127cd488062/docs/_ext/scrapydocs.py#L90C16-L110C6
app.add_crossref_type(
directivename="setting",
rolename="setting",
indextemplate="pair: %s; setting",
)
app.add_crossref_type(
directivename="signal",
rolename="signal",
indextemplate="pair: %s; signal",
)
app.add_crossref_type(
directivename="command",
rolename="command",
indextemplate="pair: %s; command",
)
app.add_crossref_type(
directivename="reqmeta",
rolename="reqmeta",
indextemplate="pair: %s; reqmeta",
)
4 changes: 4 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pkgutil
import sys
from datetime import datetime
from pathlib import Path


def get_copyright(attribution, *, first_year):
Expand All @@ -26,7 +28,9 @@ def get_version_and_release():
copyright = get_copyright("Zyte Group Ltd", first_year=2022)
version, release = get_version_and_release()

# Put the directory that contains this conf.py at the front of sys.path so
# that the local "_ext" package listed in ``extensions`` below is importable.
sys.path.insert(0, str(Path(__file__).parent.absolute()))
extensions = [
    "_ext",
    "sphinx.ext.autodoc",
    "sphinx.ext.intersphinx",
]
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@ Reference
components
converters
adapter
pipelines
scrapy
6 changes: 0 additions & 6 deletions docs/reference/pipelines.rst

This file was deleted.

16 changes: 16 additions & 0 deletions docs/reference/scrapy.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
=================
Scrapy components
=================

Item pipelines
==============

.. autoclass:: zyte_common_items.pipelines.AEPipeline
.. autoclass:: zyte_common_items.pipelines.DropLowProbabilityItemPipeline


Log formatters
==============

.. autoclass:: zyte_common_items.log_formatters.ZyteLogFormatter
.. autoclass:: zyte_common_items.log_formatters.InfoDropItem
45 changes: 41 additions & 4 deletions docs/setup.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,49 @@ Installation
.. _configuration:
.. _scrapy-config:

Configuration
=============
Scrapy configuration
====================

To allow itemadapter_ users, like Scrapy_, to interact with :ref:`items
<items>`, prepend :class:`~zyte_common_items.ZyteItemAdapter` or
If you use Scrapy, zyte-common-items provides some functionality that needs
configuring:

- If using Scrapy_ 2.10 or higher, enable the add-on:

.. code-block:: python
   :caption: settings.py

   ADDONS = {
       "zyte_common_items.Addon": 400,
   }

The add-on:

- Prepends :class:`~zyte_common_items.ZyteItemAdapter` to
itemadapter.ItemAdapter.ADAPTER_CLASSES_ if neither
:class:`~zyte_common_items.ZyteItemAdapter` nor
:class:`~zyte_common_items.ZyteItemKeepEmptyAdapter` is already there.

- Sets :setting:`LOG_FORMATTER <scrapy:LOG_FORMATTER>` to
:class:`~zyte_common_items.log_formatters.ZyteLogFormatter` if a custom
log formatter is not already set (i.e. with ``addon`` priority, see
:attr:`~scrapy.settings.SETTINGS_PRIORITIES`).

- If using Scrapy_ 2.9 or lower, apply those configurations manually as
needed.


.. _itemadapter-config:

itemadapter configuration
=========================

.. tip:: You do not need to set this manually if you are :ref:`using the Scrapy
add-on <scrapy-config>`.

To allow itemadapter_ to interact with :ref:`items <items>`, prepend
:class:`~zyte_common_items.ZyteItemAdapter` or
:class:`~zyte_common_items.ZyteItemKeepEmptyAdapter` to
itemadapter.ItemAdapter.ADAPTER_CLASSES_ as early as possible in your code::

Expand Down
7 changes: 6 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,14 @@ deps =
web-poet==0.14.0
zyte-parsers==0.5.0

[extra]
deps =
scrapy

[testenv:extra]
deps =
{[base]deps}
scrapy
{[extra]deps}
commands =
pytest \
--cov-report=term-missing:skip-covered \
Expand All @@ -63,6 +67,7 @@ deps =
changedir = docs
deps =
-rdocs/requirements.txt
{[extra]deps}
setenv =
READTHEDOCS_PROJECT=zyte-common-items
READTHEDOCS_VERSION=main
Expand Down
5 changes: 5 additions & 0 deletions zyte_common_items/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,8 @@
SerpPage,
SocialMediaPostPage,
)

try:
    from ._addon import Addon
except ImportError:
    # ._addon imports Scrapy at module level, so this import fails when
    # Scrapy is not installed; in that case Addon is simply not exported.
    pass
36 changes: 36 additions & 0 deletions zyte_common_items/_addon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from collections import deque

from itemadapter import ItemAdapter
from scrapy.settings import BaseSettings
from scrapy.utils.misc import load_object

from . import ZyteItemAdapter, ZyteItemKeepEmptyAdapter
from .log_formatters import ZyteLogFormatter


def _setdefault(settings, setting, cls, pos):
setting_value = settings[setting]
if not setting_value:
settings[setting] = {cls: pos}
return
if cls in setting_value:
return
for cls_or_path in setting_value:
if isinstance(cls_or_path, str):
_cls = load_object(cls_or_path)
if _cls == cls:
return
settings[setting][cls] = pos


class Addon:
    """Scrapy add-on that applies the zyte-common-items configuration."""

    def update_settings(self, settings: BaseSettings) -> None:
        """Register the item adapter and set the log formatter.

        ``ZyteItemAdapter`` is placed first in ``ItemAdapter.ADAPTER_CLASSES``
        unless that collection already holds ``ZyteItemAdapter``,
        ``ZyteItemKeepEmptyAdapter`` or a subclass of either. The
        ``LOG_FORMATTER`` setting is then set to ``ZyteLogFormatter`` with
        ``addon`` priority.
        """
        zyte_adapters = (ZyteItemAdapter, ZyteItemKeepEmptyAdapter)
        for adapter_cls in ItemAdapter.ADAPTER_CLASSES:
            if issubclass(adapter_cls, zyte_adapters):
                break
        else:
            # No Zyte adapter registered yet: rebuild the collection with
            # ZyteItemAdapter in front of the existing adapters.
            ItemAdapter.ADAPTER_CLASSES = deque(
                [ZyteItemAdapter, *ItemAdapter.ADAPTER_CLASSES]
            )

        settings.set("LOG_FORMATTER", ZyteLogFormatter, priority="addon")
11 changes: 9 additions & 2 deletions zyte_common_items/log_formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,18 @@


class InfoDropItem(DropItem):
    """:class:`~scrapy.exceptions.DropItem` subclass for items that should be
    dropped with an ``INFO`` message (instead of the default ``WARNING``
    message).

    It is used, for example, by
    :class:`~zyte_common_items.pipelines.DropLowProbabilityItemPipeline`.
    """


class ZyteLogFormatter(LogFormatter):
"""Log formatter that implements support for :class:`InfoDropItem`."""

def dropped(self, item, exception, response, spider):
data = super().dropped(item, exception, response, spider)
if isinstance(exception, InfoDropItem):
Expand Down

0 comments on commit ca375d0

Please sign in to comment.