diff --git a/scrapy_zyte_api/_params.py b/scrapy_zyte_api/_params.py
index 6ddb7df9..720c0737 100644
--- a/scrapy_zyte_api/_params.py
+++ b/scrapy_zyte_api/_params.py
@@ -2,7 +2,7 @@
 from copy import copy
 from logging import getLogger
 from os import environ
-from typing import Any, Dict, List, Mapping, Optional, Set, Union
+from typing import Any, Dict, Iterable, List, Mapping, Optional, Set, Tuple, Union
 from warnings import warn

 from scrapy import Request
@@ -290,7 +290,7 @@ def _iter_headers(
     api_params: Dict[str, Any],
     request: Request,
     header_parameter: str,
-):
+) -> Iterable[Tuple[bytes, bytes, bytes]]:
     headers = api_params.get(header_parameter)
     if headers not in (None, True):
         logger.warning(
@@ -306,8 +306,8 @@ def _iter_headers(
             continue
         decoded_k = k.decode()
         lowercase_k = k.strip().lower()
-        v = b",".join(v)
-        decoded_v = v.decode()
+        joined_v = b",".join(v)
+        decoded_v = joined_v.decode()

         if lowercase_k.startswith(b"x-crawlera-"):
             for spm_header_suffix, zapi_request_param in (
@@ -435,7 +435,7 @@ def _iter_headers(
                 )
                 continue

-        yield k, lowercase_k, v
+        yield k, lowercase_k, joined_v


 def _map_custom_http_request_headers(
@@ -461,7 +461,7 @@ def _map_request_headers(
     *,
     api_params: Dict[str, Any],
     request: Request,
-    browser_headers: Dict[str, str],
+    browser_headers: Dict[bytes, str],
     browser_ignore_headers: SKIP_HEADER_T,
 ):
     request_headers = {}
@@ -477,7 +477,7 @@ def _map_request_headers(
             lowercase_k
         ] not in (ANY_VALUE, v):
             logger.warning(
-                f"Request {request} defines header {k}, which "
+                f"Request {request} defines header {k.decode()}, which "
                 f"cannot be mapped into the Zyte API requestHeaders "
                 f"parameter. See the ZYTE_API_BROWSER_HEADERS setting."
             )
@@ -500,7 +500,7 @@ def _set_request_headers_from_request(
     api_params: Dict[str, Any],
     request: Request,
     skip_headers: SKIP_HEADER_T,
-    browser_headers: Dict[str, str],
+    browser_headers: Dict[bytes, str],
     browser_ignore_headers: SKIP_HEADER_T,
 ):
     """Updates *api_params*, in place, based on *request*."""
@@ -727,7 +727,7 @@ def _update_api_params_from_request(
     default_params: Dict[str, Any],
     meta_params: Dict[str, Any],
     skip_headers: SKIP_HEADER_T,
-    browser_headers: Dict[str, str],
+    browser_headers: Dict[bytes, str],
     browser_ignore_headers: SKIP_HEADER_T,
     cookies_enabled: bool,
     cookie_jars: Optional[Dict[Any, CookieJar]],
@@ -859,7 +859,7 @@ def _get_automap_params(
     default_enabled: bool,
     default_params: Dict[str, Any],
     skip_headers: SKIP_HEADER_T,
-    browser_headers: Dict[str, str],
+    browser_headers: Dict[bytes, str],
     browser_ignore_headers: SKIP_HEADER_T,
     cookies_enabled: bool,
     cookie_jars: Optional[Dict[Any, CookieJar]],
@@ -906,7 +906,7 @@ def _get_api_params(
     transparent_mode: bool,
     automap_params: Dict[str, Any],
     skip_headers: SKIP_HEADER_T,
-    browser_headers: Dict[str, str],
+    browser_headers: Dict[bytes, str],
     browser_ignore_headers: SKIP_HEADER_T,
     job_id: Optional[str],
     cookies_enabled: bool,
@@ -1003,7 +1003,7 @@ def _load_mw_skip_headers(crawler):
     return mw_skip_headers


-def _load_browser_headers(settings):
+def _load_browser_headers(settings) -> Dict[bytes, str]:
     browser_headers = settings.getdict(
         "ZYTE_API_BROWSER_HEADERS",
         {"Referer": "referer"},
diff --git a/scrapy_zyte_api/_request_fingerprinter.py b/scrapy_zyte_api/_request_fingerprinter.py
index 5cb2a048..ee65ba23 100644
--- a/scrapy_zyte_api/_request_fingerprinter.py
+++ b/scrapy_zyte_api/_request_fingerprinter.py
@@ -1,5 +1,5 @@
 from logging import getLogger
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast

 logger = getLogger(__name__)

@@ -49,7 +49,7 @@ def __init__(self, crawler):
                 crawler=crawler,
             )
             if self._has_poet and not isinstance(
-                self._fallback_request_fingerprinter, RequestFingerprinter
+                self._fallback_request_fingerprinter, cast(type, RequestFingerprinter)
             ):
                 logger.warning(
                     f"You have scrapy-poet installed, but your custom value "
diff --git a/scrapy_zyte_api/_session.py b/scrapy_zyte_api/_session.py
index eb674492..b0475584 100644
--- a/scrapy_zyte_api/_session.py
+++ b/scrapy_zyte_api/_session.py
@@ -73,7 +73,7 @@ class DummyResponse:  # type: ignore[no-redef]
     from scrapy.downloadermiddlewares.retry import get_retry_request
 except ImportError:  # pragma: no cover
     # https://github.com/scrapy/scrapy/blob/b1fe97dc6c8509d58b29c61cf7801eeee1b409a9/scrapy/downloadermiddlewares/retry.py#L57-L142
-    def get_retry_request(
+    def get_retry_request(  # type: ignore[misc]
         request,
         *,
         spider,
@@ -125,7 +125,7 @@ def get_retry_request(
     from scrapy.http.request import NO_CALLBACK
 except ImportError:

-    def NO_CALLBACK(response):
+    def NO_CALLBACK(response):  # type: ignore[misc]
         pass  # pragma: no cover


@@ -165,7 +165,7 @@ def _get_asyncio_event_loop():
         return set_asyncio_event_loop()

     # https://github.com/scrapy/scrapy/blob/b1fe97dc6c8509d58b29c61cf7801eeee1b409a9/scrapy/utils/defer.py#L360-L379
-    def deferred_to_future(d):
+    def deferred_to_future(d):  # type: ignore[misc]
         return d.asFuture(_get_asyncio_event_loop())


@@ -177,7 +177,7 @@ def deferred_to_future(d):
     def build_from_crawler(
         objcls: Type[T], crawler: Crawler, /, *args: Any, **kwargs: Any
     ) -> T:
-        return create_instance(objcls, settings=None, crawler=crawler, *args, **kwargs)
+        return create_instance(objcls, settings=None, crawler=crawler, *args, **kwargs)  # type: ignore[misc]


 class PoolError(ValueError):
@@ -382,7 +382,7 @@ def check(self, response: Response, request: Request) -> bool:
         location = self.location(request)
         if not location:
             return True
-        for action in response.raw_api_response.get("actions", []):
+        for action in response.raw_api_response.get("actions", []):  # type: ignore[attr-defined]
             if action.get("action", None) != "setLocation":
                 continue
             if action.get("error", "").startswith("Action setLocation not supported "):
@@ -647,6 +647,8 @@ def _get_pool(self, request):
         return pool

     async def _init_session(self, session_id: str, request: Request, pool: str) -> bool:
+        assert self._crawler.engine
+        assert self._crawler.stats
         session_config = self._get_session_config(request)
         if meta_params := request.meta.get("zyte_api_session_params", None):
             session_params = meta_params
@@ -685,7 +687,7 @@ async def _init_session(self, session_id: str, request: Request, pool: str) -> b
             callback=NO_CALLBACK,
         )
         if _DOWNLOAD_NEEDS_SPIDER:
-            deferred = self._crawler.engine.download(
+            deferred = self._crawler.engine.download(  # type: ignore[call-arg]
                 session_init_request, spider=spider
             )
         else:
@@ -829,6 +831,7 @@ async def check(self, response: Response, request: Request) -> bool:
         """Check the response for signs of session expiration, update the
         internal session pool accordingly, and return ``False`` if the session
         has expired or ``True`` if the session passed validation."""
+        assert self._crawler.stats
         with self._fatal_error_handler:
             if self.is_init_request(request):
                 return True
@@ -860,6 +863,7 @@ async def check(self, response: Response, request: Request) -> bool:

     async def assign(self, request: Request):
         """Assign a working session to *request*."""
+        assert self._crawler.stats
         with self._fatal_error_handler:
             if self.is_init_request(request):
                 return
@@ -895,6 +899,7 @@ def is_enabled(self, request: Request) -> bool:
         return session_config.enabled(request)

     def handle_error(self, request: Request):
+        assert self._crawler.stats
         with self._fatal_error_handler:
             pool = self._get_pool(request)
             self._crawler.stats.inc_value(
@@ -908,6 +913,7 @@ def handle_error(self, request: Request):
             self._start_request_session_refresh(request, pool)

     def handle_expiration(self, request: Request):
+        assert self._crawler.stats
         with self._fatal_error_handler:
             pool = self._get_pool(request)
             self._crawler.stats.inc_value(
diff --git a/scrapy_zyte_api/addon.py b/scrapy_zyte_api/addon.py
index 7cf90257..1e6f8daa 100644
--- a/scrapy_zyte_api/addon.py
+++ b/scrapy_zyte_api/addon.py
@@ -1,3 +1,5 @@
+from typing import cast
+
 from scrapy.settings import BaseSettings
 from scrapy.utils.misc import load_object
 from zyte_api import zyte_api_retrying
@@ -74,7 +76,7 @@ def update_settings(self, settings: BaseSettings) -> None:
             settings.set(
                 "REQUEST_FINGERPRINTER_CLASS",
                 "scrapy_zyte_api.ScrapyZyteAPIRequestFingerprinter",
-                settings.getpriority("REQUEST_FINGERPRINTER_CLASS"),
+                cast(int, settings.getpriority("REQUEST_FINGERPRINTER_CLASS")),
             )
         else:
             settings.set(
@@ -124,5 +126,5 @@ def update_settings(self, settings: BaseSettings) -> None:
         settings.set(
             "ZYTE_API_RETRY_POLICY",
             _SESSION_RETRY_POLICIES.get(loaded_retry_policy, retry_policy),
-            settings.getpriority("ZYTE_API_RETRY_POLICY"),
+            cast(int, settings.getpriority("ZYTE_API_RETRY_POLICY")),
         )
diff --git a/scrapy_zyte_api/handler.py b/scrapy_zyte_api/handler.py
index 85294a7e..44ebfa5c 100644
--- a/scrapy_zyte_api/handler.py
+++ b/scrapy_zyte_api/handler.py
@@ -88,8 +88,8 @@ def __init__(
         # We keep the client in the crawler object to prevent multiple,
         # duplicate clients with the same settings to be used.
         # https://github.com/scrapy-plugins/scrapy-zyte-api/issues/58
-        crawler.zyte_api_client = client
-        self._client: AsyncZyteAPI = crawler.zyte_api_client
+        crawler.zyte_api_client = client  # type: ignore[attr-defined]
+        self._client: AsyncZyteAPI = crawler.zyte_api_client  # type: ignore[attr-defined]
         logger.info("Using a Zyte API key starting with %r", self._client.api_key[:7])
         verify_installed_reactor(
             "twisted.internet.asyncioreactor.AsyncioSelectorReactor"
@@ -104,6 +104,7 @@ def __init__(
         )
         self._param_parser = _ParamParser(crawler)
         self._retry_policy = _load_retry_policy(settings)
+        assert crawler.stats
         self._stats = crawler.stats
         self._must_log_request = settings.getbool("ZYTE_API_LOG_REQUESTS", False)
         self._truncate_limit = settings.getint("ZYTE_API_LOG_REQUESTS_TRUNCATE", 64)
@@ -129,6 +130,7 @@ async def engine_started(self):
         self._session = self._client.session(trust_env=self._trust_env)
         if not self._cookies_enabled:
             return
+        assert self._crawler.engine
         for middleware in self._crawler.engine.downloader.middleware.middlewares:
             if isinstance(middleware, self._cookie_mw_cls):
                 self._cookie_jars = middleware.jars
@@ -275,6 +277,9 @@ def _process_request_error(self, request, error):
             f"type={error.parsed.type!r}, request_id={error.request_id!r}) "
             f"while processing URL ({request.url}): {detail}"
         )
+        assert self._crawler
+        assert self._crawler.engine
+        assert self._crawler.spider
         for status, error_type, close_reason in (
             (401, "/auth/key-not-found", "zyte_api_bad_key"),
             (403, "/auth/account-suspended", "zyte_api_suspended_account"),
diff --git a/scrapy_zyte_api/providers.py b/scrapy_zyte_api/providers.py
index 93f3fcf5..1a2cd496 100644
--- a/scrapy_zyte_api/providers.py
+++ b/scrapy_zyte_api/providers.py
@@ -5,6 +5,7 @@
 from scrapy.crawler import Crawler
 from scrapy.utils.defer import maybe_deferred_to_future
 from scrapy_poet import PageObjectInputProvider
+from twisted.internet.defer import Deferred
 from web_poet import (
     AnyResponse,
     BrowserHtml,
@@ -47,7 +48,7 @@
     # requires Scrapy >= 2.8
     from scrapy.http.request import NO_CALLBACK
 except ImportError:
-    NO_CALLBACK = None
+    NO_CALLBACK = None  # type: ignore[assignment]


 _ITEM_KEYWORDS: Dict[type, str] = {
@@ -103,6 +104,7 @@ def is_provided(self, type_: Callable) -> bool:
         return super().is_provided(strip_annotated(type_))

     def _track_auto_fields(self, crawler: Crawler, request: Request, cls: Type):
+        assert crawler.stats
         if cls not in _ITEM_KEYWORDS:
             return
         if self._should_track_auto_fields is None:
@@ -256,8 +258,9 @@ async def __call__(  # noqa: C901
                 },
                 callback=NO_CALLBACK,
             )
+            assert crawler.engine
             api_response: ZyteAPITextResponse = await maybe_deferred_to_future(
-                crawler.engine.download(api_request)
+                cast("Deferred[ZyteAPITextResponse]", crawler.engine.download(api_request))
             )

             assert api_response.raw_api_response
diff --git a/scrapy_zyte_api/responses.py b/scrapy_zyte_api/responses.py
index dd5cb55a..941754f7 100644
--- a/scrapy_zyte_api/responses.py
+++ b/scrapy_zyte_api/responses.py
@@ -1,10 +1,10 @@
 from base64 import b64decode
 from copy import copy
 from datetime import datetime
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Union, cast

 from scrapy import Request
-from scrapy.http import HtmlResponse, Response, TextResponse
+from scrapy.http import Headers, HtmlResponse, Response, TextResponse
 from scrapy.http.cookies import CookieJar
 from scrapy.responsetypes import responsetypes

@@ -113,7 +113,9 @@ def _prepare_headers(cls, api_response: Dict[str, Any]):

 class ZyteAPITextResponse(ZyteAPIMixin, HtmlResponse):
     @classmethod
-    def from_api_response(cls, api_response: Dict, *, request: Request = None):
+    def from_api_response(
+        cls, api_response: Dict, *, request: Optional[Request] = None
+    ):
         """Alternative constructor to instantiate the response from the raw
         Zyte API response.
         """
@@ -144,7 +146,9 @@ def replace(self, *args, **kwargs):

 class ZyteAPIResponse(ZyteAPIMixin, Response):
     @classmethod
-    def from_api_response(cls, api_response: Dict, *, request: Request = None):
+    def from_api_response(
+        cls, api_response: Dict, *, request: Optional[Request] = None
+    ):
         """Alternative constructor to instantiate the response from the raw
         Zyte API response.
         """
@@ -190,9 +194,13 @@ def _process_response(
         return ZyteAPITextResponse.from_api_response(api_response, request=request)

     if api_response.get("httpResponseHeaders") and api_response.get("httpResponseBody"):
+        # a plain dict here doesn't work correctly on Scrapy < 2.1
+        scrapy_headers = Headers()
+        for header in cast(List[Dict[str, str]], api_response["httpResponseHeaders"]):
+            scrapy_headers[header["name"].encode()] = header["value"].encode()
         response_cls = responsetypes.from_args(
-            headers=api_response["httpResponseHeaders"],
-            url=api_response["url"],
+            headers=scrapy_headers,
+            url=cast(str, api_response["url"]),
             # FIXME: update this when python-zyte-api supports base64 decoding
             body=b64decode(api_response["httpResponseBody"]),  # type: ignore
         )
diff --git a/tests/__init__.py b/tests/__init__.py
index 55a5b470..12e4b861 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,8 +1,10 @@
 from contextlib import asynccontextmanager, contextmanager
 from os import environ
-from typing import Any, Dict, Optional, Type, Union
+from typing import Any, Dict, Optional

+from packaging.version import Version
 from scrapy import Spider
+from scrapy import __version__ as SCRAPY_VERSION
 from scrapy.crawler import Crawler
 from scrapy.utils.misc import load_object
 from scrapy.utils.test import get_crawler as _get_crawler
@@ -14,7 +16,7 @@
 _API_KEY = "a"

 DEFAULT_CLIENT_CONCURRENCY = AsyncClient(api_key=_API_KEY).n_conn
-SETTINGS_T = Dict[Union[Type, str], Any]
+SETTINGS_T = Dict[str, Any]
 SETTINGS: SETTINGS_T = {
     "DOWNLOAD_HANDLERS": {
         "http": "scrapy_zyte_api.handler.ScrapyZyteAPIDownloadHandler",
@@ -25,13 +27,16 @@
         "scrapy_zyte_api.ScrapyZyteAPISessionDownloaderMiddleware": 667,
     },
     "REQUEST_FINGERPRINTER_CLASS": "scrapy_zyte_api.ScrapyZyteAPIRequestFingerprinter",
-    "REQUEST_FINGERPRINTER_IMPLEMENTATION": "2.7",  # Silence deprecation warning
     "SPIDER_MIDDLEWARES": {
         "scrapy_zyte_api.ScrapyZyteAPISpiderMiddleware": 100,
     },
     "ZYTE_API_KEY": _API_KEY,
     "TWISTED_REACTOR": "twisted.internet.asyncioreactor.AsyncioSelectorReactor",
 }
+if Version(SCRAPY_VERSION) < Version("2.12"):
+    SETTINGS["REQUEST_FINGERPRINTER_IMPLEMENTATION"] = (
+        "2.7"  # Silence deprecation warning
+    )
 try:
     import scrapy_poet  # noqa: F401
 except ImportError:
diff --git a/tests/test_handler.py b/tests/test_handler.py
index 42c1d20c..d36770d5 100644
--- a/tests/test_handler.py
+++ b/tests/test_handler.py
@@ -474,9 +474,10 @@ async def test_trust_env(enabled):
     ),
 )
 def test_user_agent_for_build_client(user_agent, expected):
-    settings: SETTINGS_T = Settings(
+    settings: Settings = Settings(
         {
-            **SETTINGS,
+            # see https://github.com/python/mypy/issues/16557#issuecomment-1831213673
+            **SETTINGS,  # type: ignore[dict-item]
             "_ZYTE_API_USER_AGENT": user_agent,
         }
     )
@@ -503,6 +504,7 @@ def parse(self, response):

     crawler = get_crawler(TestSpider, settings_dict=settings)
     await crawler.crawl()
+    assert crawler.stats
     assert crawler.stats.get_value("finish_reason") == "zyte_api_bad_key"


@@ -530,6 +532,7 @@ def parse(self, response):

     crawler = get_crawler(TestSpider, settings_dict=settings)
     await crawler.crawl()
+    assert crawler.stats
     assert crawler.stats.get_value("finish_reason") == "zyte_api_suspended_account"


@@ -552,6 +555,7 @@ def parse(self, response):

     crawler = get_crawler(TestSpider, settings_dict=settings)
     await crawler.crawl()
+    assert crawler.stats
     assert crawler.stats.get_value("finish_reason") == "zyte_api_suspended_account"


diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py
index 7804bca3..3959b0f6 100644
--- a/tests/test_middlewares.py
+++ b/tests/test_middlewares.py
@@ -60,19 +60,21 @@ def spider_output_processor(middleware, request, spider):
 async def test_preserve_delay(mw_cls, processor, settings, preserve):
     crawler = get_crawler(settings_dict=settings)
     await crawler.crawl("a")
+    assert crawler.engine
+    assert crawler.spider
     spider = crawler.spider
     middleware = create_instance(mw_cls, settings=crawler.settings, crawler=crawler)

     # AutoThrottle does this.
-    spider.download_delay = 5
+    spider.download_delay = 5  # type: ignore[attr-defined]

     # No effect on non-Zyte-API requests
     request = Request("https://example.com")
     processor(middleware, request, spider)
     assert "download_slot" not in request.meta
     _, slot = crawler.engine.downloader._get_slot(request, spider)
-    assert slot.delay == spider.download_delay
+    assert slot.delay == spider.download_delay  # type: ignore[attr-defined]

     # On Zyte API requests, the download slot is changed, and its delay may be
     # set to 0 depending on settings.
@@ -166,6 +168,7 @@ def parse(self, response):
         f"Maximum Zyte API requests for this crawl is set at {zapi_max_requests}"
         in caplog.text
     )
+    assert crawler.stats
     assert crawler.stats.get_value("scrapy-zyte-api/success") == zapi_max_requests
     assert crawler.stats.get_value("scrapy-zyte-api/processed") == zapi_max_requests
     assert crawler.stats.get_value("item_scraped_count") <= zapi_max_requests + 6
@@ -213,6 +216,7 @@ def parse(self, response):
         f"Maximum Zyte API requests for this crawl is set at {zapi_max_requests}"
         in caplog.text
     )
+    assert crawler.stats
     assert crawler.stats.get_value("scrapy-zyte-api/success") == zapi_max_requests
     assert crawler.stats.get_value("scrapy-zyte-api/processed") == zapi_max_requests
     assert crawler.stats.get_value("item_scraped_count") == zapi_max_requests
@@ -244,6 +248,7 @@ def parse(self, response):

     crawler = get_crawler(TestSpider, settings_dict=settings)
     await crawler.crawl()
+    assert crawler.stats
     assert crawler.stats.get_value("finish_reason") == "failed_forbidden_domain"


@@ -270,6 +275,7 @@ def parse(self, response):

     crawler = get_crawler(TestSpider, settings_dict=settings)
     await crawler.crawl()
+    assert crawler.stats
     assert crawler.stats.get_value("finish_reason") == "failed_forbidden_domain"


@@ -295,6 +301,7 @@ def parse(self, response):

     crawler = get_crawler(TestSpider, settings_dict=settings)
     await crawler.crawl()
+    assert crawler.stats
     assert crawler.stats.get_value("finish_reason") == "finished"


@@ -319,6 +326,7 @@ def parse(self, response):

     crawler = get_crawler(TestSpider, settings_dict=settings)
     await crawler.crawl()
+    assert crawler.stats
     assert crawler.stats.get_value("finish_reason") == "finished"


@@ -349,6 +357,7 @@ def parse(self, response):

     crawler = get_crawler(TestSpider, settings_dict=settings)
     await crawler.crawl()
+    assert crawler.stats
     assert crawler.stats.get_value("finish_reason") == "failed_forbidden_domain"


@@ -378,7 +387,7 @@ class SPMSpider(Spider):
         start_urls = ["data:,"]

     if attribute is not None:
-        SPMSpider.zyte_smartproxy_enabled = attribute
+        SPMSpider.zyte_smartproxy_enabled = attribute  # type: ignore[attr-defined]

     settings = {
         "ZYTE_API_TRANSPARENT_MODE": True,
@@ -395,6 +404,7 @@ class SPMSpider(Spider):
     crawler = get_crawler(SPMSpider, settings_dict=settings)
     await crawler.crawl()
     expected = "plugin_conflict" if conflict else "finished"
+    assert crawler.stats
     assert crawler.stats.get_value("finish_reason") == expected


@@ -432,7 +442,7 @@ class CrawleraSpider(Spider):
         start_urls = ["data:,"]

     if attribute is not None:
-        CrawleraSpider.crawlera_enabled = attribute
+        CrawleraSpider.crawlera_enabled = attribute  # type: ignore[attr-defined]

     settings = {
         "ZYTE_API_TRANSPARENT_MODE": True,
@@ -449,6 +459,7 @@ class CrawleraSpider(Spider):
     crawler = get_crawler(CrawleraSpider, settings_dict=settings)
     await crawler.crawl()
     expected = "plugin_conflict" if conflict else "finished"
+    assert crawler.stats
     assert crawler.stats.get_value("finish_reason") == expected, (
         setting,
         attribute,
diff --git a/tests/test_providers.py b/tests/test_providers.py
index 6ef746d3..3781943f 100644
--- a/tests/test_providers.py
+++ b/tests/test_providers.py
@@ -507,6 +507,8 @@ class SomePage(BasePage):
         response: AnyResponse

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -534,6 +536,8 @@ class SomePage(BasePage):
         response: AnyResponse

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -562,6 +566,8 @@ class SomePage(BasePage):
         response: AnyResponse

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -590,6 +596,8 @@ class SomePage(BasePage):
         product: Product

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -619,6 +627,8 @@ class SomePage(BasePage):
         product: Product

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -651,6 +661,8 @@ class SomePage(ItemPage[Product]):
         response: AnyResponse

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -685,6 +697,8 @@ class SomePage(BasePage):
         product: Product

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -721,6 +735,8 @@ class SomePage(BasePage):
         product: Product

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -755,6 +771,8 @@ class SomePage(BasePage):
         product: Product

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -791,6 +809,8 @@ class SomePage(BasePage):
         product: Product

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -826,6 +846,8 @@ class SomePage(BasePage):
         html: BrowserHtml

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -852,6 +874,8 @@ class SomePage(BasePage):
         browser_response: BrowserResponse

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -879,6 +903,8 @@ class SomePage(BasePage):
         html: BrowserHtml

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -906,6 +932,8 @@ class SomePage(BasePage):
         http_response: HttpResponse

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -937,6 +965,8 @@ class SomePage(BasePage):
         http_response: HttpResponse

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -975,6 +1005,8 @@ class SecondPage(BasePage):
         response: AnyResponse

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -1009,6 +1041,8 @@ class SecondPage(BasePage):
         response: AnyResponse

     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

@@ -1036,6 +1070,8 @@ def parse_(self, response: DummyResponse, page1: FirstPage, page2: SecondPage):
 @ensureDeferred
 async def test_screenshot(mockserver):
     class ZyteAPISpider(Spider):
+        url: str
+
         def start_requests(self):
             yield Request(self.url, callback=self.parse_)

diff --git a/tests/test_responses.py b/tests/test_responses.py
index bfafc769..fa6cdae6 100644
--- a/tests/test_responses.py
+++ b/tests/test_responses.py
@@ -64,7 +64,7 @@ def raw_api_response_browser():
         "echoData": {"some_value": "here"},
         "httpResponseHeaders": [
             {"name": "Content-Type", "value": "text/html"},
-            {"name": "Content-Length", "value": len(PAGE_CONTENT)},
+            {"name": "Content-Length", "value": str(len(PAGE_CONTENT))},
         ],
         "statusCode": 200,
         "experimental": {
@@ -80,7 +80,7 @@ def raw_api_response_body():
         "echoData": {"some_value": "here"},
         "httpResponseHeaders": [
             {"name": "Content-Type", "value": "text/html"},
-            {"name": "Content-Length", "value": len(PAGE_CONTENT)},
+            {"name": "Content-Length", "value": str(len(PAGE_CONTENT))},
         ],
         "statusCode": 200,
         "experimental": {
@@ -97,7 +97,7 @@ def raw_api_response_mixed():
         "echoData": {"some_value": "here"},
         "httpResponseHeaders": [
             {"name": "Content-Type", "value": "text/html"},
-            {"name": "Content-Length", "value": len(PAGE_CONTENT_2)},
+            {"name": "Content-Length", "value": str(len(PAGE_CONTENT_2))},
         ],
         "statusCode": 200,
         "experimental": {
@@ -200,7 +200,7 @@ def test_non_utf8_response():
         "browserHtml": content,
         "httpResponseHeaders": [
             {"name": "Content-Type", "value": "text/html; charset=iso-8859-1"},
-            {"name": "Content-Length", "value": len(content)},
+            {"name": "Content-Length", "value": str(len(content))},
         ],
     }

@@ -343,7 +343,7 @@ def test__process_response_no_body():
         "product": {"name": "shoes"},
     }

-    resp = _process_response(api_response, Request(api_response["url"]))
+    resp = _process_response(api_response, Request(cast(str, api_response["url"])))

     assert isinstance(resp, Response)
     assert resp.body == b""
@@ -512,7 +512,7 @@ def test__process_response_non_text():
             }
         ],
     }
-    resp = _process_response(api_response, Request(api_response["url"]))
+    resp = _process_response(api_response, Request(cast(str, api_response["url"])))

     assert isinstance(resp, Response)
     with pytest.raises(NotSupported):
diff --git a/tests/test_sessions.py b/tests/test_sessions.py
index cc249d3c..cd5e401b 100644
--- a/tests/test_sessions.py
+++ b/tests/test_sessions.py
@@ -393,7 +393,7 @@ class UseChecker(ConstantChecker):
     def check(self, response: Response, request: Request) -> bool:
         if response.meta.get(SESSION_INIT_META_KEY, False) is True:
             return True
-        return super().check(request, response)
+        return super().check(response, request)


 class FalseUseChecker(FalseChecker, UseChecker):
@@ -2789,7 +2789,7 @@ class ExceptionRaisingDownloaderMiddleware:
     async def process_request(self, request: Request, spider: Spider) -> None:
         if request.meta.get("_is_session_init_request", False):
             return
-        raise spider.exception
+        raise spider.exception  # type: ignore[attr-defined]


 @pytest.mark.parametrize(