From d04f9f4e0ce090526b77b2e5b8337c8ede8ff08c Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Thu, 25 Apr 2024 13:20:44 +0300 Subject: [PATCH] Extract BaseReferencePaginator from BasePaginator (#1280) --- dlt/sources/helpers/rest_client/paginators.py | 43 +++++++++++-------- .../helpers/rest_client/test_client.py | 3 +- .../helpers/rest_client/test_paginators.py | 6 +-- 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/dlt/sources/helpers/rest_client/paginators.py b/dlt/sources/helpers/rest_client/paginators.py index 01f4fa7296..f34557bdfe 100644 --- a/dlt/sources/helpers/rest_client/paginators.py +++ b/dlt/sources/helpers/rest_client/paginators.py @@ -9,7 +9,6 @@ class BasePaginator(ABC): def __init__(self) -> None: self._has_next_page = True - self._next_reference: Optional[str] = None @property def has_next_page(self) -> bool: @@ -21,15 +20,6 @@ def has_next_page(self) -> bool: """ return self._has_next_page - @property - def next_reference(self) -> Optional[str]: - return self._next_reference - - @next_reference.setter - def next_reference(self, value: Optional[str]) -> None: - self._next_reference = value - self._has_next_page = value is not None - @abstractmethod def update_state(self, response: Response) -> None: """Update the paginator state based on the response. @@ -107,15 +97,30 @@ def update_request(self, request: Request) -> None: request.params[self.limit_param] = self.limit -class BaseNextUrlPaginator(BasePaginator): +class BaseReferencePaginator(BasePaginator): + def __init__(self) -> None: + super().__init__() + self.__next_reference: Optional[str] = None + + @property + def _next_reference(self) -> Optional[str]: + return self.__next_reference + + @_next_reference.setter + def _next_reference(self, value: Optional[str]) -> None: + self.__next_reference = value + self._has_next_page = value is not None + + +class BaseNextUrlPaginator(BaseReferencePaginator): def update_request(self, request: Request) -> None: # Handle relative URLs - if self.next_reference: - parsed_url = urlparse(self.next_reference) + if self._next_reference: + parsed_url = urlparse(self._next_reference) if not parsed_url.scheme: - self.next_reference = urljoin(request.url, self.next_reference) + self._next_reference = urljoin(request.url, self._next_reference) - request.url = self.next_reference + request.url = self._next_reference class HeaderLinkPaginator(BaseNextUrlPaginator): @@ -136,7 +141,7 @@ def __init__(self, links_next_key: str = "next") -> None: self.links_next_key = links_next_key def update_state(self, response: Response) -> None: - self.next_reference = response.links.get(self.links_next_key, {}).get("url") + self._next_reference = response.links.get(self.links_next_key, {}).get("url") class JSONResponsePaginator(BaseNextUrlPaginator): @@ -158,10 +163,10 @@ def __init__( def update_state(self, response: Response) -> None: values = jsonpath.find_values(self.next_url_path, response.json()) - self.next_reference = values[0] if values else None + self._next_reference = values[0] if values else None -class JSONResponseCursorPaginator(BasePaginator): +class JSONResponseCursorPaginator(BaseReferencePaginator): """A paginator that uses a cursor query param to paginate. The cursor for the next page is found in the JSON response. """ @@ -182,7 +187,7 @@ def __init__( def update_state(self, response: Response) -> None: values = jsonpath.find_values(self.cursor_path, response.json()) - self.next_reference = values[0] if values else None + self._next_reference = values[0] if values else None def update_request(self, request: Request) -> None: if request.params is None: diff --git a/tests/sources/helpers/rest_client/test_client.py b/tests/sources/helpers/rest_client/test_client.py index 4311026e2e..50defa8edb 100644 --- a/tests/sources/helpers/rest_client/test_client.py +++ b/tests/sources/helpers/rest_client/test_client.py @@ -65,7 +65,8 @@ def test_page_context(self, rest_client: RESTClient) -> None: assert isinstance(page.request, Request) # make request url should be same as next link in paginator if page.paginator.has_next_page: - assert page.paginator.next_reference == page.request.url + paginator = cast(JSONResponsePaginator, page.paginator) + assert paginator._next_reference == page.request.url def test_default_paginator(self, rest_client: RESTClient): pages_iter = rest_client.paginate("/posts") diff --git a/tests/sources/helpers/rest_client/test_paginators.py b/tests/sources/helpers/rest_client/test_paginators.py index 03afb17ca6..042eb6839b 100644 --- a/tests/sources/helpers/rest_client/test_paginators.py +++ b/tests/sources/helpers/rest_client/test_paginators.py @@ -18,7 +18,7 @@ def test_update_state_with_next(self): response = Mock(Response) response.links = {"next": {"url": "http://example.com/next"}} paginator.update_state(response) - assert paginator.next_reference == "http://example.com/next" + assert paginator._next_reference == "http://example.com/next" assert paginator.has_next_page is True def test_update_state_without_next(self): @@ -86,7 +86,7 @@ def test_update_state(self, test_case): paginator = JSONResponsePaginator(next_url_path=next_url_path) response = Mock(Response, json=lambda: test_case["response_json"]) paginator.update_state(response) - assert paginator.next_reference == test_case["expected"]["next_reference"] + assert paginator._next_reference == test_case["expected"]["next_reference"] assert paginator.has_next_page == test_case["expected"]["has_next_page"] # Test update_request from BaseNextUrlPaginator @@ -151,7 +151,7 @@ def test_update_state(self, test_case): ) def test_update_request(self, test_case): paginator = JSONResponsePaginator() - paginator.next_reference = test_case["next_reference"] + paginator._next_reference = test_case["next_reference"] request = Mock(Request, url=test_case["request_url"]) paginator.update_request(request) assert request.url == test_case["expected"]