From 2fd8f21b4569b4b2f96fed16fcf802a5987a6e82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adri=C3=A1n=20Chaves?=
Date: Mon, 16 Dec 2024 16:42:17 +0100
Subject: [PATCH] Support the SHUB_APIURL and SHUB_STORAGE environment
 variables (#177)

---
 docs/quickstart.rst              |  7 +++----
 scrapinghub/client/__init__.py   | 13 ++++++++-----
 scrapinghub/hubstorage/client.py |  6 ++++--
 scrapinghub/legacy.py            |  4 ++--
 tests/legacy/test_connection.py  |  2 +-
 5 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/docs/quickstart.rst b/docs/quickstart.rst
index 22afcd93..1ea79912 100644
--- a/docs/quickstart.rst
+++ b/docs/quickstart.rst
@@ -23,15 +23,14 @@ it provides better response time and improved bandwidth usage::
 
 Basic usage
 -----------
 
-Instantiate a new client with your Scrapinghub API key::
+Instantiate a new client with your Scrapy Cloud API key::
 
     >>> from scrapinghub import ScrapinghubClient
     >>> apikey = '84c87545607a4bc0****************'  # your API key as a string
     >>> client = ScrapinghubClient(apikey)
 
-.. note::
-    Your Scrapinghub API key is available at https://app.scrapinghub.com/account/apikey
-    after you sign up with the service.
+.. note:: Your Scrapy Cloud API key is available at the bottom of
+    https://app.zyte.com/o/settings after you sign up.
 
 List your deployed projects::

diff --git a/scrapinghub/client/__init__.py b/scrapinghub/client/__init__.py
index 80825852..89057af2 100644
--- a/scrapinghub/client/__init__.py
+++ b/scrapinghub/client/__init__.py
@@ -27,17 +27,20 @@ def request(self, *args, **kwargs):
 
 
 class ScrapinghubClient(object):
-    """Main class to work with Scrapinghub API.
+    """Main class to work with the Scrapy Cloud API.
 
-    :param auth: (optional) Scrapinghub APIKEY or other SH auth credentials.
-        If not provided, it will read, respectively, from
+    :param auth: (optional) Scrapy Cloud API key or other Scrapy Cloud auth
+        credentials. If not provided, it will read, respectively, from
         ``SH_APIKEY`` or ``SHUB_JOBAUTH`` environment variables.
         ``SHUB_JOBAUTH`` is available by default in *Scrapy Cloud*, but it
         does not provide access to all endpoints (e.g. job scheduling), but
         it is allowed to access job data, collections, crawl frontier.
         If you need full access to *Scrapy Cloud* features, you'll need to
-        provide a Scrapinghub APIKEY through this argument or deploying ``SH_APIKEY``.
-    :param dash_endpoint: (optional) Scrapinghub Dash panel url.
+        provide a Scrapy Cloud API key through this argument or deploying
+        ``SH_APIKEY``.
+    :param dash_endpoint: (optional) Scrapy Cloud API URL.
+        If not provided, it will be read from the ``SHUB_APIURL`` environment
+        variable, or fall back to ``"https://app.zyte.com/api/"``.
     :param kwargs: (optional) Additional arguments for
         :class:`~scrapinghub.hubstorage.HubstorageClient` constructor.
 

diff --git a/scrapinghub/hubstorage/client.py b/scrapinghub/hubstorage/client.py
index afe5ef85..508c9445 100644
--- a/scrapinghub/hubstorage/client.py
+++ b/scrapinghub/hubstorage/client.py
@@ -2,6 +2,8 @@
 High level Hubstorage client
 """
 import logging
+import os
+
 from requests import session, HTTPError, ConnectionError, Timeout
 from retrying import Retrying
 from .utils import xauth, urlpathjoin
@@ -71,14 +73,14 @@ def __init__(self, auth=None, endpoint=None, connection_timeout=None,
 
         Args:
             auth (str): The client authentication token
-            endpoint (str): The API root address
+            endpoint (str, optional): The API root address. If not provided, it will be read from the ``SHUB_STORAGE`` environment variable, or fall back to ``"https://storage.scrapinghub.com/"``.
             connection_timeout (int): The connection timeout for a _single request_
             max_retries (int): The number of time idempotent requests may be retried
             max_retry_time (int): The time, in seconds, during which the client can retry a request
             use_msgpack (bool): Flag to enable/disable msgpack use for serialization
         """
         self.auth = xauth(auth)
-        self.endpoint = endpoint or self.DEFAULT_ENDPOINT
+        self.endpoint = endpoint or os.getenv("SHUB_STORAGE", self.DEFAULT_ENDPOINT)
         self.connection_timeout = connection_timeout or self.DEFAULT_CONNECTION_TIMEOUT_S
         self.user_agent = user_agent or self.DEFAULT_USER_AGENT
         self.session = self._create_session()

diff --git a/scrapinghub/legacy.py b/scrapinghub/legacy.py
index 88243469..30789185 100644
--- a/scrapinghub/legacy.py
+++ b/scrapinghub/legacy.py
@@ -30,7 +30,7 @@ class Connection(object):
     """Main class to access Scrapinghub API.
     """
 
-    DEFAULT_ENDPOINT = 'https://app.scrapinghub.com/api/'
+    DEFAULT_ENDPOINT = 'https://app.zyte.com/api/'
 
     API_METHODS = {
         'addversion': 'scrapyd/addversion',
@@ -66,7 +66,7 @@ def __init__(self, apikey=None, password='', _old_passwd='',
             warnings.warn("A lot of endpoints support authentication only via apikey.", stacklevel=2)
         self.apikey = apikey
         self.password = password or ''
-        self.url = url or self.DEFAULT_ENDPOINT
+        self.url = url or os.getenv("SHUB_APIURL", self.DEFAULT_ENDPOINT)
         self._session = self._create_session()
         self._connection_timeout = connection_timeout

diff --git a/tests/legacy/test_connection.py b/tests/legacy/test_connection.py
index f4555be4..b5ec68fe 100644
--- a/tests/legacy/test_connection.py
+++ b/tests/legacy/test_connection.py
@@ -11,7 +11,7 @@
 
 
 def test_connection_class_attrs():
-    assert Connection.DEFAULT_ENDPOINT == 'https://app.scrapinghub.com/api/'
+    assert Connection.DEFAULT_ENDPOINT == 'https://app.zyte.com/api/'
     assert isinstance(Connection.API_METHODS, dict)
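For reference, a minimal sketch of the behaviour this patch introduces, assuming the patch is applied. The API key and URL values below are placeholders; the variables must be set before the clients are instantiated, since both constructors read them via ``os.getenv()`` at construction time:

    import os

    # Placeholder values; any reachable API/storage endpoints would do.
    os.environ["SHUB_APIURL"] = "https://app.zyte.com/api/"
    os.environ["SHUB_STORAGE"] = "https://storage.scrapinghub.com/"

    from scrapinghub.hubstorage import HubstorageClient
    from scrapinghub.legacy import Connection

    # With no explicit url/endpoint argument, both clients now fall back to
    # the environment variables before using their hardcoded defaults.
    conn = Connection(apikey="0123456789abcdef0123456789abcdef")  # placeholder key
    hs = HubstorageClient(auth="0123456789abcdef0123456789abcdef")

    assert conn.url == os.environ["SHUB_APIURL"]
    assert hs.endpoint == os.environ["SHUB_STORAGE"]

An explicit ``url``/``endpoint`` argument still takes precedence over the environment variables, and the previous hardcoded defaults remain the final fallback when neither is set.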