Skip to content

Commit

Permalink
Merge pull request #1254 from gulfofmaine/erddapy-2
Browse files: browse the repository at this point in the history
Update erddapy to v2
  • Loading branch information
abkfenris authored Nov 25, 2024
2 parents f77e8bc + 1ff8c34 commit a20ebc2
Show file tree
Hide file tree
Showing 5 changed files with 577 additions and 428 deletions.
106 changes: 48 additions & 58 deletions app/deployments/tasks/error_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,9 @@
from datetime import timedelta
from http import HTTPStatus

import requests
from django.conf import settings
import pandas as pd
from django.utils import timezone
from requests import HTTPError

try:
from pandas.core.indexes.period import DateParseError, parse_time_string
except ImportError:
from pandas._libs.tslibs.parsing import DateParseError, parse_time_string
from httpx import HTTPError, HTTPStatusError

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -57,7 +51,7 @@ def handle_500_time_range_error(timeseries_group, compare_text: str) -> bool:
"""
if "is outside of the variable" in compare_text:
try:
times_str = compare_text.rpartition("actual_range:")[-1].rpartition(")")[0]
times_str = compare_text.rpartition("actual_range: ")[-1].rpartition(")")[0]
except (AttributeError, IndexError) as e:
logger.error(
(
Expand All @@ -70,11 +64,11 @@ def handle_500_time_range_error(timeseries_group, compare_text: str) -> bool:
return False

times = []
for potential_time in times_str.split(" "):
for potential_time in times_str.split(" to "):
try:
time = parse_time_string(potential_time)
times.append(time[0])
except (DateParseError, ValueError):
time = pd.to_datetime(potential_time)
times.append(time)
except ValueError:
pass
times.sort(reverse=True)

Expand Down Expand Up @@ -159,6 +153,9 @@ def handle_500_errors(timeseries_group, compare_text: str) -> bool:
if handle_500_variable_actual_range_error(timeseries_group, compare_text):
return True

if handle_400_unrecognized_variable(timeseries_group, compare_text):
return True

return handle_500_unrecognized_constraint(timeseries_group, compare_text)


Expand Down Expand Up @@ -292,64 +289,57 @@ def handle_404_no_matching_dataset_id(timeseries_group, compare_text: str) -> bo

def handle_http_errors(timeseries_group, error: HTTPError) -> bool: # noqa: PLR0911
"""Handle various types of HTTPErrors. Returns True if handled"""
try:
if error.response.status_code == HTTPStatus.FORBIDDEN:
logger.error(
(
f"403 error loading dataset {timeseries_group[0].dataset.name}. "
"NOAA Coastwatch most likely blacklisted us. "
"Try running the request manually from the worker pod to "
f"replicate the error and access the returned text. {error}"
),
extra=error_extra(timeseries_group),
exc_info=True,
)
return True

if error.response.status_code == HTTPStatus.NOT_FOUND:
logger.error(
(
f"No rows found for {timeseries_group[0].dataset.name} "
f"with constraint {timeseries_group[0].constraints}: {error}"
),
extra=error_extra(timeseries_group),
exc_info=True,
)
return True
if isinstance(error.__cause__, HTTPStatusError):
try:
if error.__cause__.response.status_code == HTTPStatus.FORBIDDEN:
logger.error(
(
f"403 error loading dataset {timeseries_group[0].dataset.name}. "
"NOAA Coastwatch most likely blacklisted us. "
"Try running the request manually from the worker pod to "
f"replicate the error and access the returned text. {error}"
),
extra=error_extra(timeseries_group),
exc_info=True,
)
return True

if error.response.status_code == HTTPStatus.REQUEST_TIMEOUT:
raise BackoffError("408 Backoff encountered") from error
if error.__cause__.response.status_code == HTTPStatus.NOT_FOUND and handle_404_errors(
timeseries_group,
error.__cause__.response.text,
):
return True

if error.response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR:
url = error.request.url
if error.__cause__.response.status_code == HTTPStatus.REQUEST_TIMEOUT:
raise BackoffError("408 Backoff encountered") from error

response_500 = requests.get(url, timeout=settings.ERDDAP_TIMEOUT_SECONDS)
if error.__cause__.response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR:
if handle_500_errors(timeseries_group, error.__cause__.response.text):
return True

if handle_500_errors(timeseries_group, response_500.text):
logger.error(
(
f"500 error loading dataset {timeseries_group[0].dataset.name} "
f"with constraint {timeseries_group[0].constraints}: {error} "
),
extra=error_extra(timeseries_group, error.__cause__.response.text),
exc_info=True,
)
return True

logger.error(
(
f"500 error loading dataset {timeseries_group[0].dataset.name} "
f"with constraint {timeseries_group[0].constraints}: {error} "
f"{error.response.status_code} error loading dataset "
+ timeseries_group[0].dataset.name
+ f" with constraint {timeseries_group[0].constraints}: {error}"
),
extra=error_extra(timeseries_group, response_500.text),
extra=error_extra(timeseries_group),
exc_info=True,
)
return True

logger.error(
(
f"{error.response.status_code} error loading dataset {timeseries_group[0].dataset.name}"
f" with constraint {timeseries_group[0].constraints}: {error}"
),
extra=error_extra(timeseries_group),
exc_info=True,
)
return True

except AttributeError:
pass
except AttributeError:
pass

if handle_400_errors(timeseries_group, str(error), error):
return True
Expand Down
26 changes: 12 additions & 14 deletions app/deployments/tasks/refresh.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
import logging
import time

import pandas as pd
from celery import shared_task
from django.utils import timezone
from pandas import Timedelta
from requests import HTTPError, Timeout
from sentry_sdk import push_scope
from httpx import HTTPError, TimeoutException

try:
from pandas.core.indexes.period import parse_time_string
except ImportError:
from pandas._libs.tslibs.parsing import parse_time_string
# from requests import HTTPError, Timeout
from sentry_sdk import push_scope

from deployments.models import ErddapDataset, ErddapServer, TimeSeries
from deployments.utils.erddap_datasets import filter_dataframe, retrieve_dataframe
Expand All @@ -37,16 +34,16 @@ def update_values_for_timeseries(timeseries: list[TimeSeries]):
timeseries,
)

except HTTPError as error:
if handle_http_errors(timeseries, error):
return

except Timeout as error:
except TimeoutException as error:
raise BackoffError(
f"Timeout when trying to retrieve dataset {timeseries[0].dataset.name} "
f"with constraint {timeseries[0].constraints}: {error}",
) from error

except HTTPError as error:
if handle_http_errors(timeseries, error):
return

except OSError as error:
logger.error(
(
Expand Down Expand Up @@ -92,7 +89,7 @@ def update_values_for_timeseries(timeseries: list[TimeSeries]):
extra_context["variable"] = series.variable
extra_context["value"] = value

if isinstance(value, Timedelta):
if isinstance(value, pd.Timedelta):
logger.info("Converting from Timedelta to seconds")
value = value.seconds

Expand All @@ -101,7 +98,7 @@ def update_values_for_timeseries(timeseries: list[TimeSeries]):
time = row["time (UTC)"]
extra_context["time"] = time

series.value_time = parse_time_string(time)[0]
series.value_time = pd.to_datetime(time)
series.save()

try:
Expand All @@ -113,6 +110,7 @@ def update_values_for_timeseries(timeseries: list[TimeSeries]):
extra=extra_context,
exc_info=True,
)
continue
except TypeError as error:
logger.error(
f"Could not save {series.variable} from {row}: {error}",
Expand Down
5 changes: 3 additions & 2 deletions app/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,15 @@ dependencies = [
"django-redis~=5.4",
"djangorestframework~=3.15",
"djangorestframework-gis~=1.0",
"erddapy==1.2.1",
"erddapy>1.2.1",
"flower~=2.0.1",
"freezegun~=1.5.1",
"geojson~=3.1.0",
"netcdf4~=1.7.1",
"pandas~=1.5.3",
"pandas>1.5.3",
"psycopg2-binary~=2.9.9",
"pystac~=1.11.0",
"requests>=2.32.3",
"scipy>=1.14.1",
"sentry-sdk~=2.18.0",
"slack-sdk~=3.33.0",
Expand Down
Loading

0 comments on commit a20ebc2

Please sign in to comment.