Update erddapy to v2 #1254

Merged (2 commits) on Nov 25, 2024
106 changes: 48 additions & 58 deletions app/deployments/tasks/error_handling.py
@@ -2,15 +2,9 @@
from datetime import timedelta
from http import HTTPStatus

import requests
from django.conf import settings
import pandas as pd
from django.utils import timezone
from requests import HTTPError

try:
from pandas.core.indexes.period import DateParseError, parse_time_string
except ImportError:
from pandas._libs.tslibs.parsing import DateParseError, parse_time_string
from httpx import HTTPError, HTTPStatusError

logger = logging.getLogger(__name__)

@@ -57,7 +51,7 @@ def handle_500_time_range_error(timeseries_group, compare_text: str) -> bool:
"""
if "is outside of the variable" in compare_text:
try:
times_str = compare_text.rpartition("actual_range:")[-1].rpartition(")")[0]
times_str = compare_text.rpartition("actual_range: ")[-1].rpartition(")")[0]
except (AttributeError, IndexError) as e:
logger.error(
(
return False

times = []
for potential_time in times_str.split(" "):
for potential_time in times_str.split(" to "):
try:
time = parse_time_string(potential_time)
times.append(time[0])
except (DateParseError, ValueError):
time = pd.to_datetime(potential_time)
times.append(time)
except ValueError:
pass
times.sort(reverse=True)
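For context on the parsing change in this hunk: the old code leaned on pandas' private `parse_time_string` helper, which newer pandas releases no longer expose, so the handler now uses `pd.to_datetime` and splits on the literal `" to "` separator in ERDDAP's actual_range message. A minimal sketch of the new behavior; the sample string is illustrative, not taken from a real response:

```python
import pandas as pd

# Illustrative fragment in the "<start> to <end>" shape the handler splits on.
times_str = "2010-01-01T00:00:00Z to 2024-11-25T12:00:00Z"

times = []
for potential_time in times_str.split(" to "):
    try:
        # pd.to_datetime returns a Timestamp directly; the old parse_time_string
        # returned a tuple, hence the previous time[0] indexing.
        times.append(pd.to_datetime(potential_time))
    except ValueError:
        # Unparseable fragments raise ValueError (or a subclass) and are skipped.
        pass

times.sort(reverse=True)  # newest first, matching the handler above
```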

@@ -159,6 +153,9 @@ def handle_500_errors(timeseries_group, compare_text: str) -> bool:
if handle_500_variable_actual_range_error(timeseries_group, compare_text):
return True

if handle_400_unrecognized_variable(timeseries_group, compare_text):
return True

return handle_500_unrecognized_constraint(timeseries_group, compare_text)
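The 500 handlers above form a simple chain: each returns True as soon as it recognizes the error text, and the new `handle_400_unrecognized_variable` check is slotted into that chain before the final constraint fallback. A self-contained sketch of the pattern using the handler names visible in this hunk; the stub bodies are illustrative stand-ins, not the project's real checks:

```python
from typing import Callable

# Illustrative stand-ins: each returns True if it recognized the ERDDAP error text.
def handle_500_variable_actual_range_error(group, text: str) -> bool:
    return "actual_range" in text

def handle_400_unrecognized_variable(group, text: str) -> bool:
    return "Unrecognized variable" in text

def handle_500_unrecognized_constraint(group, text: str) -> bool:
    return "Unrecognized constraint" in text

def handle_500_errors(group, compare_text: str) -> bool:
    """Chain the handlers; any() short-circuits on the first True."""
    handlers: list[Callable[..., bool]] = [
        handle_500_variable_actual_range_error,
        handle_400_unrecognized_variable,
        handle_500_unrecognized_constraint,
    ]
    return any(handler(group, compare_text) for handler in handlers)
```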


@@ -292,64 +289,57 @@ def handle_404_no_matching_dataset_id(timeseries_group, compare_text: str) -> bool:

def handle_http_errors(timeseries_group, error: HTTPError) -> bool: # noqa: PLR0911
"""Handle various types of HTTPErrors. Returns True if handled"""
try:
if error.response.status_code == HTTPStatus.FORBIDDEN:
logger.error(
(
f"403 error loading dataset {timeseries_group[0].dataset.name}. "
"NOAA Coastwatch most likely blacklisted us. "
"Try running the request manually from the worker pod to "
f"replicate the error and access the returned text. {error}"
),
extra=error_extra(timeseries_group),
exc_info=True,
)
return True

if error.response.status_code == HTTPStatus.NOT_FOUND:
logger.error(
(
f"No rows found for {timeseries_group[0].dataset.name} "
f"with constraint {timeseries_group[0].constraints}: {error}"
),
extra=error_extra(timeseries_group),
exc_info=True,
)
return True
if isinstance(error.__cause__, HTTPStatusError):
try:
if error.__cause__.response.status_code == HTTPStatus.FORBIDDEN:
logger.error(
(
f"403 error loading dataset {timeseries_group[0].dataset.name}. "
"NOAA Coastwatch most likely blacklisted us. "
"Try running the request manually from the worker pod to "
f"replicate the error and access the returned text. {error}"
),
extra=error_extra(timeseries_group),
exc_info=True,
)
return True

if error.response.status_code == HTTPStatus.REQUEST_TIMEOUT:
raise BackoffError("408 Backoff encountered") from error
if error.__cause__.response.status_code == HTTPStatus.NOT_FOUND and handle_404_errors(
timeseries_group,
error.__cause__.response.text,
):
return True

if error.response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR:
url = error.request.url
if error.__cause__.response.status_code == HTTPStatus.REQUEST_TIMEOUT:
raise BackoffError("408 Backoff encountered") from error

response_500 = requests.get(url, timeout=settings.ERDDAP_TIMEOUT_SECONDS)
if error.__cause__.response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR:
if handle_500_errors(timeseries_group, error.__cause__.response.text):
return True

if handle_500_errors(timeseries_group, response_500.text):
logger.error(
(
f"500 error loading dataset {timeseries_group[0].dataset.name} "
f"with constraint {timeseries_group[0].constraints}: {error} "
),
extra=error_extra(timeseries_group, error.__cause__.response.text),
exc_info=True,
)
return True

logger.error(
(
f"500 error loading dataset {timeseries_group[0].dataset.name} "
f"with constraint {timeseries_group[0].constraints}: {error} "
f"{error.response.status_code} error loading dataset "
+ timeseries_group[0].dataset.name
+ f" with constraint {timeseries_group[0].constraints}: {error}"
),
extra=error_extra(timeseries_group, response_500.text),
extra=error_extra(timeseries_group),
exc_info=True,
)
return True

logger.error(
(
f"{error.response.status_code} error loading dataset {timeseries_group[0].dataset.name}"
f" with constraint {timeseries_group[0].constraints}: {error}"
),
extra=error_extra(timeseries_group),
exc_info=True,
)
return True

except AttributeError:
pass
except AttributeError:
pass

if handle_400_errors(timeseries_group, str(error), error):
return True
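The reworked handler above reads the status code from `error.__cause__` rather than `error.response`: with erddapy v2 the underlying HTTP client is httpx, and the `httpx.HTTPStatusError` that actually carries the response appears to arrive chained as the cause of the error erddapy raises. A standalone sketch of pulling a status code out of either shape; the URL and helper name are illustrative:

```python
import httpx
from http import HTTPStatus

def status_code_from(error: httpx.HTTPError) -> int | None:
    """Return a status code from an httpx error or its chained cause, if any.

    Only httpx.HTTPStatusError (raised by Response.raise_for_status()) carries a
    .response; transport errors such as httpx.TimeoutException do not.
    """
    if isinstance(error, httpx.HTTPStatusError):
        return error.response.status_code
    if isinstance(error.__cause__, httpx.HTTPStatusError):
        return error.__cause__.response.status_code
    return None

# Usage sketch against an illustrative URL:
try:
    response = httpx.get("https://example.com/erddap/tabledap/demo.csv", timeout=30)
    response.raise_for_status()
except httpx.HTTPError as error:
    if status_code_from(error) == HTTPStatus.NOT_FOUND:
        print("dataset or rows not found")  # would fall through to the 404 handlers
```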
26 changes: 12 additions & 14 deletions app/deployments/tasks/refresh.py
@@ -1,16 +1,13 @@
import logging
import time

import pandas as pd
from celery import shared_task
from django.utils import timezone
from pandas import Timedelta
from requests import HTTPError, Timeout
from sentry_sdk import push_scope
from httpx import HTTPError, TimeoutException

try:
from pandas.core.indexes.period import parse_time_string
except ImportError:
from pandas._libs.tslibs.parsing import parse_time_string
# from requests import HTTPError, Timeout
from sentry_sdk import push_scope

from deployments.models import ErddapDataset, ErddapServer, TimeSeries
from deployments.utils.erddap_datasets import filter_dataframe, retrieve_dataframe
@@ -37,16 +34,16 @@ def update_values_for_timeseries(timeseries: list[TimeSeries]):
timeseries,
)

except HTTPError as error:
if handle_http_errors(timeseries, error):
return

except Timeout as error:
except TimeoutException as error:
raise BackoffError(
f"Timeout when trying to retrieve dataset {timeseries[0].dataset.name} "
f"with constraint {timeseries[0].constraints}: {error}",
) from error

except HTTPError as error:
if handle_http_errors(timeseries, error):
return

except OSError as error:
logger.error(
(
@@ -92,7 +89,7 @@ def update_values_for_timeseries(timeseries: list[TimeSeries]):
extra_context["variable"] = series.variable
extra_context["value"] = value

if isinstance(value, Timedelta):
if isinstance(value, pd.Timedelta):
logger.info("Converting from Timedelta to seconds")
value = value.seconds
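The Timedelta branch above only changes how the class is referenced (`pd.Timedelta` instead of the bare `Timedelta` import). For reference, `.seconds` is the seconds component of the delta rather than its total length; a quick illustration:

```python
import pandas as pd

delta = pd.Timedelta(minutes=90)
delta.seconds          # 5400: seconds component (always 0..86399), what the task stores
delta.total_seconds()  # 5400.0: full duration; the two differ once a delta spans days
```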

@@ -101,7 +98,7 @@ def update_values_for_timeseries(timeseries: list[TimeSeries]):
time = row["time (UTC)"]
extra_context["time"] = time

series.value_time = parse_time_string(time)[0]
series.value_time = pd.to_datetime(time)
series.save()

try:
@@ -113,6 +110,7 @@ def update_values_for_timeseries(timeseries: list[TimeSeries]):
extra=extra_context,
exc_info=True,
)
continue
except TypeError as error:
logger.error(
f"Could not save {series.variable} from {row}: {error}",
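Note the reordered except blocks in this file: unlike `requests.Timeout`, `httpx.TimeoutException` is a subclass of `httpx.HTTPError`, which is presumably why the timeout branch now comes before the HTTPError branch; listed the other way round it would never be reached. A self-contained illustration (URL and timeout are arbitrary):

```python
import httpx

# In httpx, timeout errors inherit from HTTPError, so except order matters.
assert issubclass(httpx.TimeoutException, httpx.HTTPError)

try:
    httpx.get("https://example.com/erddap/tabledap/demo.csv", timeout=0.001)
except httpx.TimeoutException as error:
    print(f"timed out, worth backing off and retrying: {error}")
except httpx.HTTPError as error:
    print(f"some other HTTP failure: {error}")
```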
5 changes: 3 additions & 2 deletions app/pyproject.toml
@@ -20,14 +20,15 @@ dependencies = [
"django-redis~=5.4",
"djangorestframework~=3.15",
"djangorestframework-gis~=1.0",
"erddapy==1.2.1",
"erddapy>1.2.1",
"flower~=2.0.1",
"freezegun~=1.5.1",
"geojson~=3.1.0",
"netcdf4~=1.7.1",
"pandas~=1.5.3",
"pandas>1.5.3",
"psycopg2-binary~=2.9.9",
"pystac~=1.11.0",
"requests>=2.32.3",
"scipy>=1.14.1",
"sentry-sdk~=2.18.0",
"slack-sdk~=3.33.0",
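Beyond the loosened erddapy and pandas pins, requests and scipy are now declared directly. The practical effect of erddapy v2 for this app is the HTTP layer: v2 issues its requests through httpx, which is why the task code above now catches httpx exceptions. A minimal usage sketch of the v2 client; the server, dataset ID, variables, and constraints are illustrative, not the app's configuration:

```python
import httpx
from erddapy import ERDDAP

# Illustrative server/dataset; the deployments app builds these from its models.
server = ERDDAP(
    server="https://coastwatch.pfeg.noaa.gov/erddap",
    protocol="tabledap",
    response="csv",
)
server.dataset_id = "cwwcNDBCMet"
server.variables = ["station", "time", "wtmp"]
server.constraints = {"time>=": "2024-11-01T00:00:00Z", "station=": "44007"}

try:
    df = server.to_pandas()
except httpx.TimeoutException as error:
    print(f"ERDDAP timed out: {error}")
except httpx.HTTPError as error:  # with v2, failures surface as httpx errors
    print(f"ERDDAP request failed: {error}")
else:
    print(df.head())
```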