From 3e192cd338a1651a28d61cc9be86ba2775544b79 Mon Sep 17 00:00:00 2001 From: Bryan Hilbert Date: Thu, 16 Jan 2025 10:24:26 -0500 Subject: [PATCH 1/4] Switch build_table() to use Django models --- jwql/tests/test_bokeh_dashboard.py | 43 ++++++++++++ jwql/tests/test_data_containers.py | 2 +- jwql/website/apps/jwql/apps.py | 30 ++++++++ jwql/website/apps/jwql/bokeh_dashboard.py | 86 +++++++++++------------ jwql/website/apps/jwql/data_containers.py | 57 ++++++++------- 5 files changed, 145 insertions(+), 73 deletions(-) create mode 100644 jwql/tests/test_bokeh_dashboard.py create mode 100644 jwql/website/apps/jwql/apps.py diff --git a/jwql/tests/test_bokeh_dashboard.py b/jwql/tests/test_bokeh_dashboard.py new file mode 100644 index 000000000..ab672c7e7 --- /dev/null +++ b/jwql/tests/test_bokeh_dashboard.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python + +"""Tests for the ``bokeh_dashboard`` module in the ``jwql`` web +application. + +Authors +------- + + - Bryan Hilbert + +Use +--- + + These tests can be run via the command line (omit the -s to + suppress verbose output to stdout): + + :: + + pytest -s test_bokeh_dashboard.py +""" + +import os + +from django import setup +import pandas as pd +import pytest + +from jwql.utils.constants import DEFAULT_MODEL_CHARFIELD, ON_GITHUB_ACTIONS, ON_READTHEDOCS + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings") + +# Skip testing this module if on Github Actions +from jwql.website.apps.jwql import bokeh_dashboard # noqa: E402 (module level import not at top of file) + +if not ON_GITHUB_ACTIONS and not ON_READTHEDOCS: + setup() + + +@pytest.mark.skipif(ON_GITHUB_ACTIONS, reason='Requires access to django models.') +def test_build_table_latest_entry(): + tab = bokeh_dashboard.build_table('FilesystemCharacteristics') + assert isinstance(tab, pd.DataFrame) + assert len(tab['date']) > 0 diff --git a/jwql/tests/test_data_containers.py b/jwql/tests/test_data_containers.py index 4b1f1c4ba..275b7a86d 100644 --- a/jwql/tests/test_data_containers.py +++ b/jwql/tests/test_data_containers.py @@ -48,7 +48,7 @@ @pytest.mark.skipif(ON_GITHUB_ACTIONS, reason='Requires access to django models.') def test_build_table(): - tab = data_containers.build_table('filesystem_general') + tab = data_containers.build_table('FilesystemGeneral') assert isinstance(tab, pd.DataFrame) assert len(tab['date']) > 0 diff --git a/jwql/website/apps/jwql/apps.py b/jwql/website/apps/jwql/apps.py new file mode 100644 index 000000000..d8c347d31 --- /dev/null +++ b/jwql/website/apps/jwql/apps.py @@ -0,0 +1,30 @@ +#! /usr/bin/env python + +""" +apps.py is the standard and recommended way to configure application-specific settings +in Django, including tasks like importing additional modules during initialization. + +Author +------ + +B. Hilbert +""" + +from django.apps import AppConfig + + +class JwqlAppConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'jwql' + + def ready(self): + # Import models not defined in models.py here + # By importing these models here, they will be available + # to the build_table() function. + import jwql.website.apps.jwql.monitor_models.bad_pixel + import jwql.website.apps.jwql.monitor_models.bias + import jwql.website.apps.jwql.monitor_models.claw + import jwql.website.apps.jwql.monitor_models.common + import jwql.website.apps.jwql.monitor_models.dark_current + import jwql.website.apps.jwql.monitor_models.readnoise + import jwql.website.apps.jwql.monitor_models.ta diff --git a/jwql/website/apps/jwql/bokeh_dashboard.py b/jwql/website/apps/jwql/bokeh_dashboard.py index a4469be82..14880c721 100644 --- a/jwql/website/apps/jwql/bokeh_dashboard.py +++ b/jwql/website/apps/jwql/bokeh_dashboard.py @@ -30,6 +30,7 @@ placed in the ``jwql`` directory. """ +from collections import defaultdict from datetime import datetime as dt from math import pi from operator import itemgetter @@ -40,17 +41,30 @@ from bokeh.models.layouts import TabPanel, Tabs from bokeh.plotting import figure from bokeh.transform import cumsum +from django import setup +from django.db.models import OuterRef, Subquery import numpy as np import pandas as pd from sqlalchemy import func, and_ import jwql.database.database_interface as di from jwql.database.database_interface import CentralStore -from jwql.utils.constants import ANOMALY_CHOICES_PER_INSTRUMENT, FILTERS_PER_INSTRUMENT, JWST_INSTRUMENT_NAMES_MIXEDCASE +from jwql.utils.constants import (ANOMALY_CHOICES_PER_INSTRUMENT, + FILTERS_PER_INSTRUMENT, + JWST_INSTRUMENT_NAMES_MIXEDCASE, + ON_GITHUB_ACTIONS, + ON_READTHEDOCS + ) from jwql.utils.utils import get_base_url, get_config -from jwql.website.apps.jwql.data_containers import build_table +from jwql.website.apps.jwql.data_containers import build_table, import_all_models from jwql.website.apps.jwql.models import Anomalies +if not ON_GITHUB_ACTIONS and not ON_READTHEDOCS: + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings") + setup() + + from jwql.website.apps.jwql.models import get_model_column_names + def build_table_latest_entry(tablename): """Create Pandas dataframe from the most recent entry of a JWQLDB table. @@ -65,46 +79,29 @@ def build_table_latest_entry(tablename): table_meta_data : pandas.DataFrame Pandas data frame version of JWQL database table. """ - # Make dictionary of tablename : class object - # This matches what the user selects in the select element - # in the webform to the python object on the backend. - tables_of_interest = {} - for item in di.__dict__.keys(): - table = getattr(di, item) - if hasattr(table, '__tablename__'): - tables_of_interest[table.__tablename__] = table - - session, _, _, _ = di.load_connection(get_config()['connection_string']) - table_object = tables_of_interest[tablename] # Select table object - - subq = session.query(table_object.instrument, - func.max(table_object.date).label('maxdate') - ).group_by(table_object.instrument).subquery('t2') - - result = session.query(table_object).join( - subq, - and_( - table_object.instrument == subq.c.instrument, - table_object.date == subq.c.maxdate - ) - ) - - # Turn query result into list of dicts - result_dict = [row.__dict__ for row in result.all()] - column_names = table_object.__table__.columns.keys() - - # Build list of column data based on column name. - data = [] - for column in column_names: - column_data = list(map(itemgetter(column), result_dict)) - data.append(column_data) - - data = dict(zip(column_names, data)) + all_models = import_all_models() + table_object = all_models.get(tablename) + column_names = get_model_column_names(table_object) + + if 'instrument' not in column_names: + raise ValueError(f"No 'instrument' column name in {tablename}. Unable to get latest entry by instrument.") + + # Create a subquery to get the latest date for each instrument + subquery = table_object.objects.filter(instrument=OuterRef('instrument')).order_by('-date').values('date')[:1] + + # Query the model with the subquery + most_recent_entries = table_object.objects.filter(date=Subquery(subquery)) + + # Convert the QuerySet into a dictionary + rows = most_recent_entries.values() + data = defaultdict(list) + + for row in rows: + for key, value in row.items(): + data[key].append(value) # Build table. table_meta_data = pd.DataFrame(data) - - session.close() return table_meta_data @@ -360,7 +357,7 @@ def dashboard_filetype_bar_chart(self): # Make Pandas DF for filesystem_instrument # If time delta exists, filter data based on that. - data = build_table('filesystem_instrument') + data = build_table('FilesystemInstrument') # Keep only the rows containing the most recent timestamp data = data[data['date'] == data['date'].max()] @@ -390,8 +387,7 @@ def dashboard_instrument_pie_chart(self): plot : bokeh.plotting.figure Pie chart figure """ - # Replace with jwql.website.apps.jwql.data_containers.build_table - data = build_table('filesystem_instrument') + data = build_table('FilesystemInstrument') # Keep only the rows containing the most recent timestamp data = data[data['date'] == data['date'].max()] @@ -439,7 +435,7 @@ def dashboard_files_per_day(self): A figure with tabs for each instrument. """ - source = build_table('filesystem_general') + source = build_table('FilesystemGeneral') if not pd.isnull(self.delta_t): source = source[(source['date'] >= self.date - self.delta_t) & (source['date'] <= self.date)] @@ -495,7 +491,7 @@ def dashboard_monitor_tracking(self): Numpy array of column values from monitor table. """ - data = build_table('monitor') + data = build_table('Monitor') if not pd.isnull(self.delta_t): data = data[(data['start_time'] >= self.date - self.delta_t) & (data['start_time'] <= self.date)] @@ -551,7 +547,7 @@ def dashboard_exposure_count_by_filter(self): """ # build_table_latest_query will return only the database entries with the latest date. This should # correspond to one row/entry per instrument - data = build_table_latest_entry('filesystem_characteristics') + data = build_table_latest_entry('FilesystemCharacteristics') # Sort by instrument name so that the order of the tabs will always be the same data = data.sort_values('instrument') diff --git a/jwql/website/apps/jwql/data_containers.py b/jwql/website/apps/jwql/data_containers.py index c8bb57fff..46707069b 100644 --- a/jwql/website/apps/jwql/data_containers.py +++ b/jwql/website/apps/jwql/data_containers.py @@ -33,7 +33,7 @@ import os import re import tempfile -from collections import OrderedDict +from collections import defaultdict, OrderedDict from datetime import datetime from operator import getitem, itemgetter @@ -46,13 +46,12 @@ from astroquery.mast import Mast from bs4 import BeautifulSoup from django import forms, setup +from django.apps import apps from django.conf import settings from django.contrib import messages from django.core.exceptions import ObjectDoesNotExist from django.db.models.query import QuerySet -from jwql.database import database_interface as di -from jwql.database.database_interface import load_connection from jwql.edb.engineering_database import get_mnemonic, get_mnemonic_info, mnemonic_inventory from jwql.utils.constants import ( DEFAULT_MODEL_COMMENT, @@ -83,6 +82,7 @@ get_rootnames_for_instrument_proposal, ) + # Increase the limit on the number of entries that can be returned by # a MAST query. Mast._portal_api_connection.PAGESIZE = MAST_QUERY_LIMIT @@ -96,7 +96,7 @@ os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings") setup() - from jwql.website.apps.jwql.models import Anomalies, Observation, Proposal, RootFileInfo + from jwql.website.apps.jwql.models import Anomalies, get_model_column_names, Observation, Proposal, RootFileInfo from .forms import ( InstrumentAnomalySubmitForm, @@ -139,36 +139,23 @@ def build_table(tablename): table_meta_data : pandas.DataFrame Pandas data frame version of JWQL database table. """ - # Make dictionary of tablename : class object - # This matches what the user selects in the select element - # in the webform to the python object on the backend. - tables_of_interest = {} - for item in di.__dict__.keys(): - table = getattr(di, item) - if hasattr(table, '__tablename__'): - tables_of_interest[table.__tablename__] = table - - session, _, _, _ = load_connection(get_config()['connection_string']) - table_object = tables_of_interest[tablename] # Select table object - - result = session.query(table_object) + all_models = import_all_models() + table_object = all_models.get(tablename) - # Turn query result into list of dicts - result_dict = [row.__dict__ for row in result.all()] - column_names = table_object.__table__.columns.keys() + result = table_object.objects.all() + column_names = get_model_column_names(table_object) - # Build list of column data based on column name. - data = [] - for column in column_names: - column_data = list(map(itemgetter(column), result_dict)) - data.append(column_data) + # Convert the QuerySet into a dictionary + rows = result.values() + data = defaultdict(list) - data = dict(zip(column_names, data)) + for row in rows: + for key, value in row.items(): + data[key].append(value) # Build table. table_meta_data = pd.DataFrame(data) - session.close() return table_meta_data @@ -1970,6 +1957,22 @@ def get_thumbnail_by_rootname(rootname): return thumbnail_basename +def import_all_models(): + """ + Dynamically import and return all Django models as a dictionary. + Keys are model names (as strings), and values are model classes. + + Returns + ------- + models : dict + Keys are model names, values are model classes + """ + models = {} + for model in apps.get_app_config('jwql').get_models(): + models[model.__name__] = model + return models + + def log_into_mast(request): """Login via astroquery.mast if user authenticated in web app. From e91b1db3bd11dfbfdd570d9b09fdbd6e14d8ba63 Mon Sep 17 00:00:00 2001 From: Bryan Hilbert Date: Thu, 16 Jan 2025 11:25:37 -0500 Subject: [PATCH 2/4] Replace bokeh_dashboard sqlalchemy queries with model queries --- jwql/website/apps/jwql/bokeh_dashboard.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/jwql/website/apps/jwql/bokeh_dashboard.py b/jwql/website/apps/jwql/bokeh_dashboard.py index 14880c721..2266055a1 100644 --- a/jwql/website/apps/jwql/bokeh_dashboard.py +++ b/jwql/website/apps/jwql/bokeh_dashboard.py @@ -49,6 +49,7 @@ import jwql.database.database_interface as di from jwql.database.database_interface import CentralStore +from jwql.website.apps.jwql.monitor_models.common import CentralStorage from jwql.utils.constants import (ANOMALY_CHOICES_PER_INSTRUMENT, FILTERS_PER_INSTRUMENT, JWST_INSTRUMENT_NAMES_MIXEDCASE, @@ -174,16 +175,12 @@ def dashboard_disk_usage(self): # server disk information. config = get_config() - log_data = di.session.query(CentralStore.date, CentralStore.size, CentralStore.available) \ - .filter(CentralStore.area == 'logs') \ - .all() + log_data = list(CentralStorage.objects.filter(area='logs').values('date', 'size', 'available')) # Convert to dataframe log_data = pd.DataFrame(log_data) - preview_data = di.session.query(CentralStore.date, CentralStore.size, CentralStore.available) \ - .filter(CentralStore.area == 'preview_images') \ - .all() + preview_data = list(CentralStorage.objects.filter(area='preview_images').values('date', 'size', 'available')) # Convert to dataframe preview_data = pd.DataFrame(preview_data) @@ -244,7 +241,6 @@ def dashboard_disk_usage(self): tabs = Tabs(tabs=tabs) - di.session.close() return tabs def dashboard_central_store_data_volume(self): @@ -274,7 +270,7 @@ def dashboard_central_store_data_volume(self): for area, color in zip(arealist, colors): # Query for used sizes - results = di.session.query(CentralStore.date, CentralStore.used).filter(CentralStore.area == area).all() + results = list(CentralStorage.objects.filter(area=area).values('date', 'used')) if results: # Convert to dataframe @@ -311,7 +307,7 @@ def dashboard_central_store_data_volume(self): x_axis_label='Date', y_axis_label='Disk Space (TB)') - cen_store_results = di.session.query(CentralStore.date, CentralStore.used).filter(CentralStore.area == 'all').all() + cen_store_results = list(CentralStorage.objects.filter(area='all').values('date', 'used')) # Group by date if cen_store_results: @@ -343,7 +339,6 @@ def dashboard_central_store_data_volume(self): hover_tool.formatters = {'@date': 'datetime'} cen_store_plot.tools.append(hover_tool) - di.session.close() return plot, cen_store_plot def dashboard_filetype_bar_chart(self): From 1ad2d84b8971c7f2c477840cf9c012063f7f2e50 Mon Sep 17 00:00:00 2001 From: Bryan Hilbert Date: Thu, 16 Jan 2025 11:34:13 -0500 Subject: [PATCH 3/4] PEP8 --- jwql/website/apps/jwql/bokeh_dashboard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jwql/website/apps/jwql/bokeh_dashboard.py b/jwql/website/apps/jwql/bokeh_dashboard.py index 2266055a1..9261a9f6c 100644 --- a/jwql/website/apps/jwql/bokeh_dashboard.py +++ b/jwql/website/apps/jwql/bokeh_dashboard.py @@ -219,7 +219,7 @@ def dashboard_disk_usage(self): y_axis_label='Disk Space (TB)') plots[data['shortname']].line(x='date', y='available', source=source, legend_label='Available', line_dash='dashed', line_color='#C85108', line_width=3) - plots[data['shortname']].circle(x='date', y='available', source=source,color='#C85108', radius=5, radius_dimension='y', radius_units='screen') + plots[data['shortname']].circle(x='date', y='available', source=source, color='#C85108', radius=5, radius_dimension='y', radius_units='screen') plots[data['shortname']].line(x='date', y='used', source=source, legend_label='Used', line_dash='dashed', line_color='#355C7D', line_width=3) plots[data['shortname']].circle(x='date', y='used', source=source, color='#355C7D', radius=5, radius_dimension='y', radius_units='screen') From bd3175c6b432c5a1ad4300a427ace093bf7f26e7 Mon Sep 17 00:00:00 2001 From: Bryan Hilbert Date: Thu, 16 Jan 2025 11:41:53 -0500 Subject: [PATCH 4/4] Skip test if on github --- jwql/tests/test_bokeh_dashboard.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/jwql/tests/test_bokeh_dashboard.py b/jwql/tests/test_bokeh_dashboard.py index ab672c7e7..e2d87d043 100644 --- a/jwql/tests/test_bokeh_dashboard.py +++ b/jwql/tests/test_bokeh_dashboard.py @@ -27,13 +27,11 @@ from jwql.utils.constants import DEFAULT_MODEL_CHARFIELD, ON_GITHUB_ACTIONS, ON_READTHEDOCS -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings") - # Skip testing this module if on Github Actions -from jwql.website.apps.jwql import bokeh_dashboard # noqa: E402 (module level import not at top of file) - if not ON_GITHUB_ACTIONS and not ON_READTHEDOCS: + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings") setup() + from jwql.website.apps.jwql import bokeh_dashboard # noqa: E402 (module level import not at top of file) @pytest.mark.skipif(ON_GITHUB_ACTIONS, reason='Requires access to django models.')