Update data_containers and bokeh_dashboard to use Django models #1668

Merged
41 changes: 41 additions & 0 deletions jwql/tests/test_bokeh_dashboard.py
@@ -0,0 +1,41 @@
#!/usr/bin/env python

"""Tests for the ``bokeh_dashboard`` module in the ``jwql`` web
application.

Authors
-------

- Bryan Hilbert

Use
---

These tests can be run via the command line (omit the -s to
suppress verbose output to stdout):

::

pytest -s test_bokeh_dashboard.py
"""

import os

from django import setup
import pandas as pd
import pytest

from jwql.utils.constants import DEFAULT_MODEL_CHARFIELD, ON_GITHUB_ACTIONS, ON_READTHEDOCS

# Set up the Django environment, except when running on GitHub Actions or ReadTheDocs
if not ON_GITHUB_ACTIONS and not ON_READTHEDOCS:
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings")
    setup()
    from jwql.website.apps.jwql import bokeh_dashboard  # noqa: E402 (module level import not at top of file)


@pytest.mark.skipif(ON_GITHUB_ACTIONS, reason='Requires access to django models.')
def test_build_table_latest_entry():
    tab = bokeh_dashboard.build_table('FilesystemCharacteristics')
    assert isinstance(tab, pd.DataFrame)
    assert len(tab['date']) > 0
2 changes: 1 addition & 1 deletion jwql/tests/test_data_containers.py
@@ -48,7 +48,7 @@

@pytest.mark.skipif(ON_GITHUB_ACTIONS, reason='Requires access to django models.')
def test_build_table():
tab = data_containers.build_table('filesystem_general')
tab = data_containers.build_table('FilesystemGeneral')
assert isinstance(tab, pd.DataFrame)
assert len(tab['date']) > 0

30 changes: 30 additions & 0 deletions jwql/website/apps/jwql/apps.py
@@ -0,0 +1,30 @@
#! /usr/bin/env python

"""
apps.py is the standard and recommended way to configure application-specific settings
in Django, including tasks like importing additional modules during initialization.

Author
------

B. Hilbert
"""

from django.apps import AppConfig


class JwqlAppConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'jwql'

    def ready(self):
        # Import models not defined in models.py here
        # By importing these models here, they will be available
        # to the build_table() function.
        import jwql.website.apps.jwql.monitor_models.bad_pixel
        import jwql.website.apps.jwql.monitor_models.bias
        import jwql.website.apps.jwql.monitor_models.claw
        import jwql.website.apps.jwql.monitor_models.common
        import jwql.website.apps.jwql.monitor_models.dark_current
        import jwql.website.apps.jwql.monitor_models.readnoise
        import jwql.website.apps.jwql.monitor_models.ta
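
Importing the monitor model modules inside ready() registers their classes with Django's app registry, so other code can resolve a model from its class name. A minimal sketch of such a lookup (the helper name get_model_by_name is illustrative only and assumes a configured Django environment; it is not the actual implementation of import_all_models used later in this PR):

from django.apps import apps

def get_model_by_name(model_name):
    """Return the registered model class whose class name matches model_name."""
    for model in apps.get_models():
        if model.__name__ == model_name:
            return model
    raise KeyError(f"No registered model named '{model_name}'")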
103 changes: 47 additions & 56 deletions jwql/website/apps/jwql/bokeh_dashboard.py
@@ -30,6 +30,7 @@
placed in the ``jwql`` directory.
"""

from collections import defaultdict
from datetime import datetime as dt
from math import pi
from operator import itemgetter
@@ -40,17 +41,31 @@
from bokeh.models.layouts import TabPanel, Tabs
from bokeh.plotting import figure
from bokeh.transform import cumsum
from django import setup
from django.db.models import OuterRef, Subquery
import numpy as np
import pandas as pd
from sqlalchemy import func, and_

import jwql.database.database_interface as di
from jwql.database.database_interface import CentralStore
from jwql.utils.constants import ANOMALY_CHOICES_PER_INSTRUMENT, FILTERS_PER_INSTRUMENT, JWST_INSTRUMENT_NAMES_MIXEDCASE
from jwql.website.apps.jwql.monitor_models.common import CentralStorage
from jwql.utils.constants import (ANOMALY_CHOICES_PER_INSTRUMENT,
FILTERS_PER_INSTRUMENT,
JWST_INSTRUMENT_NAMES_MIXEDCASE,
ON_GITHUB_ACTIONS,
ON_READTHEDOCS
)
from jwql.utils.utils import get_base_url, get_config
from jwql.website.apps.jwql.data_containers import build_table
from jwql.website.apps.jwql.data_containers import build_table, import_all_models
from jwql.website.apps.jwql.models import Anomalies

if not ON_GITHUB_ACTIONS and not ON_READTHEDOCS:
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings")
setup()

from jwql.website.apps.jwql.models import get_model_column_names


def build_table_latest_entry(tablename):
"""Create Pandas dataframe from the most recent entry of a JWQLDB table.
@@ -65,46 +80,29 @@ def build_table_latest_entry(tablename):
table_meta_data : pandas.DataFrame
Pandas data frame version of JWQL database table.
"""
# Make dictionary of tablename : class object
# This matches what the user selects in the select element
# in the webform to the python object on the backend.
tables_of_interest = {}
for item in di.__dict__.keys():
table = getattr(di, item)
if hasattr(table, '__tablename__'):
tables_of_interest[table.__tablename__] = table

session, _, _, _ = di.load_connection(get_config()['connection_string'])
table_object = tables_of_interest[tablename] # Select table object

subq = session.query(table_object.instrument,
func.max(table_object.date).label('maxdate')
).group_by(table_object.instrument).subquery('t2')

result = session.query(table_object).join(
subq,
and_(
table_object.instrument == subq.c.instrument,
table_object.date == subq.c.maxdate
)
)

# Turn query result into list of dicts
result_dict = [row.__dict__ for row in result.all()]
column_names = table_object.__table__.columns.keys()

# Build list of column data based on column name.
data = []
for column in column_names:
column_data = list(map(itemgetter(column), result_dict))
data.append(column_data)

data = dict(zip(column_names, data))
all_models = import_all_models()
table_object = all_models.get(tablename)
column_names = get_model_column_names(table_object)

if 'instrument' not in column_names:
raise ValueError(f"No 'instrument' column name in {tablename}. Unable to get latest entry by instrument.")

# Create a subquery to get the latest date for each instrument
subquery = table_object.objects.filter(instrument=OuterRef('instrument')).order_by('-date').values('date')[:1]

# Query the model with the subquery
most_recent_entries = table_object.objects.filter(date=Subquery(subquery))
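# Illustrative note (not part of this PR): the OuterRef/Subquery pair above is
# roughly equivalent to the correlated SQL
#   SELECT * FROM <table> t
#   WHERE t.date = (SELECT date FROM <table>
#                   WHERE instrument = t.instrument
#                   ORDER BY date DESC LIMIT 1)
# i.e. it keeps, for each instrument, only the row with that instrument's latest date.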

# Convert the QuerySet into a dictionary
rows = most_recent_entries.values()
data = defaultdict(list)

for row in rows:
for key, value in row.items():
data[key].append(value)

# Build table.
table_meta_data = pd.DataFrame(data)

session.close()
return table_meta_data
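
A quick usage sketch (assuming a configured Django environment and a populated FilesystemCharacteristics model, as exercised by the new test): the returned frame contains one row per instrument, each holding that instrument's most recent entry.

latest = build_table_latest_entry('FilesystemCharacteristics')
print(latest[['instrument', 'date']])  # one row per instrument, at its latest date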


@@ -177,16 +175,12 @@ def dashboard_disk_usage(self):
# server disk information.
config = get_config()

log_data = di.session.query(CentralStore.date, CentralStore.size, CentralStore.available) \
.filter(CentralStore.area == 'logs') \
.all()
log_data = list(CentralStorage.objects.filter(area='logs').values('date', 'size', 'available'))

# Convert to dataframe
log_data = pd.DataFrame(log_data)

preview_data = di.session.query(CentralStore.date, CentralStore.size, CentralStore.available) \
.filter(CentralStore.area == 'preview_images') \
.all()
preview_data = list(CentralStorage.objects.filter(area='preview_images').values('date', 'size', 'available'))

# Convert to dataframe
preview_data = pd.DataFrame(preview_data)
@@ -225,7 +219,7 @@ def dashboard_disk_usage(self):
y_axis_label='Disk Space (TB)')

plots[data['shortname']].line(x='date', y='available', source=source, legend_label='Available', line_dash='dashed', line_color='#C85108', line_width=3)
plots[data['shortname']].circle(x='date', y='available', source=source,color='#C85108', radius=5, radius_dimension='y', radius_units='screen')
plots[data['shortname']].circle(x='date', y='available', source=source, color='#C85108', radius=5, radius_dimension='y', radius_units='screen')
plots[data['shortname']].line(x='date', y='used', source=source, legend_label='Used', line_dash='dashed', line_color='#355C7D', line_width=3)
plots[data['shortname']].circle(x='date', y='used', source=source, color='#355C7D', radius=5, radius_dimension='y', radius_units='screen')

@@ -247,7 +241,6 @@ def dashboard_disk_usage(self):

tabs = Tabs(tabs=tabs)

di.session.close()
return tabs

def dashboard_central_store_data_volume(self):
@@ -277,7 +270,7 @@ def dashboard_central_store_data_volume(self):
for area, color in zip(arealist, colors):

# Query for used sizes
results = di.session.query(CentralStore.date, CentralStore.used).filter(CentralStore.area == area).all()
results = list(CentralStorage.objects.filter(area=area).values('date', 'used'))

if results:
# Convert to dataframe
@@ -314,7 +307,7 @@ def dashboard_central_store_data_volume(self):
x_axis_label='Date',
y_axis_label='Disk Space (TB)')

cen_store_results = di.session.query(CentralStore.date, CentralStore.used).filter(CentralStore.area == 'all').all()
cen_store_results = list(CentralStorage.objects.filter(area='all').values('date', 'used'))

# Group by date
if cen_store_results:
@@ -346,7 +339,6 @@ def dashboard_central_store_data_volume(self):
hover_tool.formatters = {'@date': 'datetime'}
cen_store_plot.tools.append(hover_tool)

di.session.close()
return plot, cen_store_plot

def dashboard_filetype_bar_chart(self):
@@ -360,7 +352,7 @@ def dashboard_filetype_bar_chart(self):

# Make Pandas DF for filesystem_instrument
# If time delta exists, filter data based on that.
data = build_table('filesystem_instrument')
data = build_table('FilesystemInstrument')

# Keep only the rows containing the most recent timestamp
data = data[data['date'] == data['date'].max()]
@@ -390,8 +382,7 @@ def dashboard_instrument_pie_chart(self):
plot : bokeh.plotting.figure
Pie chart figure
"""
# Replace with jwql.website.apps.jwql.data_containers.build_table
data = build_table('filesystem_instrument')
data = build_table('FilesystemInstrument')

# Keep only the rows containing the most recent timestamp
data = data[data['date'] == data['date'].max()]
@@ -439,7 +430,7 @@ def dashboard_files_per_day(self):
A figure with tabs for each instrument.
"""

source = build_table('filesystem_general')
source = build_table('FilesystemGeneral')
if not pd.isnull(self.delta_t):
source = source[(source['date'] >= self.date - self.delta_t) & (source['date'] <= self.date)]

@@ -495,7 +486,7 @@ def dashboard_monitor_tracking(self):
Numpy array of column values from monitor table.
"""

data = build_table('monitor')
data = build_table('Monitor')

if not pd.isnull(self.delta_t):
data = data[(data['start_time'] >= self.date - self.delta_t) & (data['start_time'] <= self.date)]
@@ -551,7 +542,7 @@ def dashboard_exposure_count_by_filter(self):
"""
# build_table_latest_query will return only the database entries with the latest date. This should
# correspond to one row/entry per instrument
data = build_table_latest_entry('filesystem_characteristics')
data = build_table_latest_entry('FilesystemCharacteristics')

# Sort by instrument name so that the order of the tabs will always be the same
data = data.sort_values('instrument')