Skip to content

Commit

Permalink
initial version #240
Browse files Browse the repository at this point in the history
  • Loading branch information
TatianaBurek committed Feb 27, 2023
1 parent 0937fd1 commit cbdba40
Show file tree
Hide file tree
Showing 8 changed files with 1,055 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# ============================*
# ** Copyright UCAR (c) 2023
# ** University Corporation for Atmospheric Research (UCAR)
# ** National Center for Atmospheric Research (NCAR)
# ** Research Applications Lab (RAL)
# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA
# ============================*


"""
Class Name: TcmprSeriesLineMedian
"""

from typing import Union

import metcalcpy.util.utils as utils
from plots.tcmpr_plots.tcmpr_series import TcmprSeries
from plots.tcmpr_plots.tcmpr_util import get_median_ci


class TcmprSeriesLineMedian(TcmprSeries):
    """
    Series used by the TCMPR median line plot: one point per independent
    value, with the offsets to its confidence limits, plus the plotting
    style elements (line colors, etc.) handled by the parent class.
    """

    def __init__(self, config, idx: int, input_data, series_list: list,
                 series_name: Union[list, tuple]):
        super().__init__(config, idx, input_data, series_list, series_name)

    def _create_series_points(self) -> dict:
        """
        Calculate the point value and the CI offsets for every independent
        value listed in the configuration.

        The rows of ``self.series_data`` are selected by comparing the
        LEAD_HR column with the independent value, ordered by CASE and
        their PLOT values are passed to ``get_median_ci``.

        Args:

        Returns:
            dictionary with the keys 'val', 'ncl', 'ncu', 'nstat' and 'mean'.
            'ncl' and 'ncu' hold the distance from the point value to the
            lower/upper limit (or the point value itself when the limit
            is None); 'mean' is returned as an empty list.
        """

        # inherited initialisation; presumably prepares self.series_data
        self._init_series_data()

        values = []
        lower_offsets = []
        upper_offsets = []
        counts = []

        for lead in self.config.indy_vals:
            # numeric independent values given as strings are converted
            # so that they compare equal to the LEAD_HR column
            if utils.is_string_integer(lead):
                lead = int(lead)
            elif utils.is_string_strictly_float(lead):
                lead = float(lead)

            lead_rows = self.series_data.loc[self.series_data['LEAD_HR'] == lead]
            lead_rows = lead_rows.sort_values(by=['CASE'])

            stats = get_median_ci(lead_rows['PLOT'].tolist(),
                                  self.config.alpha,
                                  self.config.n_min)
            point = stats['val']

            # the limits are stored as offsets from the point value
            lower = point if stats['ncl'] is None else point - stats['ncl']
            upper = point if stats['ncu'] is None else stats['ncu'] - point

            lower_offsets.append(lower)
            values.append(point)
            upper_offsets.append(upper)
            counts.append(len(lead_rows))

        return {'val': values,
                'ncl': lower_offsets,
                'ncu': upper_offsets,
                'nstat': counts,
                'mean': []}
99 changes: 99 additions & 0 deletions metplotpy/plots/tcmpr_plots/scatter/tcmpr_series_scatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# ============================*
# ** Copyright UCAR (c) 2023
# ** University Corporation for Atmospheric Research (UCAR)
# ** National Center for Atmospheric Research (NCAR)
# ** Research Applications Lab (RAL)
# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA
# ============================*


"""
Class Name: TcmprSeriesScatter
"""

import re
from typing import Union

import numpy as np

import metcalcpy.util.utils as utils
from plots.series import Series


class TcmprSeriesScatter(Series):
    """
    Series used by the TCMPR scatter plot: the subset of the input data
    selected by the series name, plus the plotting style elements
    (line colors, etc.) handled by the parent class.
    """

    def __init__(self, config, idx: int, input_data, series_list: list,
                 series_name: Union[list, tuple]):
        # both attributes are assigned before the parent constructor is called
        self.series_list = series_list
        self.series_name = series_name
        super().__init__(config, idx, input_data, 1)

    def _create_all_fields_values_no_indy(self) -> dict:
        """
        Build the field/values dictionary of the series variables
        (without the indy variable).

        The keys of the 'series_val_1' configuration value are taken in
        reverse order and, when forecast variables are configured,
        a 'fcst_var' entry with their names is added.

        :return: dictionary with the single key 1 (the axis) whose value is
            the field-values dictionary
        """
        configured = self.config.get_config_value('series_val_1').copy()

        # same entries, reversed key order
        fields = {key: configured.get(key) for key in reversed(list(configured.keys()))}

        if self.config._get_fcst_vars(1):
            fields['fcst_var'] = list(self.config._get_fcst_vars(1).keys())

        return {1: fields}

    def _create_series_points(self) -> dict:
        """
        Select the rows of the input data that belong to this series and
        store them, sorted, in ``self.series_data``.

        Args:

        Returns:
            dictionary with the keys 'val', 'ncl', 'ncu' and 'nstat',
            each holding an empty list
        """

        def as_number(item):
            # strings that look like numbers are compared as numbers
            if utils.is_string_integer(item):
                return int(item)
            if utils.is_string_strictly_float(item):
                return float(item)
            return item

        # one boolean mask per series field
        masks = []
        for position, column in enumerate(self.all_fields_values_no_indy[self.y_axis].keys()):
            # the LEAD field is matched against the LEAD_HR column
            if column == 'LEAD':
                column = 'LEAD_HR'

            raw_value = self.series_name[position]
            if isinstance(raw_value, str) and utils.GROUP_SEPARATOR in raw_value:
                # a group: try date/time members first, then a plain split
                accepted = re.findall(utils.DATE_TIME_REGEX, raw_value)
                if not accepted:
                    accepted = raw_value.split(utils.GROUP_SEPARATOR)
                # add the original value
                accepted.append(raw_value)
            else:
                accepted = [raw_value]

            accepted = [as_number(item) for item in accepted]
            masks.append(self.input_data[column].isin(accepted))

        # a row is kept only when it satisfies every mask
        keep = np.array(masks).all(axis=0)
        self.series_data = self.input_data.loc[keep]

        # sort data by date/time/storm - needed for CI calculations
        self.series_data = self.series_data.sort_values(['VALID', 'LEAD', 'STORM_ID'])

        return {'val': [], 'ncl': [], 'ncu': [], 'nstat': []}
80 changes: 80 additions & 0 deletions metplotpy/plots/tcmpr_plots/skill/mean/tcmpr_series_skill_mean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# ============================*
# ** Copyright UCAR (c) 2023
# ** University Corporation for Atmospheric Research (UCAR)
# ** National Center for Atmospheric Research (NCAR)
# ** Research Applications Lab (RAL)
# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA
# ============================*


"""
Class Name: TcmprSeriesSkillMean
"""

from typing import Union

import numpy as np
from pandas import DataFrame

import metcalcpy.util.utils as utils
from plots.tcmpr_plots.tcmpr_series import TcmprSeries


class TcmprSeriesSkillMean(TcmprSeries):
    """
    Series used by the TCMPR mean skill plot: one skill score per
    independent value, computed against reference data, plus the plotting
    style elements (line colors, etc.) handled by the parent class.
    """

    def __init__(self, config, idx: int, input_data, series_list: list,
                 series_name: Union[list, tuple], skill_ref_data: DataFrame = None):
        super().__init__(config, idx, input_data, series_list, series_name, skill_ref_data)

    @staticmethod
    def _mean_skill_score(cur_values: list, ref_values: list):
        """
        Calculate the skill score 100 * (ref - cur) / ref from the NaN-aware
        means of the two value lists, rounded to a whole number.

        Args:
            cur_values: PLOT values of the series for one lead time
            ref_values: PLOT values of the reference data for the same lead time

        Returns:
            the rounded score, or None when it can't be calculated
            (no values, a mean that is not finite, or a reference mean of 0)
        """
        # np.nanmean of an empty list is NaN - don't even try
        if len(cur_values) == 0 or len(ref_values) == 0:
            return None

        cur = np.nanmean(cur_values)
        ref = np.nanmean(ref_values)

        # np.nanmean never returns None, so test for NaN/inf explicitly;
        # a zero reference mean would make the score non-finite as well
        if not np.isfinite(cur) or not np.isfinite(ref) or ref == 0:
            return None

        return utils.round_half_up(100 * (ref - cur) / ref, 0)

    def _create_series_points(self) -> dict:
        """
        Calculate the skill score and the number of cases for every
        independent value listed in the configuration.

        The rows of the series and of the reference data are selected by
        comparing the LEAD_HR column with the independent value. The first
        independent value (index 0) never gets a score. Entries of lead
        times without series data - or all entries when there is no
        reference data - stay None.

        Args:

        Returns:
            dictionary with the keys 'val' and 'nstat'; both lists have
            one entry per independent value
        """

        # inherited initialisation; presumably prepares self.series_data
        self._init_series_data()

        indy_vals = self.config.indy_vals
        result_size = len(indy_vals)
        series_points_results = {'val': [None] * result_size,
                                 'nstat': [None] * result_size}

        # doesn't depend on the lead time - evaluate once
        has_ref_data = self.skill_ref_data is not None and len(self.skill_ref_data) > 0

        # for each point calculate plot statistic
        for i, indy in enumerate(indy_vals):
            if utils.is_string_integer(indy):
                indy = int(indy)
            elif utils.is_string_strictly_float(indy):
                indy = float(indy)
            point_data = self.series_data.loc[
                (self.series_data["LEAD_HR"] == indy)]

            # Skip lead times for which no data is found
            if len(point_data) == 0 or not has_ref_data:
                continue

            point_data = point_data.sort_values(by=['CASE'])
            data_ref = self.skill_ref_data.loc[(self.skill_ref_data['LEAD_HR'] == indy)]

            # Get the value to be plotted for this lead time
            val = None
            if i != 0:
                val = self._mean_skill_score(point_data['PLOT'].tolist(),
                                             data_ref['PLOT'].tolist())

            series_points_results['val'][i] = val
            series_points_results['nstat'][i] = len(point_data)

        return series_points_results
142 changes: 142 additions & 0 deletions metplotpy/plots/tcmpr_plots/skill/mean/tcmpr_skill_mean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import os

import numpy as np
import plotly.graph_objects as go

from metcalcpy.util import utils
from plots.tcmpr_plots.skill.mean.tcmpr_series_skill_mean import TcmprSeriesSkillMean
from plots.tcmpr_plots.skill.tcmpr_skill import TcmprSkill


class TcmprSkillMean(TcmprSkill):
    """
    Mean skill score (SKILL_MN) time series plot of the TCMPR data.
    """

    def __init__(self, config_obj, column_info, col, case_data, input_df, baseline_data):
        """
        Prepare the titles, the HFIP baseline and the series, then create
        the figure.

        Args:
            config_obj: configuration object of the plot
            column_info: data frame with the COLUMN and DESCRIPTION columns
            col: mapping with the 'units' and 'desc' entries of the plotted column
            case_data: passed to the parent class (reset to None afterwards)
            input_df: input data used to create the series
            baseline_data: mapping with the entries 'cur_baseline' (label)
                and 'cur_baseline_data' (baseline values or None)
        """
        super().__init__(config_obj, column_info, col, case_data, input_df, baseline_data)
        print("--------------------------------------------------------")
        print(f"Plotting SKILL_MN time series by {self.config_obj.series_val_names[0]}")

        self._adjust_titles()
        self.cur_baseline = baseline_data['cur_baseline']
        self.cur_baseline_data = baseline_data['cur_baseline_data']
        self._init_hfip_baseline_for_plot()
        self.series_list = self._create_series(self.input_df)
        self.case_data = None
        if self.config_obj.prefix is None or len(self.config_obj.prefix) == 0:
            self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.list_stat_1[0]}_skill_mn.png"
        else:
            self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.prefix}.png"

        # remove the old file if it exists
        if os.path.exists(self.plot_filename):
            os.remove(self.plot_filename)
        self._create_figure()

    def _adjust_titles(self):
        """
        Provide default values for the y axis label and the plot title
        when they are empty.
        """
        if self.yaxis_1 is None or len(self.yaxis_1) == 0:
            self.yaxis_1 = self.config_obj.list_stat_1[0] + '(' + self.col['units'] + ')'

        if self.title is None or len(self.title) == 0:
            self.title = 'Mean Skill Scores of ' + self.col['desc'] + ' by ' \
                         + self.column_info[self.column_info['COLUMN'] == self.config_obj.series_val_names[0]][
                             "DESCRIPTION"].tolist()[0]

    def _init_hfip_baseline_for_plot(self):
        """
        Print the HFIP baseline label. Unless the title contains
        'Water Only', the label is first reworded from 'Error' to 'Skill'.
        """
        if 'Water Only' in self.title:
            print("Plot HFIP Baseline:" + self.cur_baseline)
        else:
            self.cur_baseline = self.cur_baseline.replace('Error', 'Skill')
            self.cur_baseline = self.cur_baseline.replace('HFIP Baseline ', 'HFIP Skill Baseline')
            print('Plot HFIP Baseline:' + self.cur_baseline.replace('Error ', ''))

    def _get_baseline_value(self, lead, model_type):
        """
        Find the baseline VALUE for the lead time and the TYPE.

        Args:
            lead: lead time, compared with the LEAD column
            model_type: compared with the TYPE column ("OCD5" or "CONS")

        Returns:
            the value, or None when the baseline data has no such entry

        Raises:
            ValueError: when more than one entry matches
        """
        values = self.cur_baseline_data.loc[
            (self.cur_baseline_data['LEAD'] == lead) & (self.cur_baseline_data['TYPE'] == model_type)][
            'VALUE'].tolist()
        if len(values) > 1:
            raise ValueError(
                f"ERROR: Can't create HFIP baseline for lead time {lead} : too many values of {model_type} in .dat file")
        return values[0] if values else None

    def _add_hfip_baseline(self):
        """
        Add the HFIP baseline to the figure as a marker trace.

        For every non-zero lead time of the first series the baseline is
        100 * (OCD5 - CONS) / OCD5, rounded to one decimal. Lead times
        without an OCD5 or a CONS entry, or with an OCD5 value of 0,
        get no marker. Nothing is added when there is no baseline data.

        Raises:
            ValueError: when a lead time has several OCD5 or CONS entries
        """
        if self.cur_baseline_data is None:
            return

        baseline_x_values = []
        baseline_y_values = []
        # np.unique returns the values already sorted
        lead_times = np.unique(self.series_list[0].series_data[self.config_obj.indy_var].tolist())
        for ind, lead in enumerate(lead_times):
            if lead == 0:
                continue

            ocd5_data = self._get_baseline_value(lead, "OCD5")
            cons_data = self._get_baseline_value(lead, "CONS")

            # the baseline can't be calculated for this lead time
            if ocd5_data is None or cons_data is None or ocd5_data == 0:
                continue

            baseline_lead = utils.round_half_up(100 * (ocd5_data - cons_data) / ocd5_data, 1)
            # the x value is the position of the lead time, not the lead time itself
            baseline_x_values.append(ind)
            baseline_y_values.append(baseline_lead)

        self.figure.add_trace(
            go.Scatter(x=baseline_x_values,
                       y=baseline_y_values,
                       showlegend=True,
                       mode='markers',
                       textposition="top right",
                       name=self.cur_baseline,
                       marker=dict(size=8,
                                   color='rgb(0,0,255)',
                                   line=dict(
                                       width=1,
                                       color='rgb(0,0,255)'
                                   ),
                                   symbol='diamond-cross-open',
                                   )
                       )
        )

    def _create_series(self, input_data):
        """
        Generate all the series objects that are to be displayed.

        A series for the skill reference model(s) is created first; its
        data is handed to every regular series of the y1 axis as the
        reference. Derived series are created without reference data.

        Args:
            input_data: The input data in the form of a Pandas dataframe.
                        This data will be subset to reflect the series data of interest.
        Returns:
            a list of series objects, ordered by the configured series ordering
        """
        all_fields_values = {'AMODEL': [utils.GROUP_SEPARATOR.join(self.config_obj.skill_ref)],
                             'fcst_var': self.config_obj.list_stat_1}
        permutations = utils.create_permutations_mv(all_fields_values, 0)
        ref_model_data_series = TcmprSeriesSkillMean(self.config_obj, 0,
                                                     input_data, [], permutations[0])
        ref_model_data = ref_model_data_series.series_data

        series_list = []

        # add series for y1 axis
        num_series_y1 = len(self.config_obj.get_series_y(1))
        for i, name in enumerate(self.config_obj.get_series_y(1)):
            if not isinstance(name, list):
                name = [name]
            series_obj = TcmprSeriesSkillMean(self.config_obj, i, input_data, series_list, name, ref_model_data)
            series_list.append(series_obj)

        # add derived for y1 axis
        for i, name in enumerate(self.config_obj.get_config_value('derived_series_1')):
            # add default operation value if it is not provided
            if len(name) == 2:
                name.append("DIFF")
            # include the series only if the name is valid
            if len(name) == 3:
                series_obj = TcmprSeriesSkillMean(self.config_obj, num_series_y1 + i, input_data, series_list, name)
                series_list.append(series_obj)

        # reorder series
        series_list = self.config_obj.create_list_by_series_ordering(series_list)

        return series_list
Loading

0 comments on commit cbdba40

Please sign in to comment.