From 92478fa206af2eaab18443cd9e349058d6187e60 Mon Sep 17 00:00:00 2001 From: TatianaBurek Date: Fri, 27 Jan 2023 12:39:57 -0700 Subject: [PATCH 1/8] add tickformat to xaxes #240 --- metplotpy/plots/line/line.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metplotpy/plots/line/line.py b/metplotpy/plots/line/line.py index faff5d48..29c2c746 100644 --- a/metplotpy/plots/line/line.py +++ b/metplotpy/plots/line/line.py @@ -413,7 +413,8 @@ def _add_xaxis(self) -> None: }, title_standoff=abs(self.config_obj.parameters['xlab_offset']), tickangle=self.config_obj.x_tickangle, - tickfont={'size': self.config_obj.x_tickfont_size} + tickfont={'size': self.config_obj.x_tickfont_size}, + tickformat='d' ) def _add_yaxis(self) -> None: From 3504ee0639e30448c19c76cb4a798e6637389111 Mon Sep 17 00:00:00 2001 From: TatianaBurek Date: Mon, 27 Feb 2023 14:54:02 -0700 Subject: [PATCH 2/8] initial version #240 --- metplotpy/plots/tcmpr_plots/__init__.py | 0 metplotpy/plots/tcmpr_plots/box/__init__.py | 0 metplotpy/plots/tcmpr_plots/box/tcmpr_box.py | 89 +++++++++++++++++++ .../plots/tcmpr_plots/box/tcmpr_box_point.py | 75 ++++++++++++++++ .../plots/tcmpr_plots/box/tcmpr_point.py | 84 +++++++++++++++++ metplotpy/plots/tcmpr_plots/hfip_baseline.dat | 66 ++++++++++++++ metplotpy/plots/tcmpr_plots/line/__init__.py | 0 .../plots/tcmpr_plots/line/mean/__init__.py | 0 .../plots/tcmpr_plots/line/median/__init__.py | 0 metplotpy/plots/tcmpr_plots/rank/__init__.py | 0 .../plots/tcmpr_plots/relperf/__init__.py | 0 .../plots/tcmpr_plots/scatter/__init__.py | 0 metplotpy/plots/tcmpr_plots/skill/__init__.py | 0 .../plots/tcmpr_plots/skill/mean/__init__.py | 0 .../tcmpr_plots/skill/median/__init__.py | 0 15 files changed, 314 insertions(+) create mode 100644 metplotpy/plots/tcmpr_plots/__init__.py create mode 100644 metplotpy/plots/tcmpr_plots/box/__init__.py create mode 100644 metplotpy/plots/tcmpr_plots/box/tcmpr_box.py create mode 100644 
metplotpy/plots/tcmpr_plots/box/tcmpr_box_point.py create mode 100644 metplotpy/plots/tcmpr_plots/box/tcmpr_point.py create mode 100644 metplotpy/plots/tcmpr_plots/hfip_baseline.dat create mode 100644 metplotpy/plots/tcmpr_plots/line/__init__.py create mode 100644 metplotpy/plots/tcmpr_plots/line/mean/__init__.py create mode 100644 metplotpy/plots/tcmpr_plots/line/median/__init__.py create mode 100644 metplotpy/plots/tcmpr_plots/rank/__init__.py create mode 100644 metplotpy/plots/tcmpr_plots/relperf/__init__.py create mode 100644 metplotpy/plots/tcmpr_plots/scatter/__init__.py create mode 100644 metplotpy/plots/tcmpr_plots/skill/__init__.py create mode 100644 metplotpy/plots/tcmpr_plots/skill/mean/__init__.py create mode 100644 metplotpy/plots/tcmpr_plots/skill/median/__init__.py diff --git a/metplotpy/plots/tcmpr_plots/__init__.py b/metplotpy/plots/tcmpr_plots/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/metplotpy/plots/tcmpr_plots/box/__init__.py b/metplotpy/plots/tcmpr_plots/box/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/metplotpy/plots/tcmpr_plots/box/tcmpr_box.py b/metplotpy/plots/tcmpr_plots/box/tcmpr_box.py new file mode 100644 index 00000000..9d7e6bb2 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/box/tcmpr_box.py @@ -0,0 +1,89 @@ +import os + +import plotly.graph_objects as go + +from plots.tcmpr_plots.box.tcmpr_box_point import TcmprBoxPoint +from plots.tcmpr_plots.tcmpr_series import TcmprSeries + + +class TcmprBox(TcmprBoxPoint): + def __init__(self, config_obj, column_info, col, case_data, input_df, baseline_data): + super().__init__(config_obj, column_info, col, case_data, input_df, baseline_data) + + print("--------------------------------------------------------") + print(f"Plotting BOXPLOT time series by {self.config_obj.series_val_names[0]}") + self._adjust_titles() + self.series_list = self._create_series(self.input_df) + self.case_data = None + self.cur_baseline = 
baseline_data['cur_baseline'] + self.cur_baseline_data = baseline_data['cur_baseline_data'] + self._init_hfip_baseline_for_plot() + + if self.config_obj.prefix is None or len(self.config_obj.prefix) == 0: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.list_stat_1[0]}_boxplot.png" + else: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.prefix}.png" + + # remove the old file if it exist + if os.path.exists(self.plot_filename): + os.remove(self.plot_filename) + self._create_figure() + + def _adjust_titles(self): + if self.yaxis_1 is None or len(self.yaxis_1) == 0: + self.yaxis_1 = self.config_obj.list_stat_1[0] + '(' + self.col['units'] + ')' + + if self.title is None or len(self.title) == 0: + self.title = 'Boxplots of ' + self.col['desc'] + ' by ' \ + + self.column_info[self.column_info['COLUMN'] == self.config_obj.series_val_names[0]][ + "DESCRIPTION"].tolist()[0] + + def _draw_series(self, series: TcmprSeries) -> None: + """ + Draws the boxes on the plot + + :param series: Line series object with data and parameters + """ + + # defaults markers and colors for the regular box plot + line_color = dict(color='rgb(0,0,0)') + marker_color = series.color + marker_line_color = series.color + + if len([x for x in series.series_data['PLOT'].tolist() if x is not None]) < self.config_obj.n_min: + line_color = dict(color='rgba(0,0,0,0)') + fillcolor = 'rgba(0,0,0,0)' + marker_symbol = 'circle' + else: + if series.color == 'rgb(0,0,0)' or series.color == 'black' or series.color == '#000000': + fillcolor = '#ffffff' + else: + fillcolor = series.color + marker_symbol = 'circle-open' + + # create a trace + self.figure.add_trace( + go.Box(x=series.series_data['LEAD_HR'], + y=series.series_data['PLOT'], + mean=series.series_points['mean'], + notched=self.config_obj.box_notch, + line=line_color, + fillcolor=fillcolor, + name=series.user_legends, + showlegend=True, + # quartilemethod='linear', #"exclusive", 
"inclusive", or "linear" + boxmean=self.config_obj.box_avg, + boxpoints='outliers', # outliers, all, False + pointpos=0, + marker=dict(size=4, + color=marker_color, + line=dict( + width=1, + color=marker_line_color + ), + symbol=marker_symbol, + ), + jitter=0 + ), + secondary_y=series.y_axis != 1 + ) diff --git a/metplotpy/plots/tcmpr_plots/box/tcmpr_box_point.py b/metplotpy/plots/tcmpr_plots/box/tcmpr_box_point.py new file mode 100644 index 00000000..7e636dda --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/box/tcmpr_box_point.py @@ -0,0 +1,75 @@ +from plots.tcmpr_plots.tcmpr import Tcmpr +from plots.tcmpr_plots.tcmpr_series import TcmprSeries + + +class TcmprBoxPoint(Tcmpr): + def __init__(self, config_obj, column_info, col, case_data, input_df, baseline_data): + super().__init__(config_obj, column_info, col, case_data, input_df) + + def _init_hfip_baseline_for_plot(self): + if 'Water Only' in self.config_obj.title or self.cur_baseline == 'no': + print("Plot HFIP Baseline:" + self.cur_baseline) + else: + self.cur_baseline_data = self.cur_baseline_data[(self.cur_baseline_data['TYPE'] == 'CONS')] + print('Plot HFIP Baseline:' + self.cur_baseline.replace('Error ', '')) + + def _draw_series(self, series: TcmprSeries) -> None: + pass + + def _create_figure(self): + """ Create a box plot from default and custom parameters""" + + self.figure = self._create_layout() + self._add_xaxis() + self._add_yaxis() + self._add_legend() + + # placeholder for the min and max values for y-axis + yaxis_min = None + yaxis_max = None + + if self.config_obj.xaxis_reverse is True: + self.series_list.reverse() + + # add x ticks for line plots + self.figure.update_layout( + xaxis={ + 'tickmode': 'array', + 'tickvals': self.config_obj.indy_vals, + 'ticktext': self.config_obj.indy_label + } + ) + + for series in self.series_list: + # Don't generate the plot for this series if + # it isn't requested (as set in the config file) + if series.plot_disp: + # collect min-max if we need to sync axis + 
yaxis_min, yaxis_max = self.find_min_max(series, yaxis_min, yaxis_max) + self._draw_series(series) + + print(f'Range of {self.config_obj.list_stat_1[0]}: {yaxis_min}, {yaxis_max}') + self._add_hfip_baseline() + + self.figure.update_layout(shapes=[dict( + type='line', + yref='y', y0=0, y1=0, + xref='paper', x0=0, x1=0.95, + line={'color': '#727273', + 'dash': 'dot', + 'width': 1}, + )]) + + self.figure.update_layout(boxmode='group') + + # add custom lines + if len(self.series_list) > 0: + self._add_lines( + self.config_obj, + sorted(self.series_list[0].series_data[self.config_obj.indy_var].unique()) + ) + # apply y axis limits + self._yaxis_limits() + + # add x2 axis + self._add_x2axis(self.config_obj.indy_vals) diff --git a/metplotpy/plots/tcmpr_plots/box/tcmpr_point.py b/metplotpy/plots/tcmpr_plots/box/tcmpr_point.py new file mode 100644 index 00000000..d07969e4 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/box/tcmpr_point.py @@ -0,0 +1,84 @@ +import os + +import plotly.graph_objects as go + +from plots.tcmpr_plots.box.tcmpr_box_point import TcmprBoxPoint +from plots.tcmpr_plots.tcmpr_series import TcmprSeries + + +class TcmprPoint(TcmprBoxPoint): + def __init__(self, config_obj, column_info, col, case_data, input_df, baseline_data): + super().__init__(config_obj, column_info, col, case_data, input_df, baseline_data) + print("--------------------------------------------------------") + print(f"Plotting POINT time series by {self.config_obj.series_val_names[0]}") + + self._adjust_titles() + self.series_list = self._create_series(self.input_df) + self.case_data = None + self.cur_baseline = baseline_data['cur_baseline'] + self.cur_baseline_data = baseline_data['cur_baseline_data'] + self._init_hfip_baseline_for_plot() + + if self.config_obj.prefix is None or len(self.config_obj.prefix) == 0: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.list_stat_1[0]}_pointplot.png" + else: + self.plot_filename = 
f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.prefix}.png" + + # remove the old file if it exist + if os.path.exists(self.plot_filename): + os.remove(self.plot_filename) + self._create_figure() + + def _adjust_titles(self): + if self.yaxis_1 is None or len(self.yaxis_1) == 0: + self.yaxis_1 = self.config_obj.list_stat_1[0] + '(' + self.col['units'] + ')' + + if self.title is None or len(self.title) == 0: + self.title = 'Point Plots of ' + self.col['desc'] + ' by ' \ + + self.column_info[self.column_info['COLUMN'] == self.config_obj.series_val_names[0]][ + "DESCRIPTION"].tolist()[0] + + def _draw_series(self, series: TcmprSeries) -> None: + """ + Draws the boxes on the plot + + :param series: Line series object with data and parameters + """ + + # defaults markers and colors for the regular box plot + marker_color = series.color + marker_line_color = series.color + + # Point plot + + line_color = dict(color='rgba(0,0,0,0)') + fillcolor = 'rgba(0,0,0,0)' + marker_symbol = 'circle' + boxpoints = 'all' + + # create a trace + self.figure.add_trace( + go.Box(x=series.series_data['LEAD_HR'], + y=series.series_data['PLOT'], + mean=series.series_points['mean'], + notched=self.config_obj.box_notch, + line=line_color, + fillcolor=fillcolor, + name=series.user_legends, + showlegend=True, + # quartilemethod='linear', #"exclusive", "inclusive", or "linear" + boxmean=self.config_obj.box_avg, + boxpoints=boxpoints, # outliers, all, False + pointpos=0, + marker=dict(size=4, + color=marker_color, + line=dict( + width=1, + color=marker_line_color + ), + symbol=marker_symbol, + ), + jitter=0 + ), + secondary_y=series.y_axis != 1 + ) diff --git a/metplotpy/plots/tcmpr_plots/hfip_baseline.dat b/metplotpy/plots/tcmpr_plots/hfip_baseline.dat new file mode 100644 index 00000000..054d63b8 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/hfip_baseline.dat @@ -0,0 +1,66 @@ +BASIN TYPE VARIABLE LEAD VALUE +AL CONS TK_ERR 000000 7.8 +AL CONS TK_ERR 120000 30.0 +AL CONS TK_ERR 240000 
49.8 +AL CONS TK_ERR 360000 69.5 +AL CONS TK_ERR 480000 89.6 +AL CONS TK_ERR 720000 132.0 +AL CONS TK_ERR 960000 175.2 +AL CONS TK_ERR 1200000 221.9 +AL OCD5 TK_ERR 000000 7.7 +AL OCD5 TK_ERR 120000 44.5 +AL OCD5 TK_ERR 240000 93.3 +AL OCD5 TK_ERR 360000 150.9 +AL OCD5 TK_ERR 480000 212.2 +AL OCD5 TK_ERR 720000 317.2 +AL OCD5 TK_ERR 960000 396.5 +AL OCD5 TK_ERR 1200000 473.0 +EP CONS TK_ERR 000000 10.2 +EP CONS TK_ERR 120000 30.1 +EP CONS TK_ERR 240000 48.7 +EP CONS TK_ERR 360000 67.1 +EP CONS TK_ERR 480000 86.8 +EP CONS TK_ERR 720000 119.6 +EP CONS TK_ERR 960000 160.6 +EP CONS TK_ERR 1200000 205.0 +EP OCD5 TK_ERR 000000 10.3 +EP OCD5 TK_ERR 120000 38.6 +EP OCD5 TK_ERR 240000 74.4 +EP OCD5 TK_ERR 360000 114.4 +EP OCD5 TK_ERR 480000 155.0 +EP OCD5 TK_ERR 720000 224.0 +EP OCD5 TK_ERR 960000 267.9 +EP OCD5 TK_ERR 1200000 308.1 +AL CONS ABS(AMAX_WIND-BMAX_WIND) 000000 2.2 +AL CONS ABS(AMAX_WIND-BMAX_WIND) 120000 7.7 +AL CONS ABS(AMAX_WIND-BMAX_WIND) 240000 10.1 +AL CONS ABS(AMAX_WIND-BMAX_WIND) 360000 11.7 +AL CONS ABS(AMAX_WIND-BMAX_WIND) 480000 13.7 +AL CONS ABS(AMAX_WIND-BMAX_WIND) 720000 16.0 +AL CONS ABS(AMAX_WIND-BMAX_WIND) 960000 16.6 +AL CONS ABS(AMAX_WIND-BMAX_WIND) 1200000 17.0 +AL OCD5 ABS(AMAX_WIND-BMAX_WIND) 000000 2.2 +AL OCD5 ABS(AMAX_WIND-BMAX_WIND) 120000 8.3 +AL OCD5 ABS(AMAX_WIND-BMAX_WIND) 240000 11.5 +AL OCD5 ABS(AMAX_WIND-BMAX_WIND) 360000 14.2 +AL OCD5 ABS(AMAX_WIND-BMAX_WIND) 480000 16.1 +AL OCD5 ABS(AMAX_WIND-BMAX_WIND) 720000 17.8 +AL OCD5 ABS(AMAX_WIND-BMAX_WIND) 960000 19.3 +AL OCD5 ABS(AMAX_WIND-BMAX_WIND) 1200000 19.3 +EP CONS ABS(AMAX_WIND-BMAX_WIND) 000000 1.8 +EP CONS ABS(AMAX_WIND-BMAX_WIND) 120000 6.8 +EP CONS ABS(AMAX_WIND-BMAX_WIND) 240000 10.5 +EP CONS ABS(AMAX_WIND-BMAX_WIND) 360000 12.9 +EP CONS ABS(AMAX_WIND-BMAX_WIND) 480000 14.5 +EP CONS ABS(AMAX_WIND-BMAX_WIND) 720000 16.0 +EP CONS ABS(AMAX_WIND-BMAX_WIND) 960000 18.1 +EP CONS ABS(AMAX_WIND-BMAX_WIND) 1200000 18.2 +EP OCD5 ABS(AMAX_WIND-BMAX_WIND) 000000 1.8 +EP OCD5 
ABS(AMAX_WIND-BMAX_WIND) 120000 7.0 +EP OCD5 ABS(AMAX_WIND-BMAX_WIND) 240000 11.3 +EP OCD5 ABS(AMAX_WIND-BMAX_WIND) 360000 14.3 +EP OCD5 ABS(AMAX_WIND-BMAX_WIND) 480000 16.4 +EP OCD5 ABS(AMAX_WIND-BMAX_WIND) 720000 18.0 +EP OCD5 ABS(AMAX_WIND-BMAX_WIND) 960000 20.3 +EP OCD5 ABS(AMAX_WIND-BMAX_WIND) 1200000 20.7 + diff --git a/metplotpy/plots/tcmpr_plots/line/__init__.py b/metplotpy/plots/tcmpr_plots/line/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/metplotpy/plots/tcmpr_plots/line/mean/__init__.py b/metplotpy/plots/tcmpr_plots/line/mean/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/metplotpy/plots/tcmpr_plots/line/median/__init__.py b/metplotpy/plots/tcmpr_plots/line/median/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/metplotpy/plots/tcmpr_plots/rank/__init__.py b/metplotpy/plots/tcmpr_plots/rank/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/metplotpy/plots/tcmpr_plots/relperf/__init__.py b/metplotpy/plots/tcmpr_plots/relperf/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/metplotpy/plots/tcmpr_plots/scatter/__init__.py b/metplotpy/plots/tcmpr_plots/scatter/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/metplotpy/plots/tcmpr_plots/skill/__init__.py b/metplotpy/plots/tcmpr_plots/skill/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/metplotpy/plots/tcmpr_plots/skill/mean/__init__.py b/metplotpy/plots/tcmpr_plots/skill/mean/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/metplotpy/plots/tcmpr_plots/skill/median/__init__.py b/metplotpy/plots/tcmpr_plots/skill/median/__init__.py new file mode 100644 index 00000000..e69de29b From 0937fd15916b4c4f097a0f46f598844e5c913bd0 Mon Sep 17 00:00:00 2001 From: TatianaBurek Date: Mon, 27 Feb 2023 14:54:27 -0700 Subject: [PATCH 3/8] initial version #240 --- .../tcmpr_plots/line/mean/tcmpr_line_mean.py | 89 +++ 
.../line/mean/tcmpr_series_line_mean.py | 79 +++ .../line/median/tcmpr_line_median.py | 79 +++ .../plots/tcmpr_plots/line/tcmpr_line.py | 112 +++ .../plots/tcmpr_plots/plot_tcmpr_hdr.dat | 70 ++ .../plots/tcmpr_plots/rank/tcmpr_rank.py | 240 +++++++ .../tcmpr_plots/relperf/tcmpr_relperf.py | 232 ++++++ .../tcmpr_plots/scatter/tcmpr_scatter.py | 245 +++++++ metplotpy/plots/tcmpr_plots/tcmpr.py | 661 ++++++++++++++++++ metplotpy/plots/tcmpr_plots/tcmpr_config.py | 501 +++++++++++++ metplotpy/plots/tcmpr_plots/tcmpr_series.py | 252 +++++++ 11 files changed, 2560 insertions(+) create mode 100644 metplotpy/plots/tcmpr_plots/line/mean/tcmpr_line_mean.py create mode 100644 metplotpy/plots/tcmpr_plots/line/mean/tcmpr_series_line_mean.py create mode 100644 metplotpy/plots/tcmpr_plots/line/median/tcmpr_line_median.py create mode 100644 metplotpy/plots/tcmpr_plots/line/tcmpr_line.py create mode 100644 metplotpy/plots/tcmpr_plots/plot_tcmpr_hdr.dat create mode 100644 metplotpy/plots/tcmpr_plots/rank/tcmpr_rank.py create mode 100644 metplotpy/plots/tcmpr_plots/relperf/tcmpr_relperf.py create mode 100644 metplotpy/plots/tcmpr_plots/scatter/tcmpr_scatter.py create mode 100644 metplotpy/plots/tcmpr_plots/tcmpr.py create mode 100644 metplotpy/plots/tcmpr_plots/tcmpr_config.py create mode 100644 metplotpy/plots/tcmpr_plots/tcmpr_series.py diff --git a/metplotpy/plots/tcmpr_plots/line/mean/tcmpr_line_mean.py b/metplotpy/plots/tcmpr_plots/line/mean/tcmpr_line_mean.py new file mode 100644 index 00000000..846fd4e5 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/line/mean/tcmpr_line_mean.py @@ -0,0 +1,89 @@ +import os + +from plots.tcmpr_plots.line.mean.tcmpr_series_line_mean import TcmprSeriesLineMean +from plots.tcmpr_plots.line.tcmpr_line import TcmprLine + + +class TcmprLineMean(TcmprLine): + def __init__(self, config_obj, column_info, col, case_data, input_df, baseline_data): + super().__init__(config_obj, column_info, col, case_data, input_df, None) + 
print("--------------------------------------------------------") + print(f"Plotting MEAN time series by {self.config_obj.series_val_names[0]}") + + self._adjust_titles() + self.series_list = self._create_series(self.input_df) + self.case_data = None + self.cur_baseline = baseline_data['cur_baseline'] + self.cur_baseline_data = baseline_data['cur_baseline_data'] + self.baseline_lead_time = 'ind' + self._init_hfip_baseline_for_plot() + if self.config_obj.prefix is None or len(self.config_obj.prefix) == 0: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.list_stat_1[0]}_mean.png" + else: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.prefix}.png" + # remove the old file if it exist + if os.path.exists(self.plot_filename): + os.remove(self.plot_filename) + self._create_figure() + + def _adjust_titles(self): + if self.yaxis_1 is None or len(self.yaxis_1) == 0: + self.yaxis_1 = self.config_obj.list_stat_1[0] + '(' + self.col['units'] + ')' + + if self.title is None or len(self.title) == 0: + self.title = 'Mean of ' + self.col['desc'] + ' by ' \ + + self.column_info[self.column_info['COLUMN'] == self.config_obj.series_val_names[0]][ + "DESCRIPTION"].tolist()[0] + + def _init_hfip_baseline_for_plot(self): + if 'Water Only' in self.config_obj.title or self.cur_baseline == 'no': + print("Plot HFIP Baseline:" + self.cur_baseline) + else: + self.cur_baseline_data = self.cur_baseline_data[(self.cur_baseline_data['TYPE'] == 'CONS')] + print('Plot HFIP Baseline:' + self.cur_baseline.replace('Error ', '')) + + def _create_series(self, input_data): + """ + Generate all the series objects that are to be displayed as specified by the plot_disp + setting in the config file. The points are all ordered by datetime. Each series object + is represented by a box in the diagram, so they also contain information + for plot-related/appearance-related settings (which were defined in the config file). 
+ + Args: + input_data: The input data in the form of a Pandas dataframe. + This data will be subset to reflect the series data of interest. + + Returns: + a list of series objects that are to be displayed + + + """ + series_list = [] + + # add series for y1 axis + num_series_y1 = len(self.config_obj.get_series_y(1)) + for i, name in enumerate(self.config_obj.get_series_y(1)): + if not isinstance(name, list): + name = [name] + series_obj = TcmprSeriesLineMean(self.config_obj, i, input_data, series_list, name) + series_list.append(series_obj) + + # add derived for y1 axis + for i, name in enumerate(self.config_obj.get_config_value('derived_series_1')): + # add default operation value if it is not provided + if len(name) == 2: + name.append("DIFF") + # include the series only if the name is valid + if len(name) == 3: + # add stat if needed + oper = name[2] + name[:] = [(s + ' ' + self.config_obj.list_stat_1[0]) if ' ' not in s else s for s in name[:2]] + name.append(oper) + + series_obj = TcmprSeriesLineMean(self.config_obj, num_series_y1 + i, input_data, series_list, name) + series_list.append(series_obj) + + # reorder series + series_list = self.config_obj.create_list_by_series_ordering(series_list) + + return series_list diff --git a/metplotpy/plots/tcmpr_plots/line/mean/tcmpr_series_line_mean.py b/metplotpy/plots/tcmpr_plots/line/mean/tcmpr_series_line_mean.py new file mode 100644 index 00000000..599ae35e --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/line/mean/tcmpr_series_line_mean.py @@ -0,0 +1,79 @@ +# ============================* +# ** Copyright UCAR (c) 2023 +# ** University Corporation for Atmospheric Research (UCAR) +# ** National Center for Atmospheric Research (NCAR) +# ** Research Applications Lab (RAL) +# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA +# ============================* + + +""" +Class Name: TcmprSeriesLineMean + """ + +from typing import Union + +import numpy as np + +import metcalcpy.util.utils as utils +from 
plots.tcmpr_plots.tcmpr_series import TcmprSeries
+from plots.tcmpr_plots.tcmpr_util import get_mean_ci
+
+
+class TcmprSeriesLineMean(TcmprSeries):
+    """
+    Represents a mean line plot series object
+    of data points and their plotting style
+    elements (line colors, etc.)
+
+    """
+
+    def __init__(self, config, idx: int, input_data, series_list: list,
+                 series_name: Union[list, tuple]):
+        super().__init__(config, idx, input_data, series_list, series_name)
+
+    def _create_series_points(self) -> dict:
+        """
+        Subset the data for the appropriate series and calculate, for each
+        value in config.indy_vals, the plot statistic and its CI offsets.
+
+        Returns:
+               dictionary of lists (one entry per indy value) with keys 'val',
+               'ncl' (val - ci ncl, or val if None), 'ncu' (ci ncu - val, or val
+               if None), 'nstat' (rows for the point), 'mean' (None if no rows)
+        """
+
+        self._init_series_data()
+
+        series_points_results = {'val': [], 'ncl': [], 'ncu': [], 'nstat': [], 'mean': []}
+
+        # for each point calculate plot statistic
+        for indy in self.config.indy_vals:
+            if utils.is_string_integer(indy):
+                indy = int(indy)
+            elif utils.is_string_strictly_float(indy):
+                indy = float(indy)
+            point_data = self.series_data.loc[self.series_data['LEAD_HR'] == indy]
+            point_data = point_data.sort_values(by=['CASE'])
+
+            ci_data = get_mean_ci(point_data['PLOT'].tolist(), self.config.alpha, self.config.n_min)
+            if ci_data['ncl'] is not None:
+                dbl_lo_ci = ci_data['val'] - ci_data['ncl']
+            else:
+                dbl_lo_ci = ci_data['val']
+
+            if ci_data['ncu'] is not None:
+                dbl_up_ci = ci_data['ncu'] - ci_data['val']
+            else:
+                dbl_up_ci = ci_data['val']
+
+            series_points_results['ncl'].append(dbl_lo_ci)
+            series_points_results['val'].append(ci_data['val'])
+            series_points_results['ncu'].append(dbl_up_ci)
+            series_points_results['nstat'].append(len(point_data))
+            if len(point_data) == 0:  # test this point's count; the 'nstat' list itself never equals 0
+                series_points_results['mean'].append(None)
+            else:
+                series_points_results['mean'].append(np.nanmean(point_data['PLOT'].tolist()))
+
+        return series_points_results
diff --git a/metplotpy/plots/tcmpr_plots/line/median/tcmpr_line_median.py
b/metplotpy/plots/tcmpr_plots/line/median/tcmpr_line_median.py new file mode 100644 index 00000000..e6cb97d6 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/line/median/tcmpr_line_median.py @@ -0,0 +1,79 @@ +import os + +from plots.tcmpr_plots.line.median.tcmpr_series_line_median import TcmprSeriesLineMedian +from plots.tcmpr_plots.line.tcmpr_line import TcmprLine + + +class TcmprLineMedian(TcmprLine): + def __init__(self, config_obj, column_info, col, case_data, input_df): + super().__init__(config_obj, column_info, col, case_data, input_df, None) + print("--------------------------------------------------------") + print(f"Plotting MEDIAN time series by {self.config_obj.series_val_names[0]}") + + print("Plot HFIP Baseline:" + self.cur_baseline) + self._adjust_titles() + self.series_list = self._create_series(self.input_df) + self.case_data = None + if self.config_obj.prefix is None or len(self.config_obj.prefix) == 0: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.list_stat_1[0]}_median.png" + else: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.prefix}.png" + + # remove the old file if it exist + if os.path.exists(self.plot_filename): + os.remove(self.plot_filename) + self._create_figure() + + def _adjust_titles(self): + if self.yaxis_1 is None or len(self.yaxis_1) == 0: + self.yaxis_1 = self.config_obj.list_stat_1[0] + '(' + self.col['units'] + ')' + + if self.title is None or len(self.title) == 0: + self.title = 'Median of ' + self.col['desc'] + ' by ' \ + + self.column_info[self.column_info['COLUMN'] == self.config_obj.series_val_names[0]][ + "DESCRIPTION"].tolist()[0] + + def _create_series(self, input_data): + """ + Generate all the series objects that are to be displayed as specified by the plot_disp + setting in the config file. The points are all ordered by datetime. 
Each series object + is represented by a box in the diagram, so they also contain information + for plot-related/appearance-related settings (which were defined in the config file). + + Args: + input_data: The input data in the form of a Pandas dataframe. + This data will be subset to reflect the series data of interest. + + Returns: + a list of series objects that are to be displayed + + + """ + series_list = [] + + # add series for y1 axis + num_series_y1 = len(self.config_obj.get_series_y(1)) + for i, name in enumerate(self.config_obj.get_series_y(1)): + if not isinstance(name, list): + name = [name] + series_obj = TcmprSeriesLineMedian(self.config_obj, i, input_data, series_list, name) + series_list.append(series_obj) + + # add derived for y1 axis + for i, name in enumerate(self.config_obj.get_config_value('derived_series_1')): + # add default operation value if it is not provided + if len(name) == 2: + name.append("DIFF") + # include the series only if the name is valid + if len(name) == 3: + # add stat if needed + oper = name[2] + name[:] = [(s + ' ' + self.config_obj.list_stat_1[0]) if ' ' not in s else s for s in name[:2]] + name.append(oper) + series_obj = TcmprSeriesLineMedian(self.config_obj, num_series_y1 + i, input_data, series_list, name) + series_list.append(series_obj) + + # reorder series + series_list = self.config_obj.create_list_by_series_ordering(series_list) + + return series_list diff --git a/metplotpy/plots/tcmpr_plots/line/tcmpr_line.py b/metplotpy/plots/tcmpr_plots/line/tcmpr_line.py new file mode 100644 index 00000000..a690faac --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/line/tcmpr_line.py @@ -0,0 +1,112 @@ +import plotly.graph_objects as go + +from plots.tcmpr_plots.tcmpr import Tcmpr +from plots.tcmpr_plots.tcmpr_series import TcmprSeries + + +class TcmprLine(Tcmpr): + def __init__(self, config_obj, column_info, col, case_data, input_df, baseline_data): + super().__init__(config_obj, column_info, col, case_data, input_df) + + def 
_create_figure(self): + """ Create a box plot from default and custom parameters""" + + self.figure = self._create_layout() + self._add_xaxis() + self._add_yaxis() + self._add_legend() + + # placeholder for the min and max values for y-axis + yaxis_min = None + yaxis_max = None + + if self.config_obj.xaxis_reverse is True: + self.series_list.reverse() + # calculate stag adjustments + stag_adjustments = self._calc_stag_adjustments() + + x_points_index = list(range(0, len(self.config_obj.indy_vals))) + # add x ticks for line plots + odered_indy_label = self.config_obj.create_list_by_plot_val_ordering(self.config_obj.indy_label) + self.figure.update_layout( + xaxis={ + 'tickmode': 'array', + 'tickvals': x_points_index, + 'ticktext': odered_indy_label + } + ) + + for series in self.series_list: + # Don't generate the plot for this series if + # it isn't requested (as set in the config file) + if series.plot_disp: + # collect min-max if we need to sync axis + yaxis_min, yaxis_max = self.find_min_max(series, yaxis_min, yaxis_max) + x_points_index_adj = x_points_index + stag_adjustments[series.idx] + self._draw_series(series, x_points_index_adj) + + print(f'Range of {self.config_obj.list_stat_1[0]}: {yaxis_min}, {yaxis_max}') + + self._add_hfip_baseline() + + self.figure.update_layout(shapes=[dict( + type='line', + yref='y', y0=0, y1=0, + xref='paper', x0=0, x1=0.95, + line={'color': '#727273', + 'dash': 'dot', + 'width': 1}, + )]) + + # add custom lines + if len(self.series_list) > 0: + self._add_lines( + self.config_obj, + sorted(self.series_list[0].series_data[self.config_obj.indy_var].unique()) + ) + # apply y axis limits + self._yaxis_limits() + + # add x2 axis + self._add_x2axis(list(range(0, len(self.config_obj.indy_vals)))) + + def _draw_series(self, series: TcmprSeries, x_points_index_adj: list) -> None: + """ + Draws the boxes on the plot + + :param series: Line series object with data and parameters + """ + + y_points = series.series_points['val'] + # show or 
not ci + # see if any ci values in not 0 + no_ci_up = all(v == 0 for v in series.series_points['ncu']) + no_ci_lo = all(v == 0 for v in series.series_points['ncl']) + error_y_visible = True + if (no_ci_up is True and no_ci_lo is True) or self.config_obj.series_ci[series.idx] == 'NONE' or \ + self.config_obj.series_ci[series.idx] is False: + error_y_visible = False + # create a trace + self.figure.add_trace( + go.Scatter(x=x_points_index_adj, + y=y_points, + showlegend=True, + mode='lines+markers', + textposition="top right", + name=self.config_obj.user_legends[series.idx], + line={'color': self.config_obj.colors_list[series.idx], + 'width': self.config_obj.linewidth_list[series.idx], + 'dash': self.config_obj.linestyles_list[series.idx]}, + marker_symbol=self.config_obj.marker_list[series.idx], + marker_color=self.config_obj.colors_list[series.idx], + marker_line_color=self.config_obj.colors_list[series.idx], + marker_size=self.config_obj.marker_size[series.idx], + error_y={'type': 'data', + 'symmetric': False, + 'array': series.series_points['ncu'], + 'arrayminus': series.series_points['ncl'], + 'visible': error_y_visible, + 'thickness': self.config_obj.linewidth_list[series.idx]} + ), + secondary_y=series.y_axis != 1 + ) diff --git a/metplotpy/plots/tcmpr_plots/plot_tcmpr_hdr.dat b/metplotpy/plots/tcmpr_plots/plot_tcmpr_hdr.dat new file mode 100644 index 00000000..5fe13205 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/plot_tcmpr_hdr.dat @@ -0,0 +1,70 @@ +COLUMN DESCRIPTION UNITS +VERSION "MET Version" NA +AMODEL "ADeck Model" NA +BMODEL "BDeck Model" NA +DESC "Description" NA +STORM_ID "Storm ID" NA +BASIN "Basin" NA +CYCLONE "Cyclone Number" NA +STORM_NAME "Storm Name" NA +INIT "Initialization Time" "YYYYMMDD_HHMMSS" +LEAD "Lead Time" "HHMMSS" +VALID "Valid Time" "YYYYMMDD_HHMMSS" +INIT_MASK "Initialization Masking Region" NA +VALID_MASK "Valid Masking Region" NA +LINE_TYPE "Line Type" NA +TOTAL "Total Number" NA +INDEX "Index of Total" NA +LEVEL "Storm 
Severity" NA +WATCH_WARN "Watch/Warning Status" NA +INITIALS "Forecaster Initials" NA +ALAT "ADeck Latitude" "deg" +ALON "ADeck Longitude" "deg" +BLAT "BDeck Latitude" "deg" +BLON "BDeck Longitude" "deg" +TK_ERR "Track Error" "nm" +X_ERR "X Track Error" "nm" +Y_ERR "Y Track Error" "nm" +ALTK_ERR "Along-Track Error" "nm" +CRTK_ERR "Cross-Track Error" "nm" +ADLAND "ADeck Distance to Land" "nm" +BDLAND "BDeck Distance to Land" "nm" +AMSLP "ADeck Minimum Sea Level Pressure" "mb" +BMSLP "BDeck Minimum Sea Level Pressure" "mb" +AMAX_WIND "ADeck Maximum Wind Speed" "kt" +BMAX_WIND "BDeck Maximum Wind Speed" "kt" +AAL_WIND_34 "ADeck All Quadrants Radius of 34kt Winds" "nm" +BAL_WIND_34 "BDeck All Quadrants Radius of 34kt Winds" "nm" +ANE_WIND_34 "ADeck Northeast Quadrant Radius of 34kt Winds" "nm" +BNE_WIND_34 "BDeck Northeast Quadrant Radius of 34kt Winds" "nm" +ASE_WIND_34 "ADeck Southeast Quadrant Radius of 34kt Winds" "nm" +BSE_WIND_34 "BDeck Southeast Quadrant Radius of 34kt Winds" "nm" +ASW_WIND_34 "ADeck Southwest Quadrant Radius of 34kt Winds" "nm" +BSW_WIND_34 "BDeck Southwest Quadrant Radius of 34kt Winds" "nm" +ANW_WIND_34 "ADeck Northwest Quadrant Radius of 34kt Winds" "nm" +BNW_WIND_34 "BDeck Northwest Quadrant Radius of 34kt Winds" "nm" +AAL_WIND_50 "ADeck All Quadrants Radius of 50kt Winds" "nm" +BAL_WIND_50 "BDeck All Quadrants Radius of 50kt Winds" "nm" +ANE_WIND_50 "ADeck Northeast Quadrant Radius of 50kt Winds" "nm" +BNE_WIND_50 "BDeck Northeast Quadrant Radius of 50kt Winds" "nm" +ASE_WIND_50 "ADeck Southeast Quadrant Radius of 50kt Winds" "nm" +BSE_WIND_50 "BDeck Southeast Quadrant Radius of 50kt Winds" "nm" +ASW_WIND_50 "ADeck Southwest Quadrant Radius of 50kt Winds" "nm" +BSW_WIND_50 "BDeck Southwest Quadrant Radius of 50kt Winds" "nm" +ANW_WIND_50 "ADeck Northwest Quadrant Radius of 50kt Winds" "nm" +BNW_WIND_50 "BDeck Northwest Quadrant Radius of 50kt Winds" "nm" +AAL_WIND_64 "ADeck All Quadrants Radius of 64kt Winds" "nm" +BAL_WIND_64 "BDeck
All Quadrants Radius of 64kt Winds" "nm" +ANE_WIND_64 "ADeck Northeast Quadrant Radius of 64kt Winds" "nm" +BNE_WIND_64 "BDeck Northeast Quadrant Radius of 64kt Winds" "nm" +ASE_WIND_64 "ADeck Southeast Quadrant Radius of 64kt Winds" "nm" +BSE_WIND_64 "BDeck Southeast Quadrant Radius of 64kt Winds" "nm" +ASW_WIND_64 "ADeck Southwest Quadrant Radius of 64kt Winds" "nm" +BSW_WIND_64 "BDeck Southwest Quadrant Radius of 64kt Winds" "nm" +ANW_WIND_64 "ADeck Northwest Quadrant Radius of 64kt Winds" "nm" +BNW_WIND_64 "BDeck Northwest Quadrant Radius of 64kt Winds" "nm" +BAVG_WIND_34 "BDeck average of 34kt Winds" "nm" +AAVG_WIND_34 "ADeck average of 34kt Winds" "nm" +BAVG_WIND_50 "BDeck average of 50kt Winds" "nm" +AAVG_WIND_50 "ADeck average of 50kt Winds" "nm" + diff --git a/metplotpy/plots/tcmpr_plots/rank/tcmpr_rank.py b/metplotpy/plots/tcmpr_plots/rank/tcmpr_rank.py new file mode 100644 index 00000000..19f4a792 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/rank/tcmpr_rank.py @@ -0,0 +1,240 @@ +# ============================* +# ** Copyright UCAR (c) 2023 +# ** University Corporation for Atmospheric Research (UCAR) +# ** National Center for Atmospheric Research (NCAR) +# ** Research Applications Lab (RAL) +# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA +# ============================* + + +""" +Class Name:TcmprRank + """ + +import os + +import plotly.graph_objects as go + +from plots.tcmpr_plots.tcmpr import Tcmpr +from plots.tcmpr_plots.tcmpr_series import TcmprSeries +from plots.tcmpr_plots.tcmpr_util import get_case_data + + +class TcmprRank(Tcmpr): + """ Generates a TcmprRank plot for 1 or more traces + + """ + + def __init__(self, config_obj, column_info, col, case_data, input_df): + """ Creates a rank plot, based on + settings indicated by parameters. 
+ + Args: + + """ + + # init common layout + super().__init__(config_obj, column_info, col, case_data, input_df) + print("--------------------------------------------------------") + + if not self.config_obj.use_ee: + raise Exception("ERROR: Cannot plot relative rank frequency when event equalization is disabled.") + print("Creating Rank plot") + print("Plot HFIP Baseline:" + self.cur_baseline) + + self._adjust_titles() + # Create a list of series objects. + # Each series object contains all the necessary information for plotting, + # such as line color, marker symbol, + # line width, and criteria needed to subset the input dataframe. + self.series_list = self._create_series(self.input_df) + + # Get the case data when necessary + if self.case_data is None: + self.case_data = get_case_data(self.input_df, self.config_obj.series_vals_1, self.config_obj.indy_vals, + self.config_obj.rp_diff, len(self.series_list)) + + if self.config_obj.prefix is None or len(self.config_obj.prefix) == 0: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.list_stat_1[0]}_rank.png" + else: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.prefix}.png" + + # remove the old file if it exist + if os.path.exists(self.plot_filename): + os.remove(self.plot_filename) + # create figure + # pylint:disable=assignment-from-no-return + # Need to have a self.figure that we can pass along to + # the methods in base_plot.py (BasePlot class methods) to + # create binary versions of the plot. 
+ self._create_figure() + + def _adjust_titles(self): + if self.yaxis_1 is None or len(self.yaxis_1) == 0: + self.yaxis_1 = 'Percent of Cases' + + if self.title is None or len(self.title) == 0: + self.title = self.config_obj.series_vals_1[0][0] + ' ' + \ + self.column_info[ + self.column_info['COLUMN'] == self.config_obj.series_val_names[0]][ + "DESCRIPTION"].tolist()[0] + ' ' \ + + self.col['desc'] + 'Rank Frequency' + + def _create_figure(self): + """ Create a box plot from default and custom parameters""" + + self.figure = self._create_layout() + self._add_xaxis() + self._add_yaxis() + self._add_legend() + + if self.config_obj.xaxis_reverse is True: + self.series_list.reverse() + # calculate stag adjustments + stag_adjustments = self._calc_stag_adjustments() + + x_points_index = list(range(0, len(self.config_obj.indy_vals))) + # add x ticks for line plots + self.figure.update_layout( + xaxis={ + 'tickmode': 'array', + 'tickvals': x_points_index, + 'ticktext': self.config_obj.indy_label + } + ) + + rank_str = ["Best", "2nd", "3rd", "Worst"] + n_series = len(self.config_obj.get_series_y(1)) + rank_str_index = min(3, n_series - 1) + legend_str = rank_str[0: rank_str_index] + if n_series >= 5: + for i in range(4, n_series - 1, -1): + legend_str.append(str(i) + "th") + legend_str.append(rank_str[3]) + self.config_obj.user_legends = legend_str + yaxis_min = None + yaxis_max = None + for series in self.series_list: + # Don't generate the plot for this series if + # it isn't requested (as set in the config file) + if series.plot_disp: + x_points_index_adj = x_points_index + stag_adjustments[series.idx] + series.create_rank_points(self.case_data) + yaxis_min, yaxis_max = self.find_min_max(series, yaxis_min, yaxis_max) + self._draw_series(series, x_points_index_adj) + + # Draw a reference line at 100/n_series + self.figure.add_hline(y=100 / len(self.series_list), line_width=1, line_dash="solid", line_color="#e5e7e9") + + print(f'Range of {self.config_obj.list_stat_1[0]}: 
{yaxis_min}, {yaxis_max}') + # Draw an invisible line to create a CI legend + self.figure.add_trace( + go.Scatter(x=[0], + y=[0], + showlegend=True, + mode='lines', + visible='legendonly', + line={'color': '#7b7d7d', + 'width': 1, + 'dash': 'dot'}, + name=str(int(100 * (1 - self.config_obj.alpha))) + '% CI' + ) + ) + + # add custom lines + if len(self.series_list) > 0: + self._add_lines( + self.config_obj, + sorted(self.series_list[0].series_data[self.config_obj.indy_var].unique()) + ) + # apply y axis limits + self._yaxis_limits() + # add x2 axis + self._add_x2axis(list(range(0, len(self.config_obj.indy_vals)))) + + def _draw_series(self, series: TcmprSeries, x_points_index_adj: list) -> None: + """ + Draws the line on the plot + + :param series: Line series object with data and parameters + """ + + color = self.config_obj.colors_list[series.idx] + width = self.config_obj.linewidth_list[series.idx] + dash = self.config_obj.linestyles_list[series.idx] + series_ci = self.config_obj.series_ci[series.idx] + + y_points = series.series_points['val'] + # show or not ci + # see if any ci values in not 0 + no_ci_up = all(v is None or v == 0 for v in series.series_points['ncu']) + no_ci_lo = all(v is None or v == 0 for v in series.series_points['ncl']) + error_y_visible = True + if (no_ci_up is True and no_ci_lo is True) or series_ci is False: + error_y_visible = False + + rank_min_text = [str(series.idx + 1)] * len(x_points_index_adj) + + # create a trace + self.figure.add_trace( + go.Scatter(x=x_points_index_adj, + y=y_points, + showlegend=True, + mode='lines+text', + textposition="middle center", + name=self.config_obj.user_legends[series.idx], + line={'color': color, + 'width': width, + 'dash': dash}, + text=rank_min_text, + textfont={ + 'size': 18, + 'color': color + } + ) + ) + + if error_y_visible is True: + # add ci lo + self.figure.add_trace( + go.Scatter(x=x_points_index_adj, + y=series.series_points['ncl'], + showlegend=False, + mode='lines', + 
name=self.config_obj.user_legends[series.idx], + line={'color': color, + 'width': width, + 'dash': 'dot'} + ) + ) + + # add ci up + self.figure.add_trace( + go.Scatter(x=x_points_index_adj, + y=series.series_points['ncu'], + showlegend=False, + mode='lines', + name=self.config_obj.user_legends[series.idx], + line={'color': color, + 'width': width, + 'dash': 'dot'} + ) + ) + + # For the BEST and WORST series, plot the RANK_MIN values + if len(series.rank_min_val) > 0: + self.figure.add_trace( + go.Scatter( + x=x_points_index_adj, + y=series.rank_min_val, + showlegend=False, + mode="text", + name="RANK MIN", + text=rank_min_text, + textfont={ + 'size': 18 + }, + textposition="middle center" + ), + secondary_y=series.y_axis != 1 + ) diff --git a/metplotpy/plots/tcmpr_plots/relperf/tcmpr_relperf.py b/metplotpy/plots/tcmpr_plots/relperf/tcmpr_relperf.py new file mode 100644 index 00000000..da8dc9e1 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/relperf/tcmpr_relperf.py @@ -0,0 +1,232 @@ +import os + +import numpy as np +import plotly.graph_objects as go + +from metcalcpy.util import utils +from plots.tcmpr_plots.tcmpr import Tcmpr +from plots.tcmpr_plots.tcmpr_series import TcmprSeries +from plots.tcmpr_plots.tcmpr_util import get_case_data + + +class TcmprRelPerf(Tcmpr): + def __init__(self, config_obj, column_info, col, case_data, input_df): + super().__init__(config_obj, column_info, col, case_data, input_df) + print("--------------------------------------------------------") + + if not self.config_obj.use_ee: + raise Exception("ERROR: Cannot plot relative performance when event equalization is disabled.") + print(f"Plotting RELPERF time series by {self.config_obj.series_val_names[0]}") + + print("Plot HFIP Baseline:" + self.cur_baseline) + self._adjust_titles() + self.series_list = self._create_series(self.input_df) + if self.case_data is None: + self.case_data = get_case_data(self.input_df, self.config_obj.series_vals_1, self.config_obj.indy_vals, + 
self.config_obj.rp_diff, len(self.series_list)) + + for series in self.series_list: + series.create_relperf_points(self.case_data) + + if self.config_obj.prefix is None or len(self.config_obj.prefix) == 0: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.list_stat_1[0]}_relperf.png" + else: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.prefix}.png" + # remove the old file if it exist + if os.path.exists(self.plot_filename): + os.remove(self.plot_filename) + self.user_legends = self._create_user_legends() + self._create_figure() + + def _create_user_legends(self): + all_user_legends = self.config_obj.get_config_value('user_legend') + legend_list = [] + series = self.config_obj.get_series_y_relperf(1) + + for idx, ser_components in enumerate(series): + if idx >= len(all_user_legends) or all_user_legends[idx].strip() == '': + # user did not provide the legend - create it + legend_list.append(' '.join(map(str, ser_components))) + else: + # user provided a legend - use it + legend_list.append(all_user_legends[idx]) + + # add to legend list legends for y2-axis series + num_series_y1 = len(self.config_obj.get_series_y(1)) + + for idx, ser_components in enumerate(self.get_config_value('derived_series_1')): + # index of the legend + legend_idx = idx + num_series_y1 + if legend_idx >= len(all_user_legends) or all_user_legends[legend_idx].strip() == '': + # user did not provide the legend - create it + legend_list.append(utils.get_derived_curve_name(ser_components)) + else: + # user provided a legend - use it + legend_list.append(all_user_legends[legend_idx]) + + return self.config_obj.create_list_by_series_ordering(legend_list) + + def _create_figure(self): + """ Create a box plot from default and custom parameters""" + + self.figure = self._create_layout() + self._add_xaxis() + self._add_yaxis() + self._add_legend() + + if self.config_obj.xaxis_reverse is True: + self.series_list.reverse() + + x_points_index 
= list(range(0, len(self.config_obj.indy_vals))) + # add x ticks for line plots + + self.figure.update_layout( + xaxis={ + 'tickmode': 'array', + 'tickvals': x_points_index, + 'ticktext': self.config_obj.indy_label + } + ) + yaxis_min = None + yaxis_max = None + + for series in self.series_list: + # Don't generate the plot for this series if + # it isn't requested (as set in the config file) + if series.plot_disp: + # collect min-max if we need to sync axis + yaxis_min, yaxis_max = self.find_min_max(series, yaxis_min, yaxis_max) + self._draw_series(series, x_points_index) + + series = TcmprSeries(self.config_obj, len(self.series_list), self.input_df, [], ['TIE']) + series.create_relperf_points(self.case_data) + yaxis_min, yaxis_max = self.find_min_max(series, yaxis_min, yaxis_max) + print(f'Range of {self.config_obj.list_stat_1[0]}: {yaxis_min}, {yaxis_max}') + tie_conf = { + 'line_color': '#808080', + 'name': 'TIE', + 'line_width': 1, + 'line_dash': 'solid', + 'marker_symbol': 'asterisk-open', + 'marker_size': self.config_obj.marker_size[-1], + 'series_ci': True + } + self._draw_series(series, x_points_index, tie_conf) + self.figure.update_layout(shapes=[dict( + type='line', + yref='y', y0=0, y1=0, + xref='paper', x0=0, x1=0.95, + line={'color': '#e5e7e9', + 'dash': 'solid', + 'width': 1}, + )]) + + # Draw an invisible line to create a CI legend + self.figure.add_trace( + go.Scatter(x=[0], + y=[0], + showlegend=True, + mode='lines', + visible='legendonly', + line={'color': '#7b7d7d', + 'width': 1, + 'dash': 'dot'}, + name=str(int(100 * (1 - self.config_obj.alpha))) + '% CI' + ) + ) + + # add custom lines + if len(self.series_list) > 0: + self._add_lines( + self.config_obj, + sorted(self.series_list[0].series_data[self.config_obj.indy_var].unique()) + ) + # apply y axis limits + self._yaxis_limits() + + # add x2 axis + self._add_x2axis(list(range(0, len(self.config_obj.indy_vals)))) + + def _draw_series(self, series: TcmprSeries, x_points_index_adj: list, 
tie_conf=None) -> None: + """ + Draws the boxes on the plot + + :param series: Line series object with data and parameters + """ + + if tie_conf is None: + color = self.config_obj.colors_list[series.idx] + width = self.config_obj.linewidth_list[series.idx] + dash = self.config_obj.linestyles_list[series.idx] + marker_symbol = self.config_obj.marker_list[series.idx] + marker_size = self.config_obj.marker_size[series.idx] + name = self.user_legends[series.idx] + else: + color = tie_conf['line_color'] + width = tie_conf['line_width'] + dash = tie_conf['line_dash'] + marker_symbol = tie_conf['marker_symbol'] + marker_size = tie_conf['marker_size'] + name = tie_conf['name'] + + y_points = series.series_points['val'] + + # create a trace + self.figure.add_trace( + go.Scatter(x=x_points_index_adj, + y=y_points, + showlegend=True, + mode='lines+markers', + textposition="top right", + name=name, + line={'color': color, + 'width': width, + 'dash': dash}, + marker_symbol=marker_symbol, + marker_color=color, + marker_line_color=color, + marker_size=marker_size + ), + secondary_y=series.y_axis != 1 + ) + + # Plot relative performance confidence intervals + if series.idx >= series.series_len: + idx = 0 + else: + idx = series.idx + if self.config_obj.series_ci[idx]: + self.figure.add_trace( + go.Scatter(x=x_points_index_adj, + y=series.series_points['ncu'], + showlegend=False, + mode='lines', + line={'color': color, + 'width': width, + 'dash': 'dot'}, + ), + secondary_y=series.y_axis != 1 + ) + self.figure.add_trace( + go.Scatter(x=x_points_index_adj, + y=series.series_points['ncl'], + showlegend=False, + mode='lines', + line={'color': color, + 'width': width, + 'dash': 'dot'}, + ), + secondary_y=series.y_axis != 1 + ) + + def _adjust_titles(self): + if self.yaxis_1 is None or len(self.yaxis_1) == 0: + self.yaxis_1 = 'Percent of Cases' + + if self.title is None or len(self.title) == 0: + # self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.prefix}.png" 
+ + self.title = f"Relative Performance of {self.col['desc']}" + if len(np.unique(self.config_obj.rp_diff)) == 1: + self.title = f"{self.title} Difference {self.config_obj.rp_diff[0]}{self.col['units']}" + self.title = f'{self.title} by {self.column_info[self.column_info["COLUMN"] == self.config_obj.series_val_names[0]]["DESCRIPTION"].tolist()[0]}' diff --git a/metplotpy/plots/tcmpr_plots/scatter/tcmpr_scatter.py b/metplotpy/plots/tcmpr_plots/scatter/tcmpr_scatter.py new file mode 100644 index 00000000..86636ba6 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/scatter/tcmpr_scatter.py @@ -0,0 +1,245 @@ +# ============================* +# ** Copyright UCAR (c) 2020 +# ** University Corporation for Atmospheric Research (UCAR) +# ** National Center for Atmospheric Research (NCAR) +# ** Research Applications Lab (RAL) +# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA +# ============================* + + +""" +Class Name: tcmpr.py + """ + +import os + +import plotly.graph_objects as go + +from metplotpy.plots.constants import PLOTLY_AXIS_LINE_COLOR, PLOTLY_AXIS_LINE_WIDTH, PLOTLY_PAPER_BGCOOR +from plots.tcmpr_plots.scatter.tcmpr_series_scatter import TcmprSeriesScatter +from plots.tcmpr_plots.tcmpr import Tcmpr +from plots.tcmpr_plots.tcmpr_util import get_dep_column + + +class TcmprScatter(Tcmpr): + """ Generates a Plotly box plot for 1 or more traces + where each box is represented by a text point data file. + """ + + def __init__(self, config_obj, column_info, col, case_data, input_df): + """ Creates a box plot, based on + settings indicated by parameters. 
+ + Args: + + """ + + # init common layout + super().__init__(config_obj, column_info, col, case_data, input_df) + print("--------------------------------------------------------") + print("Creating Scatter plot") + print("Plot HFIP Baseline:" + self.cur_baseline) + + is_series_valid = len(self.config_obj.series_val_names) == 1 and self.config_obj.series_val_names[0] == 'LEAD' + is_indy_valid = self.config_obj.indy_var == 'LEAD' + if is_series_valid is False and is_indy_valid is True: + self.config_obj.parameters['series_val_1'] = {} + self.config_obj.parameters['series_val_1'][self.config_obj.indy_var] = self.config_obj.indy_vals + self.config_obj.series_vals_1 = [self.config_obj.indy_vals] + self.config_obj.all_series_vals = [self.config_obj.indy_vals] + self.config_obj.series_val_names = [self.config_obj.indy_var] + self.config_obj.indy_vals = [] + self.config_obj.indy_var = '' + self.config_obj.list_stat_1 = [] + self.config_obj.all_series_y1 = self.config_obj._get_all_series_y(1) + if len(self.config_obj.marker_list) != len(self.config_obj.all_series_y1): + self.config_obj.marker_list = ['circle'] * len(self.config_obj.all_series_y1) + if len(self.config_obj.linewidth_list) != len(self.config_obj.all_series_y1): + self.config_obj.linewidth_list = [1] * len(self.config_obj.all_series_y1) + if len(self.config_obj.linestyles_list) != len(self.config_obj.all_series_y1): + self.config_obj.linestyles_list = [None] * len(self.config_obj.all_series_y1) + if len(self.config_obj.user_legends) != len(self.config_obj.all_series_y1): + self.config_obj.user_legends = [str(i) for i in self.config_obj.all_series_vals[0]] + if len(self.config_obj.colors_list) != len(self.config_obj.all_series_y1): + self.config_obj.colors_list = self.config_obj.scatter_color_list[0: len(self.config_obj.all_series_y1)] + elif is_series_valid is True: + self.indy_vals = [] + self.indy_var = '' + self.list_stat_1 = [] + self.all_series_y1 = self.config_obj._get_all_series_y(1) + else: + raise 
ValueError("LEAD values are not specified") + + out_file_x = self.config_obj.scatter_x[-1].replace(')', '').replace('(', '_') + out_file_y = self.config_obj.scatter_y[-1].replace(')', '').replace('(', '_') + + if self.config_obj.prefix is None or len(self.config_obj.prefix) == 0: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{out_file_x}_vs_{out_file_y}_scatter.png" + else: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.prefix}.png" + + # remove the old file if it exist + if os.path.exists(self.plot_filename): + os.remove(self.plot_filename) + + self._adjust_titles() + + # Create a list of series objects. + # Each series object contains all the necessary information for plotting, + # such as line color, marker symbol, + # line width, and criteria needed to subset the input dataframe. + self.series_list = self._create_series(self.input_df) + + # create figure + # pylint:disable=assignment-from-no-return + # Need to have a self.figure that we can pass along to + # the methods in base_plot.py (BasePlot class methods) to + # create binary versions of the plot. + self._create_figure() + + def _adjust_titles(self): + for ind, scatter_x_val in enumerate(self.config_obj.scatter_x): + print("Processing scatter columns: " + scatter_x_val + " versus", self.config_obj.scatter_y[ind]) + # Get the data to be plotted + col_x = get_dep_column(scatter_x_val, self.column_info, self.input_df) + self.input_df['SCATTER_X'] = col_x['val'] + # Get the data to be plotted + col_y = get_dep_column(self.config_obj.scatter_y[ind], self.column_info, self.input_df) + self.input_df['SCATTER_Y'] = col_y['val'] + + if self.yaxis_1 is None or len(self.yaxis_1) == 0: + self.yaxis_1 = self.config_obj.scatter_y[ind] + " (" + col_y['units'] + ')' + if self.config_obj.xaxis == 'test x_label': + self.config_obj.xaxis = scatter_x_val + " (" + col_x['units'] + ')' + + if self.title is None or len(self.title) == 0: + self.title = "Scatter plot of
" + col_x['desc'] + '
versus ' + col_y['desc'] + + def _create_series(self, input_data): + """ + Generate all the series objects that are to be displayed as specified by the plot_disp + setting in the config file. The points are all ordered by datetime. Each series object + is represented by a box in the diagram, so they also contain information + for plot-related/appearance-related settings (which were defined in the config file). + + Args: + input_data: The input data in the form of a Pandas dataframe. + This data will be subset to reflect the series data of interest. + + Returns: + a list of series objects that are to be displayed + + + """ + series_list = [] + + # add series for y1 axis + for i, name in enumerate(self.config_obj.get_series_y(1)): + if not isinstance(name, list): + name = [name] + series_obj = TcmprSeriesScatter(self.config_obj, i, input_data, series_list, name) + series_list.append(series_obj) + + # reorder series + series_list = self.config_obj.create_list_by_series_ordering(series_list) + + return series_list + + def _create_figure(self): + """ Create a box plot from default and custom parameters""" + + self.figure = self._create_layout() + self._add_xaxis() + self._add_yaxis() + self._add_legend() + + for series in self.series_list: + # Don't generate the plot for this series if + # it isn't requested (as set in the config file) + if series.plot_disp: + self._draw_series(series) + + values = [*self.input_df['SCATTER_X'], *self.input_df['SCATTER_Y']] + # Draw a 1 to 1 reference line + if (self.elements_with_string(self.config_obj.scatter_x, '_WIND_') > 0 and self.elements_with_string( + self.config_obj.scatter_y, '_WIND_') > 0) \ + or (self.elements_with_string(self.config_obj.scatter_x, 'AMAX_WIND') > 0 and self.elements_with_string(self.config_obj.scatter_y, 'BMAX_WIND') > 0) \ + or (self.elements_with_string(self.config_obj.scatter_x, 'BMAX_WIND') > 0 and self.elements_with_string(self.config_obj.scatter_y, 'AMAX_WIND') > 0): + + xrange = [min(values) - 1, 
max(values) + 1] + yrange = [min(values) - 1, max(values) + 1] + self.figure.update_layout(yaxis={'range': yrange, 'autorange': False}) + self.figure.update_layout(xaxis={'range': xrange, 'autorange': False}) + + self.figure.add_trace( + go.Scatter(x=xrange, + y=yrange, + line={'color': '#7b7d7d', + 'dash': 'dash', + 'width': 1}, + showlegend=False, + mode='lines', + name='No-Skill' + )) + else: + xrange = [min(self.input_df['SCATTER_X']) - 1, max(self.input_df['SCATTER_X']) + 1] + yrange = [min(self.input_df['SCATTER_Y']) - 1, max(self.input_df['SCATTER_Y']) + 1] + self.figure.update_layout(yaxis={'range': yrange, 'autorange': False}) + self.figure.update_layout(xaxis={'range': xrange, 'autorange': False}) + # Draw a reference line at 0 + self.figure.add_hline(y=yrange[0], line_width=1, line_dash="dash", line_color="#7b7d7d") + + def _draw_series(self, series: TcmprSeriesScatter) -> None: + + # Create a point plot + self.figure.add_trace( + go.Scatter(x=series.series_data['SCATTER_X'], + y=series.series_data['SCATTER_Y'], + showlegend=True, + mode='markers', + name=self.config_obj.user_legends[series.idx], + marker=dict( + color=PLOTLY_PAPER_BGCOOR, + size=8, + line=dict( + color=self.config_obj.colors_list[series.idx], + width=1 + ) + ), + ), + secondary_y=series.y_axis != 1 + ) + + def _add_xaxis(self) -> None: + """ + Configures and adds x-axis to the plot + """ + self.figure.update_xaxes(title_text=self.config_obj.xaxis, + linecolor=PLOTLY_AXIS_LINE_COLOR, + linewidth=PLOTLY_AXIS_LINE_WIDTH, + showgrid=self.config_obj.grid_on, + ticks="outside", + zeroline=False, + gridwidth=self.config_obj.parameters['grid_lwd'], + gridcolor=self.config_obj.blended_grid_col, + automargin=True, + title_font={ + 'size': self.config_obj.x_title_font_size + }, + title_standoff=abs(self.config_obj.parameters['xlab_offset']), + tickangle=self.config_obj.x_tickangle, + tickfont={'size': self.config_obj.x_tickfont_size}, + tickformat='d', + tickmode='auto' + ) + # reverse xaxis if 
needed + if hasattr(self.config_obj, 'xaxis_reverse') and self.config_obj.xaxis_reverse is True: + self.figure.update_xaxes(autorange="reversed") + + @staticmethod + def elements_with_string(list_of_str, pattern): + count = 0 + for s in list_of_str: + if pattern in s: + count = count + 1 + return count diff --git a/metplotpy/plots/tcmpr_plots/tcmpr.py b/metplotpy/plots/tcmpr_plots/tcmpr.py new file mode 100644 index 00000000..46132c0b --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/tcmpr.py @@ -0,0 +1,661 @@ +# ============================* +# ** Copyright UCAR (c) 2020 +# ** University Corporation for Atmospheric Research (UCAR) +# ** National Center for Atmospheric Research (NCAR) +# ** Research Applications Lab (RAL) +# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA +# ============================* + + +""" +Class Name: tcmpr.py + """ +import copy +import glob +import os +import sys +from datetime import datetime +from typing import Union + +import numpy as np +import pandas as pd +import plotly.graph_objects as go +import yaml +from plotly.graph_objects import Figure +from plotly.subplots import make_subplots + +import metcalcpy.util.utils as calc_util +from metcalcpy.event_equalize import event_equalize +from metplotpy.plots import util +from metplotpy.plots.base_plot import BasePlot +from metplotpy.plots.constants import PLOTLY_AXIS_LINE_COLOR, PLOTLY_AXIS_LINE_WIDTH, PLOTLY_PAPER_BGCOOR +from plots.tcmpr_plots.tcmpr_config import TcmprConfig +from plots.tcmpr_plots.tcmpr_series import TcmprSeries +from plots.tcmpr_plots.tcmpr_util import init_hfip_baseline, common_member, get_dep_column + +PLOTS_WITH_BASELINE = ['boxplot', 'point', 'mean', 'skill_mn'] + + +class Tcmpr(BasePlot): + """ Generates a Plotly box plot for 1 or more traces + where each box is represented by a text point data file. + """ + + def __init__(self, config_obj, column_info, col, case_data, input_df): + """ Creates a box plot, based on + settings indicated by parameters. 
+ + Args: + @param parameters: dictionary containing user defined parameters + + """ + + # init common layout + super().__init__(None, "tcmpr_defaults.yaml") + + self.series_list = [] + + # instantiate a BoxConfig object, which holds all the necessary settings from the + # config file that represents the BasePlot object (Box). + self.config_obj = config_obj + + # Read in input data, location specified in config file + self.input_df = input_df + + # Read the TCMPR column information from a data file. + self.column_info = column_info + + self.cur_baseline = "no" + self.cur_baseline_data = None + + self.case_data = case_data + + self.col = col + self.title = self.config_obj.title + self.baseline_lead_time = 'lead' + self.yaxis_1 = self.config_obj.yaxis_1 + + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.list_stat_1[0]}_{self.config_obj.plot_list}.png" + # Check that we have all the necessary settings for each series + # TODO implement the consistency check if no series values were specified + # is_config_consistent = self.config_obj._config_consistency_check() + # if not is_config_consistent: + # raise ValueError("The number of series defined by series_val_1/2 and derived curves is" + # " inconsistent with the number of settings" + # " required for describing each series. Please check" + # " the number of your configuration file's plot_i," + # " plot_disp, series_order, user_legend," + # " colors, and series_symbols settings.") + + # if series and indy_vals were not provided - use all values from the file + if len(self.config_obj.indy_vals) == 0 and self.config_obj.indy_var != '': + self.config_obj.indy_vals = sorted(self.input_df[self.config_obj.indy_var].unique()) + + def _create_series(self, input_data): + """ + Generate all the series objects that are to be displayed as specified by the plot_disp + setting in the config file. The points are all ordered by datetime. 
Each series object + is represented by a box in the diagram, so they also contain information + for plot-related/appearance-related settings (which were defined in the config file). + + Args: + input_data: The input data in the form of a Pandas dataframe. + This data will be subset to reflect the series data of interest. + + Returns: + a list of series objects that are to be displayed + + + """ + series_list = [] + + # add series for y1 axis + num_series_y1 = len(self.config_obj.get_series_y(1)) + for i, name in enumerate(self.config_obj.get_series_y(1)): + if not isinstance(name, list): + name = [name] + series_obj = TcmprSeries(self.config_obj, i, input_data, series_list, name) + series_list.append(series_obj) + + # add derived for y1 axis + for i, name in enumerate(self.config_obj.get_config_value('derived_series_1')): + # add default operation value if it is not provided + if len(name) == 2: + name.append("DIFF") + # include the series only if the name is valid + if len(name) == 3: + # add stat if needed + oper = name[2] + name[:] = [(s + ' ' + self.config_obj.list_stat_1[0]) if ' ' not in s else s for s in name[:2]] + name.append(oper) + series_obj = TcmprSeries(self.config_obj, num_series_y1 + i, input_data, series_list, name) + series_list.append(series_obj) + + # reorder series + series_list = self.config_obj.create_list_by_series_ordering(series_list) + + return series_list + + def _calc_stag_adjustments(self) -> list: + """ + Calculates the x-axis adjustment for each point if requested. 
+ It is needed so the points and CIs for each x-axis value are not placed on top of each other + + :return: the list of the adjustment values + """ + + # get the total number of series + num_stag = len(self.config_obj.all_series_y1) + + # calculate staggering values + + dbl_adj_scale = (len(self.config_obj.indy_vals) - 1) / 100 + stag_vals = np.linspace(-(num_stag / 2) * dbl_adj_scale, + (num_stag / 2) * dbl_adj_scale, + num_stag, + True) + stag_vals = stag_vals + dbl_adj_scale / 2 + return stag_vals + + def _add_hfip_baseline(self): + # Add HFIP baseline for each lead time + if self.cur_baseline_data is not None: + baseline_x_values = [] + baseline_y_values = [] + lead_times = self.config_obj.indy_vals + lead_times.sort() + for ind, lead in enumerate(lead_times): + # Get data for the current lead time + baseline_lead = self.cur_baseline_data[(self.cur_baseline_data['LEAD_HR'] == lead)][ + 'VALUE'].tolist() + if self.baseline_lead_time == 'ind': + current_leads = [ind] * len(baseline_lead) + else: + current_leads = [lead] * len(baseline_lead) + + baseline_x_values.extend(current_leads) + baseline_y_values.extend(baseline_lead) + + self.figure.add_trace( + go.Scatter(x=baseline_x_values, + y=baseline_y_values, + showlegend=True, + mode='markers', + textposition="top right", + name=self.cur_baseline, + marker=dict(size=8, + color='rgb(0,0,255)', + line=dict( + width=1, + color='rgb(0,0,255)' + ), + symbol='diamond-cross-open', + ) + ) + ) + + def _yaxis_limits(self) -> None: + """ + Apply limits on y axis if needed + """ + if len(self.config_obj.parameters['ylim']) > 0: + self.figure.update_layout(yaxis={'range': [self.config_obj.parameters['ylim'][0], + self.config_obj.parameters['ylim'][1]], + 'autorange': False}) + + def _create_layout(self) -> Figure: + """ + Creates a new layout based on the properties from the config file + including plots size, annotation and title + + :return: Figure object + """ + # create annotation + annotation_caption = {'text': 
util.apply_weight_style(self.config_obj.parameters['plot_caption'], + self.config_obj.parameters['caption_weight']), + 'align': 'left', + 'showarrow': False, + 'xref': 'paper', + 'yref': 'paper', + 'x': self.config_obj.parameters['caption_align'], + 'y': self.config_obj.caption_offset, + 'font': { + 'size': self.config_obj.caption_size, + 'color': self.config_obj.parameters['caption_col'] + } + } + annotation_subtitle = {'text': util.apply_weight_style(self.config_obj.subtitle, + 1), + 'align': 'center', + 'showarrow': False, + 'xref': 'paper', + 'yref': 'paper', + 'x': 0.5, + 'y': -0.26, + 'font': { + 'size': self.config_obj.caption_size, + 'color': self.config_obj.parameters['caption_col'] + } + } + + # create title + title = {'text': util.apply_weight_style(self.title, + self.config_obj.parameters['title_weight']), + 'font': { + 'size': self.config_obj.title_font_size, + }, + 'y': self.config_obj.title_offset, + 'x': self.config_obj.parameters['title_align'], + 'xanchor': 'center', + 'yanchor': 'top', + 'xref': 'paper' + } + + # create a layout and allow y2 axis + fig = make_subplots(specs=[[{"secondary_y": True}]]) + + # add size, annotation, title + fig.update_layout( + width=self.config_obj.plot_width, + height=self.config_obj.plot_height, + margin=self.config_obj.plot_margins, + paper_bgcolor=PLOTLY_PAPER_BGCOOR, + annotations=[annotation_caption, annotation_subtitle], + title=title, + plot_bgcolor=PLOTLY_PAPER_BGCOOR + ) + + return fig + + def _add_xaxis(self) -> None: + """ + Configures and adds x-axis to the plot + """ + self.figure.update_xaxes(title_text=self.config_obj.xaxis, + linecolor=PLOTLY_AXIS_LINE_COLOR, + linewidth=PLOTLY_AXIS_LINE_WIDTH, + showgrid=self.config_obj.grid_on, + ticks="outside", + zeroline=False, + gridwidth=self.config_obj.parameters['grid_lwd'], + gridcolor=self.config_obj.blended_grid_col, + automargin=True, + title_font={ + 'size': self.config_obj.x_title_font_size + }, + 
title_standoff=abs(self.config_obj.parameters['xlab_offset']), + tickangle=self.config_obj.x_tickangle, + tickfont={'size': self.config_obj.x_tickfont_size}, + tickformat='d' + ) + # reverse xaxis if needed + if hasattr(self.config_obj, 'xaxis_reverse') and self.config_obj.xaxis_reverse is True: + self.figure.update_xaxes(autorange="reversed") + + def _add_yaxis(self) -> None: + """ + Configures and adds y-axis to the plot + """ + self.figure.update_yaxes(title_text= + util.apply_weight_style(self.yaxis_1, + self.config_obj.parameters['ylab_weight']), + secondary_y=False, + linecolor=PLOTLY_AXIS_LINE_COLOR, + linewidth=PLOTLY_AXIS_LINE_WIDTH, + showgrid=self.config_obj.grid_on, + zeroline=False, + ticks="inside", + gridwidth=self.config_obj.parameters['grid_lwd'], + gridcolor=self.config_obj.blended_grid_col, + automargin=True, + title_font={ + 'size': self.config_obj.y_title_font_size + }, + title_standoff=self.config_obj.parameters['ylab_offset'], + tickangle=self.config_obj.y_tickangle, + tickfont={'size': self.config_obj.y_tickfont_size}, + exponentformat='none' + ) + + def _add_x2axis(self, vals) -> None: + """ + Creates x2axis based on the properties from the config file + and attaches it to the initial Figure + + """ + if self.config_obj.show_nstats: + # new_list = ['' +str(x) +''+'
AAA' for x in n_stats + + n_stats = [''] * len(self.config_obj.indy_vals) + + for ind, val_for_indy in enumerate(n_stats): + if self.config_obj.use_ee is True and len(self.series_list) > 0: + n_stats[ind] = n_stats[ind] + '' + str( + self.series_list[0].series_points['nstat'][ind]) + '
' + else: + for series in self.series_list: + n_stats[ind] = n_stats[ind] + '' + str( + series.series_points['nstat'][ind]) + '
' + + self.figure.update_layout(xaxis2={'title_text': util.apply_weight_style('', + self.config_obj.parameters[ + 'x2lab_weight'] + ), + 'linecolor': PLOTLY_AXIS_LINE_COLOR, + 'linewidth': PLOTLY_AXIS_LINE_WIDTH, + 'overlaying': 'x', + 'side': 'top', + 'showgrid': False, + 'zeroline': False, + 'title_font': {'size': self.config_obj.x2_title_font_size}, + 'title_standoff': abs(self.config_obj.parameters['x2lab_offset']), + 'tickmode': 'array', + 'tickvals': vals, + 'ticktext': n_stats, + 'tickangle': self.config_obj.x2_tickangle, + 'tickfont': {'size': self.config_obj.x2_tickfont_size}, + 'scaleanchor': 'x' + } + ) + # reverse x2axis if needed + if self.config_obj.xaxis_reverse is True: + self.figure.update_layout(xaxis2={'autorange': "reversed"}) + + # need to add an invisible line with all values = None + self.figure.add_trace( + go.Scatter(y=[None] * len(vals), x=vals, + xaxis='x2', showlegend=False) + ) + + def _add_legend(self) -> None: + """ + Creates a plot legend based on the properties from the config file + and attaches it to the initial Figure + """ + self.figure.update_layout(legend={'x': self.config_obj.bbox_x, + 'y': self.config_obj.bbox_y, + 'xanchor': 'center', + 'yanchor': 'top', + 'bordercolor': self.config_obj.legend_border_color, + 'borderwidth': self.config_obj.legend_border_width, + 'orientation': self.config_obj.legend_orientation, + 'font': { + 'size': self.config_obj.legend_size, + 'color': "black" + }, + 'traceorder': 'normal' + }) + if hasattr(self.config_obj, 'xaxis_reverse') and self.config_obj.xaxis_reverse is True: + self.figure.update_layout(legend={'traceorder': 'reversed'}) + + def save_to_file(self): + """Saves the image to a file specified in the config file. 
+ Prints a message if fails + + Args: + + Returns: + + """ + + # Create the directory for the output plot if it doesn't already exist + dirname = os.path.dirname(os.path.abspath(self.plot_filename)) + if not os.path.exists(dirname): + os.mkdir(dirname) + print(f'Creating image file: {self.plot_filename}') + if self.figure: + try: + self.figure.write_image(file=self.plot_filename, format='png', + width=self.config_obj.plot_width, + height=self.config_obj.plot_height, + scale=2) + except FileNotFoundError: + print("Can't save to file " + self.plot_filename) + except ValueError as ex: + print(ex) + else: + print("Oops! The figure was not created. Can't save.") + + @staticmethod + def find_min_max(series: TcmprSeries, yaxis_min: Union[float, None], + yaxis_max: Union[float, None]) -> tuple: + """ + Finds min and max value between provided min and max and y-axis CI values of this series + if yaxis_min or yaxis_max is None - min/max value of the series is returned + + :param series: series to use for calculations + :param yaxis_min: previously calculated min value + :param yaxis_max: previously calculated max value + :return: a tuple with calculated min/max + """ + # calculate series upper and lower limits of CIs + + # Skip lead times for which no data is found + if len(series.series_data) == 0: + return yaxis_min, yaxis_max + + # Get the values to be plotted for this lead times + all_values = series.series_points['val'] + + if 'ncl' in series.series_points: + all_values = all_values + series.series_points['ncl'] + if 'ncu' in series.series_points: + all_values = all_values + series.series_points['ncu'] + + if len(all_values) == 0: + return yaxis_min, yaxis_max + + low_range = min([v for v in all_values if v is not None]) + upper_range = max([v for v in all_values if v is not None]) + + # find min max + if yaxis_min is None or yaxis_max is None: + return low_range, upper_range + + return min(yaxis_min, low_range), max(yaxis_max, upper_range) + + +def 
main(config_filename=None): + """ + Generates a sample, default, box plot using a combination of + default and custom config files on sample data found in this directory. + The location of the input data is defined in either the default or + custom config file. + Args: + @param config_filename: default is None, the name of the custom config file to apply + """ + + # Retrieve the contents of the custom config file to over-ride + # or augment settings defined by the default config file. + # with open("./custom_box.yaml", 'r') as stream: + if not config_filename: + config_file = util.read_config_from_command_line() + else: + config_file = config_filename + with open(config_file, 'r') as stream: + try: + docs = yaml.load(stream, Loader=yaml.FullLoader) + except yaml.YAMLError as exc: + print(exc) + + # Determine location of the default YAML config files and then + # read defaults stored in YAML formatted file into the dictionary + if 'METPLOTPY_BASE' in os.environ: + location = os.path.join(os.environ['METPLOTPY_BASE'], 'metplotpy/plots/config') + else: + location = os.path.realpath(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'config')) + + with open(os.path.join(location, "tcmpr_defaults.yaml"), 'r') as stream: + try: + defaults = yaml.load(stream, Loader=yaml.FullLoader) + except yaml.YAMLError as exc: + print(exc) + + # merge user defined parameters into defaults if they exist + docs = {**defaults, **docs} + + config_obj = TcmprConfig(docs) + + tcst_files = [] + # list all .tcst files in tcst_dir + if config_obj.tcst_dir is not None and len(config_obj.tcst_dir) > 0 and os.path.exists(config_obj.tcst_dir): + tcst_files = glob.glob(config_obj.tcst_dir + 'tc_pairs*.tcst') + # add specific files + for file in config_obj.tcst_files: + if file not in tcst_files: + tcst_files.append(file) + + input_df = read_tcst_files(config_obj, tcst_files) + + # Apply event equalization, if requested + if config_obj.use_ee is True: + output_data = pd.DataFrame() + series = 
copy.deepcopy(config_obj.parameters['series_val_1']) + if 'skill_mn' in config_obj.plot_list or 'skill_md' in config_obj.plot_list: + series['AMODEL'].extend(config_obj.skill_ref) + + for series_var, series_var_vals in series.items(): + series_data = input_df[input_df[series_var].isin(series_var_vals)] + series_data = event_equalize(series_data, '', config_obj.parameters['series_val_1'], [], [], True, False) + if output_data.empty: + output_data = series_data + else: + output_data.append(series_data) + + input_df = output_data + # input_df = output_data.copy(deep=True) + # output_data = output_data.drop(columns=['equalize', 'VALID_TIME']) + # output_data.to_csv('/Users/tatiana/PycharmProjects/METplotpy/metplotpy/plots/tcmpr_plots/tc_pairs_2.tcst', index=False, sep='\t', na_rep='NA') + + input_df.rename({'equalize': 'CASE'}, axis=1, inplace=True) + # Sort the data by the CASE column + input_df = input_df.sort_values(by=['CASE', 'AMODEL']) + input_df.reset_index(drop=True, inplace=True) + + # Define a demo and retro column + # TODO these values neve get used - maybe need to remove + if config_obj.demo_yr is not None and config_obj.demo_yr != 'NA': + demo_yr_obj = datetime.strptime(str(config_obj.demo_yr), '%Y') + input_df.loc[input_df['VALID_TIME'] >= demo_yr_obj, "TYPE"] = "DEMO" + input_df.loc[input_df['VALID_TIME'] < demo_yr_obj, "TYPE"] = "RETRO" + + print_data_info(input_df, config_obj.series_val_names[0]) + + # Read the TCMPR column information from a data file. 
+ column_info = pd.read_csv(os.path.join(sys.path[0], config_obj.column_info_file), + sep=r'\s+', header='infer', + quotechar='"', skipinitialspace=True, encoding='utf-8') + + col_to_plot = get_dep_column(config_obj.list_stat_1[0], column_info, input_df) + input_df['PLOT'] = col_to_plot['val'] + + baseline_data = None + if common_member(config_obj.plot_list, PLOTS_WITH_BASELINE): + baseline_data = init_hfip_baseline(config_obj, config_obj.baseline_file, input_df) + + plot = None + common_case_data = None + for plot_type in config_obj.plot_list: + try: + if plot_type == 'boxplot': + from plots.tcmpr_plots.box.tcmpr_box import TcmprBox + plot = TcmprBox(config_obj, column_info, col_to_plot, common_case_data, input_df, baseline_data) + elif plot_type == 'point': + from plots.tcmpr_plots.box.tcmpr_point import TcmprPoint + plot = TcmprPoint(config_obj, column_info, col_to_plot, common_case_data, input_df, baseline_data) + elif plot_type == 'mean': + from plots.tcmpr_plots.line.mean.tcmpr_line_mean import TcmprLineMean + plot = TcmprLineMean(config_obj, column_info, col_to_plot, common_case_data, input_df, baseline_data) + elif plot_type == 'median': + from plots.tcmpr_plots.line.median.tcmpr_line_median import TcmprLineMedian + plot = TcmprLineMedian(config_obj, column_info, col_to_plot, common_case_data, input_df) + elif plot_type == 'relperf': + from plots.tcmpr_plots.relperf.tcmpr_relperf import TcmprRelPerf + plot = TcmprRelPerf(config_obj, column_info, col_to_plot, common_case_data, input_df) + elif plot_type == 'rank': + from plots.tcmpr_plots.rank.tcmpr_rank import TcmprRank + plot = TcmprRank(config_obj, column_info, col_to_plot, common_case_data, input_df) + elif plot_type == 'scatter': + from plots.tcmpr_plots.scatter.tcmpr_scatter import TcmprScatter + plot = TcmprScatter(config_obj, column_info, col_to_plot, common_case_data, input_df) + elif plot_type == 'skill_mn': + from plots.tcmpr_plots.skill.mean.tcmpr_skill_mean import TcmprSkillMean + plot = 
TcmprSkillMean(config_obj, column_info, col_to_plot, common_case_data, input_df, baseline_data) + elif plot_type == 'skill_md': + from plots.tcmpr_plots.skill.median.tcmpr_skill_median import TcmprSkillMedian + plot = TcmprSkillMedian(config_obj, column_info, col_to_plot, common_case_data, input_df) + + plot.save_to_file() + plot.show_in_browser() + if common_case_data is None: + common_case_data = plot.case_data + + except (ValueError, Exception) as ve: + print(ve) + + +def print_data_info(input_df, series): + # Print information about the dataset. + info_list = ["AMODEL", "BMODEL", "BASIN", "CYCLONE", + "STORM_NAME", "LEAD_HR", "LEVEL", "WATCH_WARN"] + for info in info_list: + uniq_list = input_df[info].unique() + if pd.isna(uniq_list).any(): + vals = 'NA' + else: + vals = ','.join(map(str, uniq_list)) + print(f'Found {len(uniq_list)} unique entries for {info}: {vals}') + # Get the unique series entries from the data + series_uniq = input_df[series].unique() + + # List unique series entries + print( + f'Found {len(series_uniq)} unique value(s) for the {series} series: {",".join(map(str, series_uniq))}') + + +def read_tcst_files(config_obj, tcst_files): + all_fields_values = copy.deepcopy(config_obj.parameters['series_val_1']) + all_fields_values.update(config_obj.parameters['fixed_vars_vals_input']) + if 'skill_mn' in config_obj.plot_list or 'skill_md' in config_obj.plot_list: + all_fields_values['AMODEL'].extend(config_obj.skill_ref) + input_df = None + for file in tcst_files: + if os.path.exists(file): + print(f'Reading track data:{file}') + file_df = pd.read_csv(file, sep=r'\s+|;|:', header='infer', engine="python") + file_df['LEAD_HR'] = file_df['LEAD'] / 10000 + file_df['LEAD_HR'] = file_df['LEAD_HR'].astype('int') + all_filters = [] + # create a set of filters + + for field, value in all_fields_values.items(): + filter_list = value + for i, filter_val in enumerate(filter_list): + if calc_util.is_string_integer(filter_val): + filter_list[i] = int(filter_val) 
+ elif calc_util.is_string_strictly_float(filter_val): + filter_list[i] = float(filter_val) + + all_filters.append(file_df[field].isin(filter_list)) + + all_filters.append(file_df['LEAD_HR'].isin(config_obj.parameters['indy_vals'])) + + # use numpy to select the rows where any record evaluates to True + mask = np.array(all_filters).all(axis=0) + + file_df['VALID_TIME'] = pd.to_datetime(file_df['VALID'], format='%Y%m%d_%H%M%S') # 20170417_060000 + # Define a case column + file_df['equalize'] = file_df.loc[:, 'BMODEL'].astype(str) \ + + ':' + file_df.loc[:, 'STORM_ID'].astype(str) \ + + ':' + file_df.loc[:, 'INIT'].astype(str) \ + + ':' + file_df.loc[:, 'LEAD_HR'].astype(str) \ + + ':' + file_df.loc[:, 'VALID'].astype(str) + if input_df is None: + input_df = file_df.loc[mask] + else: + input_df = pd.concat([input_df, file_df.loc[mask]]) + return input_df + + +if __name__ == "__main__": + main() diff --git a/metplotpy/plots/tcmpr_plots/tcmpr_config.py b/metplotpy/plots/tcmpr_plots/tcmpr_config.py new file mode 100644 index 00000000..4c177d31 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/tcmpr_config.py @@ -0,0 +1,501 @@ +# ============================* +# ** Copyright UCAR (c) 2020 +# ** University Corporation for Atmospheric Research (UCAR) +# ** National Center for Atmospheric Research (NCAR) +# ** Research Applications Lab (RAL) +# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA +# ============================* + + +""" +Class Name: tcmpr_config.py + +Holds values set in the box plot config file(s) +""" + +import itertools + +import metcalcpy.util.utils as utils +from .. import constants +from .. import util +from ..config import Config + + +class TcmprConfig(Config): + """ + Prepares and organises Line plot parameters + """ + + def __init__(self, parameters: dict) -> None: + """ Reads in the plot settings from a box plot config file. 
+ + Args: + @param parameters: dictionary containing user defined parameters + + """ + super().__init__(parameters) + + ############################################## + self.plot_list = self._get_plot() + self.tcst_files = self._get_tcst_files() + self.tcst_dir = self._get_tcst_dir() + self.rp_diff = self._get_rp_diff() + self.hfip_bsln = self._get_hfip_bsln() + self.footnote_flag = self._get_bool('footnote_flag') + self.n_min = self.get_config_value('n_min') + self.scatter_x = self.get_config_value('scatter_x') + self.scatter_y = self.get_config_value('scatter_y') + self.demo_yr = self.get_config_value('demo_yr') # not used in Rscript. not sure if we need it + self.alpha = self.get_config_value('alpha') + + # Check the relative scatter settings + if len(self.scatter_x) != len(self.scatter_y): + raise ValueError("ERROR: The number of scatter_x and scatter_y variables specified must match each other") + self.skill_ref = self.get_config_value('skill_ref') + + ############################################## + + # plot parameters + self.grid_on = self._get_bool('grid_on') + self.plot_width = self.calculate_plot_dimension('plot_width', 'pixels') + self.plot_height = self.calculate_plot_dimension('plot_height', 'pixels') + self.plot_margins = self.parameters['mar'] + self.blended_grid_col = util.alpha_blending(self.parameters['grid_col'], 0.5) + self.plot_stat = self._get_plot_stat() + self.show_nstats = self._get_bool('show_nstats') + self.xaxis_reverse = self._get_bool('xaxis_reverse') + self.sync_yaxes = self._get_bool('sync_yaxes') + self.create_html = self._get_bool('create_html') + + ############################################## + # caption parameters + self.caption_size = int(constants.DEFAULT_CAPTION_FONTSIZE + * self.get_config_value('caption_size')) + self.caption_offset = self.parameters['caption_offset'] - 3.1 + + ############################################## + # title parameters + self.title_font_size = self.parameters['title_size'] * 
constants.DEFAULT_TITLE_FONT_SIZE + self.title_offset = self.parameters['title_offset'] * constants.DEFAULT_TITLE_OFFSET + self.y_title_font_size = self.parameters['ylab_size'] + constants.DEFAULT_TITLE_FONTSIZE + + ############################################## + # y-axis parameters + self.y_tickangle = self.parameters['ytlab_orient'] + if self.y_tickangle in constants.YAXIS_ORIENTATION.keys(): + self.y_tickangle = constants.YAXIS_ORIENTATION[self.y_tickangle] + self.y_tickfont_size = self.parameters['ytlab_size'] + constants.DEFAULT_TITLE_FONTSIZE + + ############################################## + + # x-axis parameters + self.x_title_font_size = self.parameters['xlab_size'] + constants.DEFAULT_TITLE_FONTSIZE + self.x_tickangle = self.parameters['xtlab_orient'] + if self.x_tickangle in constants.XAXIS_ORIENTATION.keys(): + self.x_tickangle = constants.XAXIS_ORIENTATION[self.x_tickangle] + self.x_tickfont_size = self.parameters['xtlab_size'] + constants.DEFAULT_TITLE_FONTSIZE + self.xaxis = util.apply_weight_style(self.xaxis, self.parameters['xlab_weight']) + + ############################################## + # x2-axis parameters + self.x2_title_font_size = self.parameters['x2lab_size'] + constants.DEFAULT_TITLE_FONTSIZE + self.x2_tickangle = self.parameters['x2tlab_orient'] + if self.x2_tickangle in constants.XAXIS_ORIENTATION.keys(): + self.x2_tickangle = constants.XAXIS_ORIENTATION[self.x2_tickangle] + self.x2_tickfont_size = self.parameters['x2tlab_size'] + constants.DEFAULT_TITLE_FONTSIZE + + ############################################## + # series parameters + self.series_ordering = self.get_config_value('series_order') + # Make the series ordering zero-based + self.series_ordering_zb = [sorder - 1 for sorder in self.series_ordering] + self.plot_disp = self._get_plot_disp() + self.series_ci = self._get_series_ci() + self.colors_list = self._get_colors() + self.all_series_y1 = self._get_all_series_y(1) + self.num_series = self.calculate_number_of_series() + 
self.linewidth_list = self._get_linewidths() + self.linestyles_list = self._get_linestyles() + self.marker_list = self._get_markers() + self.marker_size = self._get_markers_size() + + ############################################## + # legend parameters + self.user_legends = self._get_user_legends() + self.bbox_x = 0.5 + self.parameters['legend_inset']['x'] + self.bbox_y = -0.12 + self.parameters['legend_inset']['y'] + 0.25 + self.legend_size = int(constants.DEFAULT_LEGEND_FONTSIZE * self.parameters['legend_size']) + if self.parameters['legend_box'].lower() == 'n': + self.legend_border_width = 0 # Don't draw a box around legend labels + else: + self.legend_border_width = 2 # Enclose legend labels in a box + + if self.parameters['legend_ncol'] == 1: + self.legend_orientation = 'v' + else: + self.legend_orientation = 'h' + self.legend_border_color = "black" + + self.box_avg = self._get_bool('box_avg') + self.box_notch = self._get_bool('box_notch') + + self.plot_dir = self.parameters['plot_dir'] + self.prefix = self.parameters['prefix'] + self.subtitle = self._get_subtitle() + self.baseline_file = self.parameters['baseline_file'] + self.column_info_file = self.parameters['column_info_file'] + + # TODO validate list_stat_1. It should have only one element for now + # TODO validate series_vals_1. It should have only one key element for now + + def _get_subtitle(self): + if self.parameters['subtitle'] is not None and len(self.parameters['subtitle']) > 0: + return self.parameters['subtitle'] + else: + filter_str = '' + for key, value in self.parameters['series_val_1'].items(): + csv = ','.join(value) + filter_str = f'{filter_str} -{key} {csv}' + for key, value in self.parameters['fixed_vars_vals_input'].items(): + csv = ','.join(value) + filter_str = f'{filter_str} -{key} {csv}' + return filter_str + + def _get_tcst_dir(self): + return self.get_config_value('tcst_dir') + + def _get_linestyles(self) -> list: + """ + Retrieve all the line styles. 
Convert line style names from + the config file into plotly python's line style names. + + Args: + + Returns: + line_styles: a list of the plotly line styles + """ + line_styles = self.get_config_value('series_line_style') + line_style_list = [] + for line_style in line_styles: + if line_style in constants.LINE_STYLE_TO_PLOTLY_DASH.keys(): + line_style_list.append(constants.LINE_STYLE_TO_PLOTLY_DASH[line_style]) + else: + line_style_list.append(None) + return self.create_list_by_series_ordering(line_style_list) + + def _get_markers(self) -> list: + """ + Retrieve all the markers. Convert marker names from + the config file into plotly python's marker names. + + Args: + + Returns: + markers: a list of the plotly markers + """ + markers = self.get_config_value('series_symbols') + markers_list = [] + for marker in markers: + if marker in constants.AVAILABLE_PLOTLY_MARKERS_LIST: + # the recognized plotly marker names: + # circle-open (for small circle), circle, triangle-up, + # square, diamond, or hexagon + markers_list.append(marker) + else: + markers_list.append(constants.PCH_TO_PLOTLY_MARKER[marker]) + return self.create_list_by_series_ordering(markers_list) + + def _get_markers_size(self) -> list: + """ + Retrieve all the markers. Convert marker names from + the config file into plotly python's marker names. 
+ + Args: + + Returns: + markers: a list of the plotly markers + """ + markers_size = self.get_config_value('series_symbols_size') + return self.create_list_by_series_ordering(markers_size) + + def _get_plot(self) -> list: + plot_list = self.get_config_value('plot_list') + # TODO validate plots BOXPLOT, POINT, MEAN, MEDIAN, RELPERF, RANK, SKILL_MN, SKILL_MD + return plot_list + + def _get_tcst_files(self) -> list: + tcst_files = self.get_config_value('tcst_files') + return tcst_files + + def _get_rp_diff(self) -> list: + rp_diff = self.get_config_value('rp_diff') + # TODO validate rp_diff + if len(rp_diff) == 1: + rp_diff = [rp_diff[0]] * len(self.indy_vals) + return rp_diff + + def _get_hfip_bsln(self) -> str: + hfip_bsln = str(self.get_config_value('hfip_bsln')) + # TODO validate hfip_bsln (no, 0, 5, 10 year goal) + return hfip_bsln + + def _get_plot_disp(self) -> list: + """ + Retrieve the values that determine whether to display a particular series + and convert them to bool if needed + + Args: + + Returns: + A list of boolean values indicating whether or not to + display the corresponding series + """ + + plot_display_config_vals = self.get_config_value('plot_disp') + plot_display_bools = [] + for val in plot_display_config_vals: + if isinstance(val, bool): + plot_display_bools.append(val) + + if isinstance(val, str): + plot_display_bools.append(val.upper() == 'TRUE') + + return self.create_list_by_series_ordering(plot_display_bools) + + def _get_series_ci(self) -> list: + """ + Retrieve the values that determine whether to display a particular series + and convert them to bool if needed + + Args: + + Returns: + A list of boolean values indicating whether or not to + display the corresponding series + """ + + series_ci_config_vals = self.get_config_value('series_ci') + series_ci_bools = [] + for val in series_ci_config_vals: + if isinstance(val, bool): + series_ci_bools.append(val) + + if isinstance(val, str): + series_ci_bools.append(val.upper() == 'TRUE') + 
+ return self.create_list_by_series_ordering(series_ci_bools) + + def _get_fcst_vars(self, index): + """ + Retrieve a list of the inner keys (fcst_vars) to the fcst_var_val dictionary. + + Args: + index: identifier used to differentiate between fcst_var_val_1 and + fcst_var_val_2 config settings + Returns: + a list containing all the fcst variables requested in the + fcst_var_val setting in the config file. This will be + used to subset the input data that corresponds to a particular series. + + """ + if index == 1: + fcst_var_val_dict = self.get_config_value('fcst_var_val_1') + if not fcst_var_val_dict: + fcst_var_val_dict = {} + elif index == 2: + fcst_var_val_dict = self.get_config_value('fcst_var_val_2') + if not fcst_var_val_dict: + fcst_var_val_dict = {} + else: + fcst_var_val_dict = {} + + return fcst_var_val_dict + + def _get_plot_stat(self) -> str: + """ + Retrieves the plot_stat setting from the config file. + There will be many statistics values + (ie stat_name=PODY or stat_name=FAR in data file) that + correspond to a specific time (init or valid, as specified + in the config file), for a combination of + model, vx_mask, fcst_var,etc. We require a single value + to represent this combination. Acceptable values are sum, mean, and median. + + Returns: + stat_to_plot: one of the following values for the plot_stat: MEAN, MEDIAN, or SUM + """ + + accepted_stats = ['MEAN', 'MEDIAN', 'SUM'] + stat_to_plot = self.get_config_value('plot_stat').upper() + + if stat_to_plot not in accepted_stats: + raise ValueError( + "An unsupported statistic was set for the plot_stat setting. Supported values are sum, mean, and median.") + return stat_to_plot + + def _config_consistency_check(self) -> bool: + """ + Checks that the number of settings defined for plot_ci, + plot_disp, series_order, user_legend colors, and series_symbols + are consistent. 
+ + Args: + + Returns: + True if the number of settings for each of the above + settings is consistent with the number of + series (as defined by the cross product of the model + and vx_mask defined in the series_val_1 setting) + + """ + # Determine the number of series based on the number of + # permutations from the series_var setting in the + # config file + + # Numbers of values for other settings for series + num_plot_disp = len(self.plot_disp) + num_series_ord = len(self.series_ordering) + num_colors = len(self.colors_list) + num_legends = len(self.user_legends) + status = False + + if self.num_series == num_plot_disp == \ + num_series_ord == num_colors \ + == num_legends: + status = True + return status + + def _get_user_legends(self, legend_label_type: str = '') -> list: + """ + Retrieve the text that is to be displayed in the legend at the bottom of the plot. + Each entry corresponds to a series. + + Args: + @parm legend_label_type: The legend label, such as 'Performance' that indicates + the type of series box. Used when the user hasn't + indicated a legend. + + Returns: + a list consisting of the series label to be displayed in the plot legend. 
+ + """ + + all_user_legends = self.get_config_value('user_legend') + legend_list = [] + + # create legend list for y-axis series + + series = self.get_series_y(1) + for idx, ser_components in enumerate(series): + if idx >= len(all_user_legends) or all_user_legends[idx].strip() == '': + # user did not provide the legend - create it + legend_list.append(ser_components[0]) + else: + # user provided a legend - use it + legend_list.append(all_user_legends[idx]) + + # add to legend list legends for y2-axis series + num_series_y1 = len(self.get_series_y(1)) + + for idx, ser_components in enumerate(self.get_config_value('derived_series_1')): + # index of the legend + legend_idx = idx + num_series_y1 + if legend_idx >= len(all_user_legends) or all_user_legends[legend_idx].strip() == '': + # user did not provide the legend - create it + legend_list.append(utils.get_derived_curve_name(ser_components)) + else: + # user provided a legend - use it + legend_list.append(all_user_legends[legend_idx]) + + return self.create_list_by_series_ordering(legend_list) + + def get_series_y(self, axis: int) -> list: + """ + Creates an array of series components (excluding derived) tuples for the specified y-axis + :param axis: y-axis (1 or 2) + :return: an array of series components tuples + """ + all_fields_values_orig = self.get_config_value('series_val_' + str(axis)).copy() + all_fields_values = {} + for x in reversed(list(all_fields_values_orig.keys())): + all_fields_values[x] = all_fields_values_orig.get(x) + + all_fields_values['fcst_var'] = self.list_stat_1 + + return utils.create_permutations_mv(all_fields_values, 0) + + def get_series_y_relperf(self, axis: int) -> list: + """ + Creates an array of series components (excluding derived) tuples for the specified y-axis + :param axis: y-axis (1 or 2) + :return: an array of series components tuples + """ + all_fields_values_orig = self.get_config_value('series_val_' + str(axis)).copy() + all_fields_values = {} + for x in 
reversed(list(all_fields_values_orig.keys())): + all_fields_values[x] = all_fields_values_orig.get(x) + + all_fields_values['fcst_var'] = ['Better'] + + return utils.create_permutations_mv(all_fields_values, 0) + + def _get_all_series_y(self, axis: int) -> list: + """ + Creates an array of all series (including derived) components tuples + for the specified y-axis + :param axis: y-axis (1 or 2) + :return: an array of series components tuples + """ + all_series = self.get_series_y(axis) + + # add derived series if exist + if self.get_config_value('derived_series_' + str(axis)): + all_series = all_series + self.get_config_value('derived_series_' + str(axis)) + + return all_series + + def calculate_number_of_series(self) -> int: + """ + From the number of items in the permutation list, + determine how many series "objects" are to be plotted. + + Args: + + Returns: + the number of series + + """ + # Retrieve the lists from the series_val_1 dictionary + series_vals_list = self.series_vals_1.copy() + if isinstance(self.fcst_var_val_1, list) is True: + fcst_vals = self.fcst_var_val_1 + elif isinstance(self.fcst_var_val_1, dict) is True: + fcst_vals = list(self.fcst_var_val_1.values()) + else: + fcst_vals = list() + fcst_vals_flat = [item for sublist in fcst_vals for item in sublist] + series_vals_list.append(fcst_vals_flat) + + # Utilize itertools' product() to create the cartesian product of all elements + # in the lists to produce all permutations of the series_val values and the + # fcst_var_val values. 
+ permutations = list(itertools.product(*series_vals_list)) + + if self.series_vals_2: + series_vals_list_2 = self.series_vals_2.copy() + if isinstance(self.fcst_var_val_2, list) is True: + fcst_vals_2 = self.fcst_var_val_2 + elif isinstance(self.fcst_var_val_2, dict) is True: + fcst_vals_2 = list(self.fcst_var_val_2.values()) + else: + fcst_vals_2 = list() + fcst_vals_2_flat = [item for sublist in fcst_vals_2 for item in sublist] + series_vals_list_2.append(fcst_vals_2_flat) + permutations_2 = list(itertools.product(*series_vals_list_2)) + permutations.extend(permutations_2) + + total = len(permutations) + # add derived + total = total + len(self.get_config_value('derived_series_1')) + + return total diff --git a/metplotpy/plots/tcmpr_plots/tcmpr_series.py b/metplotpy/plots/tcmpr_plots/tcmpr_series.py new file mode 100644 index 00000000..8ea3d5d4 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/tcmpr_series.py @@ -0,0 +1,252 @@ +# ============================* +# ** Copyright UCAR (c) 2020 +# ** University Corporation for Atmospheric Research (UCAR) +# ** National Center for Atmospheric Research (NCAR) +# ** Research Applications Lab (RAL) +# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA +# ============================* + + +""" +Class Name: TcmprSeries + """ + +import re +from typing import Union + +import numpy as np +import pandas as pd +from pandas import DataFrame + +import metcalcpy.util.utils as utils +from .tcmpr_util import get_prop_ci +from ..series import Series + + +class TcmprSeries(Series): + """ + Represents a tcmpr plot series object + of data points and their plotting style + elements (line colors, etc.) 
+ + """ + + def __init__(self, config, idx: int, input_data, series_list: list, + series_name: Union[list, tuple], skill_ref_data: DataFrame = None): + self.series_list = series_list + self.series_name = series_name + self.rank_min_val = [] + self.series_len = len(config.get_series_y(1)) + len(config.get_config_value('derived_series_1')) + self.skill_ref_data = skill_ref_data + if idx >= self.series_len: + super().__init__(config, 0, input_data, 1) + else: + super().__init__(config, idx, input_data, 1) + self.idx = idx + + def _create_all_fields_values_no_indy(self) -> dict: + """ + Creates a dictionary with two keys that represents each axis + values - dictionaries of field values pairs of all series variables (without indy variable) + :return: dictionary with field-values pairs for each axis + """ + all_fields_values_no_indy = {} + + all_fields_values_orig = self.config.get_config_value('series_val_1').copy() + all_fields_values = {} + for x in reversed(list(all_fields_values_orig.keys())): + all_fields_values[x] = all_fields_values_orig.get(x) + + if self.config._get_fcst_vars(1): + all_fields_values['fcst_var'] = list(self.config._get_fcst_vars(1).keys()) + all_fields_values_no_indy[1] = all_fields_values + + return all_fields_values_no_indy + + def _create_series_points(self) -> dict: + """ + Subset the data for the appropriate series. 
+ Calculate values for each point including CI + + Args: + + Returns: + dictionary with CI ,point values and number of stats as keys + """ + + self._init_series_data() + + series_points_results = {'val': [], 'ncl': [], 'ncu': [], 'nstat': [], 'mean': []} + + # for each point calculate plot statistic + for indy in self.config.indy_vals: + if utils.is_string_integer(indy): + indy = int(indy) + elif utils.is_string_strictly_float(indy): + indy = float(indy) + point_data = self.series_data.loc[ + (self.series_data["LEAD_HR"] == indy)] + point_data = point_data.sort_values(by=['CASE']) + + series_points_results['nstat'].append(len(point_data.index)) + if len(point_data) == 0: + series_points_results['mean'].append(None) + else: + series_points_results['mean'].append(np.nanmean(point_data['PLOT'].tolist())) + + return series_points_results + + def _init_series_data(self): + # different ways to subset data for normal and derived series + + if len(self.series_name) == 1 or self.series_name[-1] not in utils.OPERATION_TO_SIGN.keys(): + # this is a normal series + all_filters = [] + + # create a set of filters for this series + for field_ind, field in enumerate(self.all_fields_values_no_indy[self.y_axis].keys()): + filter_value = self.series_name[field_ind] + if isinstance(filter_value, str) and utils.GROUP_SEPARATOR in filter_value: + filter_list = re.findall(utils.DATE_TIME_REGEX, filter_value) + if len(filter_list) == 0: + filter_list = filter_value.split(utils.GROUP_SEPARATOR) + # add the original value + filter_list.append(filter_value) + else: + filter_list = [filter_value] + for i, filter_val in enumerate(filter_list): + if utils.is_string_integer(filter_val): + filter_list[i] = int(filter_val) + elif utils.is_string_strictly_float(filter_val): + filter_list[i] = float(filter_val) + + all_filters.append((self.input_data[field].isin(filter_list))) + + # use numpy to select the rows where any record evaluates to True + mask = np.array(all_filters).all(axis=0) + 
self.series_data = self.input_data.loc[mask] + + # sort data by date/time/storm - needed for CI calculations + self.series_data = self.series_data.sort_values(['VALID', 'LEAD', 'STORM_ID']) + + else: + # this is a derived series + + # the name of the 1st series + series_name_1 = self.series_name[0].split() + # the name of the 2nd series + series_name_2 = self.series_name[1].split() + # operation + operation = self.series_name[2] + + # find original series data + series_data_1 = None + series_data_2 = None + for series in self.series_list: + if set(series_name_1) == set(series.series_name): + series_data_1 = series.series_data + if set(series_name_2) == set(series.series_name): + series_data_2 = series.series_data + + self._calculate_derived_values(operation, series_data_1, series_data_2) + + def create_relperf_points(self, case_data): + print('Case_data size =' + str(len(case_data.index))) + for indy in self.config.indy_vals: + + if utils.is_string_integer(indy): + indy = int(indy) + elif utils.is_string_strictly_float(indy): + indy = float(indy) + print("indy float =" + str(indy)) + if self.idx >= self.series_len: + series_val = self.series_name[0] + else: + series_val = self.config.series_vals_1[0][self.idx] + case_data_indy = case_data[case_data['LEAD_HR'] == indy] + # Get counts + n_cur = len(case_data_indy[case_data_indy['PLOT'] == series_val]) + n_tot = len(case_data_indy) + # Compute the current relative performance and CI + s = get_prop_ci(n_cur, n_tot, self.config.n_min, self.config.alpha) + self.series_points['ncl'].append(s['ncl']) + self.series_points['val'].append(s['val']) + self.series_points['ncu'].append(s['ncu']) + + def create_rank_points(self, case_data): + for indy in self.config.indy_vals: + if utils.is_string_integer(indy): + indy = int(indy) + elif utils.is_string_strictly_float(indy): + indy = float(indy) + case_data_indy = case_data[case_data['LEAD_HR'] == indy] + case_data_indy = case_data_indy.dropna(subset=['RANK_RANDOM'], how='all') + 
# Get counts + n_cur = len(case_data_indy[case_data_indy['RANK_RANDOM'] == self.idx + 1]) + n_tot = len(case_data_indy) + # Compute the current relative performance and CI + s = get_prop_ci(n_cur, n_tot, self.config.n_min, self.config.alpha) + ci_data = s['val'] + + self.series_points['ncl'].append(s['ncl']) + self.series_points['val'].append(ci_data) + self.series_points['ncu'].append(s['ncu']) + + if self.idx == 0 or len(self.config.all_series_y1) == self.idx + 1: + case_data_indy = case_data[case_data['LEAD_HR'] == indy] + case_data_indy = case_data_indy.dropna(subset=['RANK_MIN'], how='all') + n_cur = len(case_data_indy[case_data_indy['RANK_MIN'] == self.idx + 1]) + n_tot = len(case_data_indy) + rank_min = 100 * n_cur / n_tot + self.rank_min_val.append(rank_min) + + def _calculate_derived_values(self, + operation: str, + series_data_1: DataFrame, + series_data_2: DataFrame) -> None: + """ + Validates if both DataFrames have the same fcst_valid_beg values and if it is TRUE + Calculates derived statistic for the each box based on data from the 1st + and 2nd data frames + For example, if the operation is 'DIFF' the diferensires between values from + the 1st and the 2nd frames will be calculated + This method also calculates CI(s) + + :param operation: statistic to calculate + :param series_data_1: 1st data frame sorted by fcst_init_beg + :param series_data_2: 2nd data frame sorted by fcst_init_beg + """ + + # for each independent value + for indy in self.config.indy_vals: + if utils.is_string_integer(indy): + indy = int(indy) + elif utils.is_string_strictly_float(indy): + indy = float(indy) + + stats_indy_1 = \ + series_data_1.loc[series_data_1['LEAD_HR'] == indy] + stats_indy_2 = \ + series_data_2.loc[series_data_2['LEAD_HR'] == indy] + + # validate data + + unique_dates = \ + stats_indy_1[['VALID', 'LEAD_HR', 'BMODEL', 'STORM_ID']].drop_duplicates().shape[0] + + if stats_indy_1.shape[0] != unique_dates: + raise ValueError( + 'Derived curve can\'t be calculated. 
' + 'Multiple values for one valid date/LEAD_HR') + + # data should be sorted by fcst_init_beg !!!!! + stats_values = utils.calc_derived_curve_value(stats_indy_1['PLOT'].tolist(), + stats_indy_2['PLOT'].tolist(), + operation) + + stats_indy_1['PLOT'] = stats_values + + if self.series_data is None: + self.series_data = stats_indy_1 + else: + self.series_data = pd.concat([self.series_data, stats_indy_1], sort=False) From cbdba40e6ab4d1d8e6019e53900170878d5c6af9 Mon Sep 17 00:00:00 2001 From: TatianaBurek Date: Mon, 27 Feb 2023 14:54:39 -0700 Subject: [PATCH 4/8] initial version #240 --- .../line/median/tcmpr_series_line_median.py | 75 ++++ .../scatter/tcmpr_series_scatter.py | 99 +++++ .../skill/mean/tcmpr_series_skill_mean.py | 80 ++++ .../skill/mean/tcmpr_skill_mean.py | 142 +++++++ .../skill/median/tcmpr_series_skill_median.py | 79 ++++ .../skill/median/tcmpr_skill_median.py | 85 +++++ .../plots/tcmpr_plots/skill/tcmpr_skill.py | 142 +++++++ metplotpy/plots/tcmpr_plots/tcmpr_util.py | 353 ++++++++++++++++++ 8 files changed, 1055 insertions(+) create mode 100644 metplotpy/plots/tcmpr_plots/line/median/tcmpr_series_line_median.py create mode 100644 metplotpy/plots/tcmpr_plots/scatter/tcmpr_series_scatter.py create mode 100644 metplotpy/plots/tcmpr_plots/skill/mean/tcmpr_series_skill_mean.py create mode 100644 metplotpy/plots/tcmpr_plots/skill/mean/tcmpr_skill_mean.py create mode 100644 metplotpy/plots/tcmpr_plots/skill/median/tcmpr_series_skill_median.py create mode 100644 metplotpy/plots/tcmpr_plots/skill/median/tcmpr_skill_median.py create mode 100644 metplotpy/plots/tcmpr_plots/skill/tcmpr_skill.py create mode 100644 metplotpy/plots/tcmpr_plots/tcmpr_util.py diff --git a/metplotpy/plots/tcmpr_plots/line/median/tcmpr_series_line_median.py b/metplotpy/plots/tcmpr_plots/line/median/tcmpr_series_line_median.py new file mode 100644 index 00000000..801167ec --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/line/median/tcmpr_series_line_median.py @@ -0,0 +1,75 @@ +# 
============================* +# ** Copyright UCAR (c) 2023 +# ** University Corporation for Atmospheric Research (UCAR) +# ** National Center for Atmospheric Research (NCAR) +# ** Research Applications Lab (RAL) +# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA +# ============================* + + +""" +Class Name: TcmprSeriesLineMedian + """ + +from typing import Union + +import metcalcpy.util.utils as utils +from plots.tcmpr_plots.tcmpr_series import TcmprSeries +from plots.tcmpr_plots.tcmpr_util import get_median_ci + + +class TcmprSeriesLineMedian(TcmprSeries): + """ + Represents a Box plot series object + of data points and their plotting style + elements (line colors, etc.) + + """ + + def __init__(self, config, idx: int, input_data, series_list: list, + series_name: Union[list, tuple]): + + super().__init__(config, idx, input_data, series_list, series_name) + + def _create_series_points(self) -> dict: + """ + Subset the data for the appropriate series. + Calculate values for each point including CI + + Args: + + Returns: + dictionary with CI ,point values and number of stats as keys + """ + + self._init_series_data() + + series_points_results = {'val': [], 'ncl': [], 'ncu': [], 'nstat': [], 'mean': []} + + # for each point calculate plot statistic + for indy in self.config.indy_vals: + if utils.is_string_integer(indy): + indy = int(indy) + elif utils.is_string_strictly_float(indy): + indy = float(indy) + point_data = self.series_data.loc[ + (self.series_data['LEAD_HR'] == indy)] + point_data = point_data.sort_values(by=['CASE']) + + ci_data = get_median_ci(point_data['PLOT'].tolist(), self.config.alpha, self.config.n_min) + if ci_data['ncl'] is not None: + dbl_lo_ci = ci_data['val'] - ci_data['ncl'] + else: + dbl_lo_ci = ci_data['val'] + + if ci_data['ncu'] is not None: + dbl_up_ci = ci_data['ncu'] - ci_data['val'] + else: + dbl_up_ci = ci_data['val'] + + series_points_results['ncl'].append(dbl_lo_ci) + 
series_points_results['val'].append(ci_data['val']) + series_points_results['ncu'].append(dbl_up_ci) + series_points_results['nstat'].append(len(point_data)) + + return series_points_results diff --git a/metplotpy/plots/tcmpr_plots/scatter/tcmpr_series_scatter.py b/metplotpy/plots/tcmpr_plots/scatter/tcmpr_series_scatter.py new file mode 100644 index 00000000..4042c288 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/scatter/tcmpr_series_scatter.py @@ -0,0 +1,99 @@ +# ============================* +# ** Copyright UCAR (c) 2023 +# ** University Corporation for Atmospheric Research (UCAR) +# ** National Center for Atmospheric Research (NCAR) +# ** Research Applications Lab (RAL) +# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA +# ============================* + + +""" +Class Name: TcmprSeriesScatter + """ + +import re +from typing import Union + +import numpy as np + +import metcalcpy.util.utils as utils +from plots.series import Series + + +class TcmprSeriesScatter(Series): + """ + Represents a Box plot series object + of data points and their plotting style + elements (line colors, etc.) 
+ + """ + + def __init__(self, config, idx: int, input_data, series_list: list, + series_name: Union[list, tuple]): + self.series_list = series_list + self.series_name = series_name + super().__init__(config, idx, input_data, 1) + + def _create_all_fields_values_no_indy(self) -> dict: + """ + Creates a dictionary with two keys that represents each axis + values - dictionaries of field values pairs of all series variables (without indy variable) + :return: dictionary with field-values pairs for each axis + """ + all_fields_values_no_indy = {} + + all_fields_values_orig = self.config.get_config_value('series_val_1').copy() + all_fields_values = {} + for x in reversed(list(all_fields_values_orig.keys())): + all_fields_values[x] = all_fields_values_orig.get(x) + + if self.config._get_fcst_vars(1): + all_fields_values['fcst_var'] = list(self.config._get_fcst_vars(1).keys()) + all_fields_values_no_indy[1] = all_fields_values + + return all_fields_values_no_indy + + def _create_series_points(self) -> dict: + """ + Subset the data for the appropriate series. 
+ Calculate values for each point including CI + + Args: + + Returns: + dictionary with CI ,point values and number of stats as keys + """ + + # different ways to subset data for normal and derived series + # this is a normal series + all_filters = [] + + # create a set of filters for this series + for field_ind, field in enumerate(self.all_fields_values_no_indy[self.y_axis].keys()): + if field == 'LEAD': + field = 'LEAD_HR' + filter_value = self.series_name[field_ind] + if isinstance(filter_value, str) and utils.GROUP_SEPARATOR in filter_value: + filter_list = re.findall(utils.DATE_TIME_REGEX, filter_value) + if len(filter_list) == 0: + filter_list = filter_value.split(utils.GROUP_SEPARATOR) + # add the original value + filter_list.append(filter_value) + else: + filter_list = [filter_value] + for i, filter_val in enumerate(filter_list): + if utils.is_string_integer(filter_val): + filter_list[i] = int(filter_val) + elif utils.is_string_strictly_float(filter_val): + filter_list[i] = float(filter_val) + + all_filters.append((self.input_data[field].isin(filter_list))) + + mask = np.array(all_filters).all(axis=0) + self.series_data = self.input_data.loc[mask] + + # sort data by date/time/storm - needed for CI calculations + self.series_data = self.series_data.sort_values(['VALID', 'LEAD', 'STORM_ID']) + + series_points_results = {'val': [], 'ncl': [], 'ncu': [], 'nstat': []} + return series_points_results diff --git a/metplotpy/plots/tcmpr_plots/skill/mean/tcmpr_series_skill_mean.py b/metplotpy/plots/tcmpr_plots/skill/mean/tcmpr_series_skill_mean.py new file mode 100644 index 00000000..d8b414eb --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/skill/mean/tcmpr_series_skill_mean.py @@ -0,0 +1,80 @@ +# ============================* +# ** Copyright UCAR (c) 2023 +# ** University Corporation for Atmospheric Research (UCAR) +# ** National Center for Atmospheric Research (NCAR) +# ** Research Applications Lab (RAL) +# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA +# 
============================* + + +""" +Class Name: TcmprSeriesSkill + """ + +from typing import Union + +import numpy as np +from pandas import DataFrame + +import metcalcpy.util.utils as utils +from plots.tcmpr_plots.tcmpr_series import TcmprSeries + + +class TcmprSeriesSkillMean(TcmprSeries): + """ + Represents a Box plot series object + of data points and their plotting style + elements (line colors, etc.) + + """ + + def __init__(self, config, idx: int, input_data, series_list: list, + series_name: Union[list, tuple], skill_ref_data: DataFrame = None): + super().__init__(config, idx, input_data, series_list, series_name, skill_ref_data) + + def _create_series_points(self) -> dict: + """ + Subset the data for the appropriate series. + Calculate values for each point including CI + + Args: + + Returns: + dictionary with CI ,point values and number of stats as keys + """ + + self._init_series_data() + + result_size = len(self.config.indy_vals) + series_points_results = {'val': [None] * result_size, + 'nstat': [None] * result_size} + # for each point calculate plot statistic + for i in range(0, result_size): + indy = self.config.indy_vals[i] + if utils.is_string_integer(indy): + indy = int(indy) + elif utils.is_string_strictly_float(indy): + indy = float(indy) + point_data = self.series_data.loc[ + (self.series_data["LEAD_HR"] == indy)] + + # Skip lead times for which no data is found + + if len(point_data) > 0 and self.skill_ref_data is not None and len(self.skill_ref_data) > 0: + point_data = point_data.sort_values(by=['CASE']) + data_ref = self.skill_ref_data.loc[(self.skill_ref_data['LEAD_HR'] == indy)] + + # Get the values to be plotted for this lead time + val = None + if i != 0 and data_ref is not None: + cur = np.nanmean(point_data['PLOT'].tolist()) + ref = np.nanmean(data_ref['PLOT'].tolist()) + + if ref is not None and cur is not None: + val = utils.round_half_up(100 * (ref - cur) / ref, 0) + + series_points_results['val'][i] = val + + 
series_points_results['nstat'][i] = len(point_data) + + return series_points_results diff --git a/metplotpy/plots/tcmpr_plots/skill/mean/tcmpr_skill_mean.py b/metplotpy/plots/tcmpr_plots/skill/mean/tcmpr_skill_mean.py new file mode 100644 index 00000000..a124d394 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/skill/mean/tcmpr_skill_mean.py @@ -0,0 +1,142 @@ +import os + +import numpy as np +import plotly.graph_objects as go + +from metcalcpy.util import utils +from plots.tcmpr_plots.skill.mean.tcmpr_series_skill_mean import TcmprSeriesSkillMean +from plots.tcmpr_plots.skill.tcmpr_skill import TcmprSkill + + +class TcmprSkillMean(TcmprSkill): + def __init__(self, config_obj, column_info, col, case_data, input_df, baseline_data): + super().__init__(config_obj, column_info, col, case_data, input_df, baseline_data) + print("--------------------------------------------------------") + print(f"Plotting SKILL_MN time series by {self.config_obj.series_val_names[0]}") + + self._adjust_titles() + self.cur_baseline = baseline_data['cur_baseline'] + self.cur_baseline_data = baseline_data['cur_baseline_data'] + self._init_hfip_baseline_for_plot() + self.series_list = self._create_series(self.input_df) + self.case_data = None + if self.config_obj.prefix is None or len(self.config_obj.prefix) == 0: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.list_stat_1[0]}_skill_mn.png" + else: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.prefix}.png" + + # remove the old file if it exist + if os.path.exists(self.plot_filename): + os.remove(self.plot_filename) + self._create_figure() + + def _adjust_titles(self): + if self.yaxis_1 is None or len(self.yaxis_1) == 0: + self.yaxis_1 = self.config_obj.list_stat_1[0] + '(' + self.col['units'] + ')' + + if self.title is None or len(self.title) == 0: + self.title = 'Mean Skill Scores of ' + self.col['desc'] + ' by ' \ + + self.column_info[self.column_info['COLUMN'] == 
self.config_obj.series_val_names[0]][ + "DESCRIPTION"].tolist()[0] + + def _init_hfip_baseline_for_plot(self): + if 'Water Only' in self.title: + print("Plot HFIP Baseline:" + self.cur_baseline) + else: + self.cur_baseline = self.cur_baseline.replace('Error', 'Skill') + self.cur_baseline = self.cur_baseline.replace('HFIP Baseline ', 'HFIP Skill Baseline') + print('Plot HFIP Baseline:' + self.cur_baseline.replace('Error ', '')) + + def _add_hfip_baseline(self): + # Add HFIP baseline for each lead time + if self.cur_baseline_data is not None: + baseline_x_values = [] + baseline_y_values = [] + lead_times = np.unique(self.series_list[0].series_data[self.config_obj.indy_var].tolist()) + lead_times.sort() + for ind, lead in enumerate(lead_times): + if lead != 0: + ocd5_data = self.cur_baseline_data.loc[ + (self.cur_baseline_data['LEAD'] == lead) & (self.cur_baseline_data['TYPE'] == "OCD5")][ + 'VALUE'].tolist() + if len(ocd5_data) > 1: + raise ValueError( + f"ERROR: Can't crate HFIP baseline for lead time {lead} : too many values of OCD5 in .dat file") + ocd5_data = ocd5_data[0] + cons_data = self.cur_baseline_data.loc[ + (self.cur_baseline_data['LEAD'] == lead) & (self.cur_baseline_data['TYPE'] == "CONS")][ + 'VALUE'].tolist() + if len(cons_data) > 1: + raise ValueError( + f"ERROR: Can't crate HFIP baseline for lead time {lead} : too many values of CONS in .dat file") + cons_data = cons_data[0] + + baseline_lead = utils.round_half_up(100 * (ocd5_data - cons_data) / ocd5_data, 1) + baseline_x_values.append(ind) + baseline_y_values.append(baseline_lead) + + self.figure.add_trace( + go.Scatter(x=baseline_x_values, + y=baseline_y_values, + showlegend=True, + mode='markers', + textposition="top right", + name=self.cur_baseline, + marker=dict(size=8, + color='rgb(0,0,255)', + line=dict( + width=1, + color='rgb(0,0,255)' + ), + symbol='diamond-cross-open', + ) + ) + ) + + def _create_series(self, input_data): + """ + Generate all the series objects that are to be displayed as 
specified by the plot_disp + setting in the config file. The points are all ordered by datetime. Each series object + is represented by a box in the diagram, so they also contain information + for plot-related/appearance-related settings (which were defined in the config file). + + Args: + input_data: The input data in the form of a Pandas dataframe. + This data will be subset to reflect the series data of interest. + + Returns: + a list of series objects that are to be displayed + + + """ + all_fields_values = {'AMODEL': [utils.GROUP_SEPARATOR.join(self.config_obj.skill_ref)], + 'fcst_var': self.config_obj.list_stat_1} + permutations = utils.create_permutations_mv(all_fields_values, 0) + ref_model_data_series = TcmprSeriesSkillMean(self.config_obj, 0, + input_data, [], permutations[0]) + ref_model_data = ref_model_data_series.series_data + + series_list = [] + + # add series for y1 axis + num_series_y1 = len(self.config_obj.get_series_y(1)) + for i, name in enumerate(self.config_obj.get_series_y(1)): + if not isinstance(name, list): + name = [name] + series_obj = TcmprSeriesSkillMean(self.config_obj, i, input_data, series_list, name, ref_model_data) + series_list.append(series_obj) + + # add derived for y1 axis + for i, name in enumerate(self.config_obj.get_config_value('derived_series_1')): + # add default operation value if it is not provided + if len(name) == 2: + name.append("DIFF") + # include the series only if the name is valid + if len(name) == 3: + series_obj = TcmprSeriesSkillMean(self.config_obj, num_series_y1 + i, input_data, series_list, name) + series_list.append(series_obj) + + # reorder series + series_list = self.config_obj.create_list_by_series_ordering(series_list) + + return series_list diff --git a/metplotpy/plots/tcmpr_plots/skill/median/tcmpr_series_skill_median.py b/metplotpy/plots/tcmpr_plots/skill/median/tcmpr_series_skill_median.py new file mode 100644 index 00000000..edcb1ba3 --- /dev/null +++ 
b/metplotpy/plots/tcmpr_plots/skill/median/tcmpr_series_skill_median.py @@ -0,0 +1,79 @@ +# ============================* +# ** Copyright UCAR (c) 2023 +# ** University Corporation for Atmospheric Research (UCAR) +# ** National Center for Atmospheric Research (NCAR) +# ** Research Applications Lab (RAL) +# ** P.O.Box 3000, Boulder, Colorado, 80307-3000, USA +# ============================* + + +""" +Class Name: TcmprSeriesSkill + """ + +from typing import Union + +import numpy as np +from pandas import DataFrame + +import metcalcpy.util.utils as utils +from plots.tcmpr_plots.tcmpr_series import TcmprSeries + + +class TcmprSeriesSkillMedian(TcmprSeries): + """ + Represents a Box plot series object + of data points and their plotting style + elements (line colors, etc.) + + """ + + def __init__(self, config, idx: int, input_data, series_list: list, + series_name: Union[list, tuple], skill_ref_data: DataFrame = None): + super().__init__(config, idx, input_data, series_list, series_name, skill_ref_data) + + def _create_series_points(self) -> dict: + """ + Subset the data for the appropriate series. 
+ Calculate values for each point including CI + + Args: + + Returns: + dictionary with CI ,point values and number of stats as keys + """ + + self._init_series_data() + result_size = len(self.config.indy_vals) + series_points_results = {'val': [None] * result_size, + 'nstat': [None] * result_size} + # for each point calculate plot statistic + for i in range(0, result_size): + indy = self.config.indy_vals[i] + if utils.is_string_integer(indy): + indy = int(indy) + elif utils.is_string_strictly_float(indy): + indy = float(indy) + point_data = self.series_data.loc[ + (self.series_data['LEAD_HR'] == indy)] + + # Skip lead times for which no data is found + + if len(point_data) > 0 and self.skill_ref_data is not None and len(self.skill_ref_data) > 0: + point_data = point_data.sort_values(by=['CASE']) + data_ref = self.skill_ref_data.loc[(self.skill_ref_data['LEAD_HR'] == indy)] + + # Get the values to be plotted for this lead time + val = None + if i != 0 and data_ref is not None: + cur = np.nanmedian(point_data['PLOT'].tolist()) + ref = np.nanmedian(data_ref['PLOT'].tolist()) + + if ref is not None and cur is not None: + val = utils.round_half_up(100 * (ref - cur) / ref, 0) + + series_points_results['val'][i] = val + + series_points_results['nstat'][i] = len(point_data) + + return series_points_results diff --git a/metplotpy/plots/tcmpr_plots/skill/median/tcmpr_skill_median.py b/metplotpy/plots/tcmpr_plots/skill/median/tcmpr_skill_median.py new file mode 100644 index 00000000..ae24a0df --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/skill/median/tcmpr_skill_median.py @@ -0,0 +1,85 @@ +import os + +from metcalcpy.util import utils +from plots.tcmpr_plots.skill.median.tcmpr_series_skill_median import TcmprSeriesSkillMedian +from plots.tcmpr_plots.skill.tcmpr_skill import TcmprSkill + + +class TcmprSkillMedian(TcmprSkill): + def __init__(self, config_obj, column_info, col, case_data, input_df): + super().__init__(config_obj, column_info, col, case_data, input_df, None) + 
print("--------------------------------------------------------") + print(f"Plotting SKILL_MD time series by {self.config_obj.series_val_names[0]}") + + print("Plot HFIP Baseline:" + self.cur_baseline) + + self._adjust_titles() + self.series_list = self._create_series(self.input_df) + self.case_data = None + + if self.config_obj.prefix is None or len(self.config_obj.prefix) == 0: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.list_stat_1[0]}_skill_md.png" + else: + self.plot_filename = f"{self.config_obj.plot_dir}{os.path.sep}{self.config_obj.prefix}.png" + + # remove the old file if it exist + if os.path.exists(self.plot_filename): + os.remove(self.plot_filename) + self._create_figure() + + def _adjust_titles(self): + if self.yaxis_1 is None or len(self.yaxis_1) == 0: + self.yaxis_1 = self.config_obj.list_stat_1[0] + '(' + self.col['units'] + ')' + + if self.title is None or len(self.title) == 0: + self.title = 'Median Skill Scores of ' + self.col['desc'] + ' by ' \ + + self.column_info[self.column_info['COLUMN'] == self.config_obj.series_val_names[0]][ + "DESCRIPTION"].tolist()[0] + + def _create_series(self, input_data): + """ + Generate all the series objects that are to be displayed as specified by the plot_disp + setting in the config file. The points are all ordered by datetime. Each series object + is represented by a box in the diagram, so they also contain information + for plot-related/appearance-related settings (which were defined in the config file). + + Args: + input_data: The input data in the form of a Pandas dataframe. + This data will be subset to reflect the series data of interest. 
+ + Returns: + a list of series objects that are to be displayed + + + """ + all_fields_values = {'AMODEL': [utils.GROUP_SEPARATOR.join(self.config_obj.skill_ref)], + 'fcst_var': self.config_obj.list_stat_1} + permutations = utils.create_permutations_mv(all_fields_values, 0) + ref_model_data_series = TcmprSeriesSkillMedian(self.config_obj, 0, + input_data, [], permutations[0]) + ref_model_data = ref_model_data_series.series_data + + series_list = [] + + # add series for y1 axis + num_series_y1 = len(self.config_obj.get_series_y(1)) + for i, name in enumerate(self.config_obj.get_series_y(1)): + if not isinstance(name, list): + name = [name] + series_obj = TcmprSeriesSkillMedian(self.config_obj, i, input_data, series_list, name, ref_model_data) + series_list.append(series_obj) + + # add derived for y1 axis + for i, name in enumerate(self.config_obj.get_config_value('derived_series_1')): + # add default operation value if it is not provided + if len(name) == 2: + name.append("DIFF") + # include the series only if the name is valid + if len(name) == 3: + series_obj = TcmprSeriesSkillMedian(self.config_obj, num_series_y1 + i, input_data, series_list, name) + series_list.append(series_obj) + + # reorder series + series_list = self.config_obj.create_list_by_series_ordering(series_list) + + return series_list diff --git a/metplotpy/plots/tcmpr_plots/skill/tcmpr_skill.py b/metplotpy/plots/tcmpr_plots/skill/tcmpr_skill.py new file mode 100644 index 00000000..40264e91 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/skill/tcmpr_skill.py @@ -0,0 +1,142 @@ +from typing import Union +import plotly.graph_objects as go + +from plots.tcmpr_plots.tcmpr import Tcmpr +from plots.tcmpr_plots.tcmpr_series import TcmprSeries +from metcalcpy.util import utils + + +class TcmprSkill(Tcmpr): + def __init__(self, config_obj, column_info, col, case_data, input_df, baseline_data): + super().__init__(config_obj, column_info, col, case_data, input_df) + + def _create_figure(self): + """ Create a 
box plot from default and custom parameters""" + + self.figure = self._create_layout() + self._add_xaxis() + self._add_yaxis() + self._add_legend() + + # placeholder for the min and max values for y-axis + yaxis_min = None + yaxis_max = None + + if self.config_obj.xaxis_reverse is True: + self.series_list.reverse() + # calculate stag adjustments + stag_adjustments = self._calc_stag_adjustments() + + x_points_index = list(range(0, len(self.config_obj.indy_vals))) + # add x ticks for line plots + odered_indy_label = self.config_obj.create_list_by_plot_val_ordering(self.config_obj.indy_label) + self.figure.update_layout( + xaxis={ + 'tickmode': 'array', + 'tickvals': x_points_index, + 'ticktext': odered_indy_label + } + ) + + for series in self.series_list: + # Don't generate the plot for this series if + # it isn't requested (as set in the config file) + if series.plot_disp: + # collect min-max if we need to sync axis + yaxis_min, yaxis_max = self.find_min_max(series, yaxis_min, yaxis_max) + + x_points_index_adj = x_points_index + stag_adjustments[series.idx] + self._draw_series(series, x_points_index_adj) + + print(f'Range of {self.config_obj.list_stat_1[0]}: {yaxis_min}, {yaxis_max}') + + self._add_hfip_baseline() + + self.figure.update_layout(shapes=[dict( + type='line', + yref='y', y0=0, y1=0, + xref='paper', x0=0, x1=0.95, + line={'color': '#727273', + 'dash': 'dot', + 'width': 1}, + )]) + + # add custom lines + if len(self.series_list) > 0: + self._add_lines( + self.config_obj, + sorted(self.series_list[0].series_data[self.config_obj.indy_var].unique()) + ) + # apply y axis limits + self._yaxis_limits() + + # add x2 axis + self._add_x2axis(list(range(0, len(self.config_obj.indy_vals)))) + + def _draw_series(self, series: TcmprSeries, x_points_index_adj: list) -> None: + """ + Draws the boxes on the plot + + :param series: Line series object with data and parameters + """ + + y_points = series.series_points['val'] + + # create a trace + self.figure.add_trace( + 
go.Scatter(x=x_points_index_adj, + y=y_points, + showlegend=True, + mode='lines+markers', + textposition="top right", + name=self.config_obj.user_legends[series.idx], + line={'color': self.config_obj.colors_list[series.idx], + 'width': self.config_obj.linewidth_list[series.idx], + 'dash': self.config_obj.linestyles_list[series.idx]}, + marker_symbol=self.config_obj.marker_list[series.idx], + marker_color=self.config_obj.colors_list[series.idx], + marker_line_color=self.config_obj.colors_list[series.idx], + marker_size=self.config_obj.marker_size[series.idx], + + ), + secondary_y=series.y_axis != 1 + ) + + def _find_min_max(self, series: TcmprSeries, yaxis_min: Union[float, None], + yaxis_max: Union[float, None]) -> tuple: + """ + Finds min and max value between provided min and max and y-axis CI values of this series + if yaxis_min or yaxis_max is None - min/max value of the series is returned + + :param series: series to use for calculations + :param yaxis_min: previously calculated min value + :param yaxis_max: previously calculated max value + :return: a tuple with calculated min/max + """ + # calculate series upper and lower limits of CIs + + # Skip lead times for which no data is found + if len(series.series_data) == 0: + return yaxis_min, yaxis_max + + + if self.cur_baseline_data is not None and len(self.cur_baseline_data) > 0: + OCD5 = self.cur_baseline_data.loc[(self.cur_baseline_data['LEAD'].isin( self.config_obj.indy_vals)) & (self.cur_baseline_data['TYPE'] == 'OCD5')]['VALUE'].tolist() + CONS = self.cur_baseline_data.loc[(self.cur_baseline_data['LEAD'].isin( self.config_obj.indy_vals)) & (self.cur_baseline_data['TYPE'] == 'CONS')]['VALUE'].tolist() + baseline_lead = [ utils.round_half_up(100 * (ocd5-cons)/ocd5,1) for (ocd5,cons) in zip(OCD5,CONS )] + else: + baseline_lead = [] + + + # Get the values to be plotted for this lead times + all_values = series.series_points['val'] + baseline_lead + low_range = min([v for v in all_values if v is not None]) + 
upper_range = max([v for v in all_values if v is not None]) + + # find min max + if yaxis_min is None or yaxis_max is None: + return low_range, upper_range + + return min(yaxis_min, low_range), max(yaxis_max, upper_range) + + diff --git a/metplotpy/plots/tcmpr_plots/tcmpr_util.py b/metplotpy/plots/tcmpr_plots/tcmpr_util.py new file mode 100644 index 00000000..29d596e7 --- /dev/null +++ b/metplotpy/plots/tcmpr_plots/tcmpr_util.py @@ -0,0 +1,353 @@ +import os +import re +import sys + +import numpy as np +import pandas as pd +from scipy.stats import norm + +import metcalcpy.util.tost_paired as tp +import metcalcpy.util.utils as calc_util + + +def get_case_data(series_data, series_vals, indy_vals, rp_diff, total): + """ + Build a table with summary information for each case. + + :param series_data: + :param series_vals: + :param indy_vals: + :param rp_diff: + :param total: + :return: + """ + + # Build a set of unique cases + case_data = pd.DataFrame() + case_data['CASE'] = series_data['CASE'] + case_data['LEAD'] = series_data['LEAD'] + case_data['LEAD_HR'] = series_data['LEAD_HR'] + case_data['MIN'] = [None] * len(series_data) + case_data['MAX'] = [None] * len(series_data) + case_data['WIN'] = [None] * len(series_data) + case_data['DIFF'] = [None] * len(series_data) + case_data['RP_THRESH'] = [None] * len(series_data) + case_data['DIFF_TEST'] = [None] * len(series_data) + case_data['RESULT'] = [None] * len(series_data) + case_data['PLOT'] = [None] * len(series_data) + case_data['RANK_RANDOM'] = [None] * len(series_data) + case_data['RANK_MIN'] = [None] * len(series_data) + case_data = case_data.drop_duplicates() + case_data.reset_index(inplace=True, drop=True) + + # Check for equal numbers of entries for each case + list_of_counts = series_data['CASE'].value_counts().tolist() + count = sum(map(lambda x: x != total, list_of_counts)) + if count != 0: + raise SystemExit('ERROR: Must have the same number of entries for each case.') + # Compute summary info for each case + 
series_vals_sorted = series_vals[0].copy() + series_vals_sorted.sort() + case_data['MIN'] = series_data.groupby('CASE')['PLOT'].min().tolist() + case_data['MAX'] = series_data.groupby('CASE')['PLOT'].max().tolist() + case_data['WIN'] = series_data.groupby('CASE')['PLOT'].apply(find_winner, s_v=series_vals_sorted).tolist() + case_data['DIFF'] = case_data['MAX'] - case_data['MIN'] + case_data['RP_THRESH'] = case_data['LEAD_HR'].apply(find_thresh, args=(indy_vals, rp_diff)) + case_data['DIFF_TEST'] = case_data.apply(lambda x: f'{x["DIFF"]:.5f}' + str(x['RP_THRESH']), axis=1) + case_data['RESULT'] = case_data.apply(lambda x: eval(x['DIFF_TEST']), axis=1) + case_data['PLOT'] = case_data.apply(lambda x: x['WIN'] if x['RESULT'] is True else 'TIE', axis=1) + case_data['RANK_RANDOM'] = series_data.groupby('CASE')['PLOT'].apply(rank_random).tolist() + case_data['RANK_MIN'] = series_data.groupby('CASE')['PLOT'].apply(rank_min).tolist() + return case_data + + +def find_winner(x, s_v): + """ + functions for case data + :param x: + :param s_v: + :return: + """ + values = x.tolist() + if sum(1 for _ in filter(None.__ne__, values)) != len(values): + return None + return s_v[values.index(min(values))] + + +def find_thresh(x, indy_vals, rp_diff): + """ + # Aggregation functions for case data. 
+ :param rp_diff: + :param indy_vals: + :param x: + :return: + """ + thresh_ind = [i for i in range(len(indy_vals)) if indy_vals[i] == x][0] + return rp_diff[thresh_ind] + + +def rank_random(x): + values = x.tolist() + values_cleaned = [i for i in values if i is not None] + a = np.random.uniform(low=0, high=1, size=len(values_cleaned)) + zipped_lists = zip(values_cleaned, a) + sorted_zipped_lists = sorted(zipped_lists) + sorted_list1 = [element for _, element in sorted_zipped_lists] + return a.tolist().index(sorted_list1[0]) + 1 + + +def rank_min(x): + return x.rank(method="min").tolist()[0] + + +def init_hfip_baseline(config, baseline_file, input_df): + # Read the HFIP baseline information from a data file. + baseline = pd.read_csv(os.path.join(sys.path[0], baseline_file), + sep=r'\s+', header='infer', + quotechar='"', skipinitialspace=True, encoding='utf-8') + + baseline['LEAD_HR'] = baseline['LEAD'] / 10000 + baseline['LEAD_HR'] = baseline['LEAD_HR'].astype('int') + + cur_baseline_data = None + cur_baseline = "no" + for stat in config.list_stat_1: + if config.hfip_bsln == "no" or len(config.get_config_value('derived_series_1')) > 0: + cur_baseline = "no" + cur_baseline_data = None + elif stat in baseline['VARIABLE'].tolist(): + if cur_baseline_data is None: + all_filters = [baseline['BASIN'].isin(input_df['BASIN']), baseline['VARIABLE'].isin([stat]), + baseline['LEAD_HR'].isin(input_df['LEAD_HR'])] + mask = np.array(all_filters).all(axis=0) + cur_baseline_data = baseline.loc[mask] + if config.hfip_bsln == "0": + cur_baseline = "HFIP Baseline" + elif config.hfip_bsln == "5": + cur_baseline = 'Error Target for 20% HFIP Goal' + cur_baseline_data['VALUE'] = cur_baseline_data['VALUE'].apply( + lambda x: calc_util.round_half_up(x * 0.8, 1)) + else: # config.hfip_bsln == "10": + cur_baseline = 'Error Target for 50% HFIP Goal' + cur_baseline_data['VALUE'] = cur_baseline_data['VALUE'].apply( + lambda x: calc_util.round_half_up(x * 0.5, 1)) + + return {'cur_baseline': 
cur_baseline, + 'cur_baseline_data': cur_baseline_data} + + +def get_column_val(dep, input_df): + """ + Get the column values, handling wind data. + :param dep: + :param input_df: + :return: + """ + # Compute the average of the wind radii, if requested + if 'AVG_WIND' in dep: + # Parse the first character and the last 2 characters + typ = dep[0] + rad = dep[-2:] + + # Pull wind radii for the 4 quadrants + ne_wind = input_df[typ + "NE_WIND_" + rad].tolist() + se_wind = input_df[typ + "SE_WIND_" + rad].tolist() + sw_wind = input_df[typ + "SW_WIND_" + rad].tolist() + nw_wind = input_df[typ + "NW_WIND_" + rad].tolist() + + # Replace any instances of 0 with NA + ne_wind = [None if i == 0 else i for i in ne_wind] + se_wind = [None if i == 0 else i for i in se_wind] + sw_wind = [None if i == 0 else i for i in sw_wind] + nw_wind = [None if i == 0 else i for i in nw_wind] + + # Compute the average + val = [(ne + se + sw + nw) / 4 for ne, se, sw, nw in zip(ne_wind, se_wind, sw_wind, nw_wind)] + + else: + # Otherwise, just get the column value + val = input_df[dep].tolist() + # For _WIND_ columns, replace any instances of 0 with NA + if '_WIND_' in dep: + val = [None if i == 0 else i for i in val] + + return val + + +def get_prop_ci(x, n, n_min, alpha): + """ + Compute a confidence interval for a proportion. + :param alpha: + :param n_min: + :param x: + :param n: + :return: + """ + + # Compute the standard proportion error + zval = abs(norm.ppf(alpha / 2)) + phat = x / n + bound = (zval * ((phat * (1 - phat) + (zval ** 2) / (4 * n)) / n) ** (1 / 2)) / (1 + (zval ** 2) / n) + midpnt = (phat + (zval ** 2) / (2 * n)) / (1 + (zval ** 2) / n) + + # Compute the statistic and confidence interval + stat = {'val': 100 * phat} + if n < n_min: + stat['ncl'] = None + stat['ncu'] = None + else: + stat['ncl'] = 100 * (midpnt - bound) + stat['ncu'] = 100 * (midpnt + bound) + return stat + + +def get_mean_ci(d, alpha, n_min): + """ + Compute a confidence interval about the mean. 
+ :param n_min: + :param alpha: + :param d: + :return: + """ + len_valid = len([x for x in d if x is not None and ~np.isnan(x)]) + # Degrees of freedom for t-distribution + df = len_valid - 1 + + # replace Nan to None if needed + if len_valid != len(d): + d_cleaned = [x for x in d if ~np.isnan(x)] + else: + d_cleaned = d + # Compute the standard error + s = calc_util.compute_std_err_from_mean(d_cleaned) + if s[1] == 0: + tval = abs(tp.qt(alpha / 2, df)) + stderr = tval * s[0] + else: + stderr = None + + # Compute the statistic and confidence interval + stat = {'val': np.nanmean(d)} + if len_valid < n_min or stderr is None: + stat['ncl'] = None + stat['ncu'] = None + else: + stat['ncl'] = stat['val'] - stderr + stat['ncu'] = stat['val'] + stderr + + # Compute the p-value + if s[0] != 0: + ss_pval = 0.0 - abs(stat['val'] / s[0]) + cum_t_distrib = tp.pt(ss_pval, df) + else: + # in this case in Rscript ss_pval = -Inf and pt(ss_pval, df) = 0 + cum_t_distrib = 0 + stat['pval'] = 1 - 2 * cum_t_distrib + + return stat + + +def get_median_ci(d, alpha, n_min): + """ + Compute a confidence interval about the median. 
+ :param d: + :param alpha: + :param n_min: + :return: + """ + len_valid = len([x for x in d if x is not None and ~np.isnan(x)]) + # Degrees of freedom for t-distribution + df = len_valid - 1 + + # replace Nan to None if needed + if len_valid != len(d): + d_cleaned = [x for x in d if ~np.isnan(x)] + else: + d_cleaned = d + + # Compute the standard error + s = calc_util.compute_std_err_from_median_no_variance_inflation_factor(d_cleaned) + if s[1] == 0: + tval = abs(tp.qt(alpha / 2, df)) + stderr = tval * s[0] + else: + stderr = None + + # Compute the statistic and confidence interval + stat = {'val': np.nanmedian(d)} + if len_valid < n_min or stderr is None: + stat['ncl'] = None + stat['ncu'] = None + else: + stat['ncl'] = stat['val'] - stderr + stat['ncu'] = stat['val'] + stderr + + # Compute the p-value + if s[0] != 0: + ss_pval = 0.0 - abs(stat['val'] / s[0]) + cum_t_distrib = tp.pt(ss_pval, df) + else: + # in this case in Rscript ss_pval = -Inf and pt(ss_pval, df) = 0 + cum_t_distrib = 0 + + stat['pval'] = 1 - 2 * cum_t_distrib + + return stat + + +def common_member(a, b): + """ + method to check if two lists have at-least one element common + :param a: + :param b: + :return: + """ + a_set = set(a) + b_set = set(b) + if len(a_set.intersection(b_set)) > 0: + return True + return False + + +def get_dep_column(stat, column_info, input_df): + """ + Get a column of data to be plotted, handling absolute values and differences. 
+ :param stat: + :param column_info: + :param input_df: + :return: + """ + # Check for absolute value + abs_flag = stat[0: 3] == 'ABS' + + # Split based on differences + if abs_flag is True: + dep = re.split(r'[()]', stat)[1] + else: + dep = stat + diff_list = re.split(r'-', dep) + + # Initialize output + col_to_plot = {'val': get_column_val(diff_list[0], input_df)} + list_desc = column_info[column_info['COLUMN'] == diff_list[0]]['DESCRIPTION'].tolist() + if len(list_desc) > 0: + col_to_plot['desc'] = list_desc[0] + col_to_plot['units'] = column_info[column_info['COLUMN'] == diff_list[0]]['UNITS'].tolist()[0] + else: + col_to_plot['desc'] = '' + col_to_plot['units'] = '' + # Loop over any remaining entries + for i in range(1, len(diff_list), 1): + val = get_column_val(diff_list[i], input_df) + col_to_plot['val'] = [x - y for x, y in zip(col_to_plot['val'], val)] + + col_to_plot['desc'] = f"{col_to_plot['desc']}-{column_info[column_info['COLUMN'] == diff_list[i]]['DESCRIPTION'].tolist()[0]}" + # Only append units that differ + if col_to_plot['units'] != column_info[column_info['COLUMN'] == diff_list[i]]['UNITS'].tolist()[0]: + col_to_plot['units'] = f"{col_to_plot['units']}{column_info[column_info['COLUMN'] == diff_list[i]]['UNITS'].tolist()[0]}" + # Apply absolute value + if abs_flag is True: + col_to_plot['val'] = [abs(ele) for ele in col_to_plot['val']] + col_to_plot['desc'] = f"Absolute Value of {col_to_plot['desc']}" + + return col_to_plot From c4ace2ee148b56200ea6cae4267a0eb98ac9c80c Mon Sep 17 00:00:00 2001 From: TatianaBurek Date: Mon, 27 Feb 2023 14:55:40 -0700 Subject: [PATCH 5/8] fixed the type of config_value in calculate_plot_dimension to string #240 --- metplotpy/plots/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metplotpy/plots/config.py b/metplotpy/plots/config.py index b0c7ee51..16d88343 100644 --- a/metplotpy/plots/config.py +++ b/metplotpy/plots/config.py @@ -598,7 +598,7 @@ def create_list_by_plot_val_ordering(self, 
setting_to_order: str) -> list: return ordered_settings_list - def calculate_plot_dimension(self, config_value: int , output_units: str) -> int: + def calculate_plot_dimension(self, config_value: str , output_units: str) -> int: ''' To calculate the width or height that defines the size of the plot. Matplotlib defines these values in inches, Python plotly defines these From 33b37d3025717664f360fedbda578f6b1603fe7a Mon Sep 17 00:00:00 2001 From: TatianaBurek Date: Mon, 27 Feb 2023 14:56:13 -0700 Subject: [PATCH 6/8] add more plotly markers #240 --- metplotpy/plots/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metplotpy/plots/constants.py b/metplotpy/plots/constants.py index 2b9e09bc..2c8fb35c 100644 --- a/metplotpy/plots/constants.py +++ b/metplotpy/plots/constants.py @@ -62,9 +62,9 @@ AVAILABLE_MARKERS_LIST = ["o", "^", "s", "d", "H", ".", "h"] -AVAILABLE_PLOTLY_MARKERS_LIST = ["open-circle", "circle", +AVAILABLE_PLOTLY_MARKERS_LIST = ["circle-open", "circle", "square", "diamond", - "hexagon", "triangle-up"] + "hexagon", "triangle-up", "asterisk-open"] PCH_TO_MATPLOTLIB_MARKER = {'20': '.', '19': 'o', '17': '^', '1': 'H', '18': 'd', '15': 's', 'small circle': '.', From f276c026172006d318d27a02788fb3d0fabc35b3 Mon Sep 17 00:00:00 2001 From: TatianaBurek Date: Mon, 27 Feb 2023 14:57:02 -0700 Subject: [PATCH 7/8] check if image name is not None before removing the old image #240 --- metplotpy/plots/base_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metplotpy/plots/base_plot.py b/metplotpy/plots/base_plot.py index 0194bee8..c2825f48 100644 --- a/metplotpy/plots/base_plot.py +++ b/metplotpy/plots/base_plot.py @@ -355,7 +355,7 @@ def remove_file(self): image_name = self.get_config_value('plot_filename') # remove the old file if it exist - if os.path.exists(image_name): + if image_name is not None and os.path.exists(image_name): os.remove(image_name) def show_in_browser(self): From 
6bf19ee73b5599a8a62a996ee2cd362a5e2e2c1f Mon Sep 17 00:00:00 2001 From: TatianaBurek Date: Mon, 27 Feb 2023 15:00:11 -0700 Subject: [PATCH 8/8] initial version #240 --- metplotpy/plots/config/tcmpr_defaults.yaml | 147 +++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 metplotpy/plots/config/tcmpr_defaults.yaml diff --git a/metplotpy/plots/config/tcmpr_defaults.yaml b/metplotpy/plots/config/tcmpr_defaults.yaml new file mode 100644 index 00000000..01c20120 --- /dev/null +++ b/metplotpy/plots/config/tcmpr_defaults.yaml @@ -0,0 +1,147 @@ +alpha: 0.05 +colors: + - '#696969' + - '#0000FF' + - '#008000' + - '#000000' + - '#ff0000' + - '#800080' + - '#FFA500' +create_html: 'False' +derived_series_1: [] +fixed_vars_vals_input: + BASIN: + BASIN_0: + - AL + +grid_col: '#cccccc' +grid_lty: 3 +grid_lwd: 1 +grid_on: 'False' +grid_x: listX + +indy_vals: [] + +indy_label: [] + +indy_var: 'LEAD' +legend_box: o +legend_inset: + x: 0.0 + y: -0.25 +legend_ncol: 3 +legend_size: 0.8 +line_type: None + +list_stat_1: [] + +mar: + l: 0 #left margin + r: 0 #right margin + b: 105 #bottom margin + t: 120 #top margin +mgp: +- 1 +- 1 +- 0 +plot_caption: '' +plot_disp: [] + +plot_height: 8.5 +plot_res: 72 +plot_stat: median +plot_type: png16m +plot_units: in +plot_width: 11.0 +series_order: [] +series_val_1: {} +series_ci: [] +series_line_width: +- 1 +- 1 +- 1 +series_line_style: + - '-' + - '-' + - '-' +series_symbols: +- 'circle-open' +- 'circle-open' +- 'circle-open' + +series_symbols_size: + - 7 + - 7 + - 7 + + +show_nstats: 'True' + +tcst_dir: +tcst_files: [] + +title: +title_align: 0.5 +title_offset: -2.0833 +title_size: 1.4 +title_weight: 2.0 +user_legend: [] +x2lab_align: 0.5 +x2lab_offset: -0.5 +x2lab_size: 0.8 +x2lab_weight: 1 +x2tlab_horiz: 0.5 +x2tlab_orient: 1 +x2tlab_perp: 1 +x2tlab_size: 0.8 +xaxis: 'Lead Time(h)' +xaxis_reverse: 'False' +xlab_align: 0.5 +xlab_offset: 2 +xlab_size: 1 +xlab_weight: 1 +xlim: [] +xtlab_decim: 0 +xtlab_horiz: 0.5 +xtlab_orient: 
1 +xtlab_perp: -0.75 +xtlab_size: 1 + +yaxis_1: +ylab_align: 0.5 +ylab_offset: 15 +ylab_size: 1 +ylab_weight: 1 +ylim: [] +ytlab_horiz: 0.5 +ytlab_orient: 1 +ytlab_perp: 0.5 +ytlab_size: 1 + +box_notch: True +box_outline: True +box_avg: True + +caption_size: 0.8 +caption_offset: 3 +caption_weight: 1 +caption_align: 0 +caption_col: '#333333' +n_min: 11 +plot_list: [] # boxplot, point, mean, median, relperf, rank, scatter ,skill_mn, skill_md +rp_diff: + - '>=100' +hfip_bsln: no +footnote_flag: 'False' +event_equal: 'False' +skill_ref: [] +demo_yr: NA #not used in Rscript. not sure if we need it +scatter_x: [] +scatter_y: [] +plot_dir: './' +subtitle: '' +prefix: +baseline_file: ./hfip_baseline.dat +column_info_file: ./plot_tcmpr_hdr.dat + +