Skip to content

Commit

Permalink
issue #424 add support to read in the reformatted CTC linetype data p…
Browse files Browse the repository at this point in the history
…roduced by the METdataio reformatter
  • Loading branch information
bikegeek committed Feb 26, 2024
1 parent ecf6340 commit 7d5e054
Showing 1 changed file with 52 additions and 3 deletions.
55 changes: 52 additions & 3 deletions metplotpy/plots/roc_diagram/roc_diagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def __init__(self, parameters):
if len(self.series_list) > 0:
self._add_lines(self.config_obj)

def _read_input_data(self) -> pd.DataFrame:
    """
    Read the input data file (either CTC or PCT linetype)
    and store it as a pandas dataframe.

    The file named by self.config_obj.stat_input is read as tab-separated
    text, with the column names inferred from the file.  When
    self.config_obj.linetype_ctc is set and the data has no 'fy_oy' column,
    the data is treated as output of the METdataio reformatter and is
    converted from long to wide format with ctc_long_to_wide().

    Args:

    Returns: input_df the dataframe representation of the input data
             (the wide-format dataframe for reformatted CTC data)
    """
    self.logger.info("Reading input data.")
    input_df = pd.read_csv(self.config_obj.stat_input, sep='\t', header='infer')

    # PCT data is used exactly as it was read.
    if not self.config_obj.linetype_ctc:
        return input_df

    # CTC data that already has a 'fy_oy' column was created from the
    # METviewer database and needs no reshaping.
    if 'fy_oy' in input_df.columns:
        return input_df

    # A missing 'fy_oy' column means the data was created by the METdataio
    # reformatter: fcst_thresh, fy_oy, fy_on, fn_on, and fn_oy live under the
    # stat_name and stat_value columns and must be modified from long to wide.
    return self.ctc_long_to_wide(input_df)

def ctc_long_to_wide(self, input_df: pd.DataFrame) -> pd.DataFrame:
    """
    Convert the dataframe representation of the CTC linetype data (that was
    reformatted by METdataio) from long to wide format.  The fcst_thresh,
    fy_oy, fy_on, fn_oy, and fn_on will be in separate columns, rather than
    residing under the stat_name and stat_value columns.

    Args:
        input_df: The input dataframe that represents the CTC data
                  reformatted by METdataio.  It must contain the stat_name
                  and stat_value columns.

    Returns: ctc_df: a dataframe with one column per distinct stat_name
             value (holding the matching stat_value), plus every input
             column that was used as the pivot index.  All column labels
             are lower case.
    """
    # Columns that never take part in the pivot index: the two columns being
    # pivoted, the confidence-limit columns, and the Idx column.
    ignore_cols = {'Idx', 'stat_name', 'stat_value', 'stat_bcl', 'stat_bcu',
                   'stat_ncl', 'stat_ncu'}

    # Every remaining column, in its original order, identifies a wide row.
    col_index = [col for col in input_df.columns if col not in ignore_cols]

    df_wide = input_df.pivot(index=col_index, columns='stat_name', values='stat_value')

    # Turn the pivot index back into ordinary columns.
    reset_df_wide = df_wide.reset_index()

    # Convert all the header names (column labels) to all lower case
    reset_df_wide.columns = [label.lower() for label in reset_df_wide.columns]

    return reset_df_wide

def _create_series(self, input_data):
"""
Expand Down

0 comments on commit 7d5e054

Please sign in to comment.