From 8e9eaba49047125ce11db448fee95af67fac5bca Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 6 Apr 2023 13:34:40 -0400 Subject: [PATCH 1/8] Add plotting function and callback for generating pipeline status bar graph --- proc_dash/app.py | 19 ++++++++++++++++ proc_dash/utility.py | 52 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/proc_dash/app.py b/proc_dash/app.py index b758052..1cad23b 100644 --- a/proc_dash/app.py +++ b/proc_dash/app.py @@ -93,6 +93,7 @@ ), ] ), + dcc.Graph(id="pipeline-completion", style={"display": "none"}), ] ) @@ -179,5 +180,23 @@ def reset_table(contents, filename): raise PreventUpdate +@app.callback( + [ + Output("pipeline-completion", "figure"), + Output("pipeline-completion", "style"), + ], + Input( + "interactive-datatable", "data" + ), # Input not triggered by datatable frontend filtering + prevent_initial_call=True, +) +def update_overview_status_fig(data): + if data is not None: + df = pd.DataFrame.from_dict(data) + return util.create_overview_status_fig(df), {"display": "block"} + + return {"data": [], "layout": {}, "frames": []}, {"display": "none"} + + if __name__ == "__main__": app.run_server(debug=True) diff --git a/proc_dash/utility.py b/proc_dash/utility.py index f0aad52..556ee76 100644 --- a/proc_dash/utility.py +++ b/proc_dash/utility.py @@ -5,9 +5,18 @@ from typing import Optional, Tuple import pandas as pd +import plotly.express as px SCHEMAS_PATH = Path(__file__).absolute().parents[1] / "schemas" +STATUS_CMAP = px.colors.qualitative.Bold +STATUS_COLORS = { + "SUCCESS": STATUS_CMAP[5], + "FAIL": STATUS_CMAP[9], + "INCOMPLETE": STATUS_CMAP[3], + "UNAVAILABLE": STATUS_CMAP[10], +} + def get_required_bagel_columns() -> list: """Returns names of required columns from the bagel schema.""" @@ -99,6 +108,9 @@ def get_pipelines_overview(bagel: pd.DataFrame) -> pd.DataFrame: "-".join(tup) for tup in pipeline_complete_df.columns.to_flat_index() ] + pipeline_complete_df = pipeline_complete_df.reindex( + sorted(pipeline_complete_df.columns), axis=1 + ) pipeline_complete_df.reset_index(inplace=True) return pipeline_complete_df @@ -173,3 +185,43 @@ def filter_by_sessions( data = data[data["session"].isin(session_values)] return data + + +def create_overview_status_fig(data: pd.DataFrame): + long_data = pd.melt( + data, + id_vars="participant_id", + value_vars=list(data.columns[2:]), + var_name="pipeline_name", + value_name="pipeline_complete", + ) + status_counts = ( + long_data.groupby(["pipeline_name", "pipeline_complete"]) + .size() + .reset_index(name="records") + ) + + fig = px.bar( + status_counts, + x="pipeline_name", + y="records", + color="pipeline_complete", + text_auto=True, + category_orders={ + "pipeline_complete": [ + "SUCCESS", + "FAIL", + "INCOMPLETE", + "UNAVAILABLE", + ] + }, + color_discrete_map=STATUS_COLORS, + labels={ + "pipeline_name": "Pipeline", + "records": "Unique records", + "pipeline_complete": "Processing status", + }, + ) + fig.update_traces(textposition="outside", cliponaxis=False) + + return fig From 2385b2a296fdddb13851233d8f2388463861481f Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Fri, 7 Apr 2023 04:00:52 -0400 Subject: [PATCH 2/8] Implement graph component for participant status counts by session, grouped by pipeline - Plotting utils refactored into separate module - Created constant for returning an empty `figure` property - Added dbc GRID stylesheet to organize graphs --- proc_dash/app.py | 77 +++++++++++++++++++++++++++++++----- proc_dash/plotting.py | 91 
+++++++++++++++++++++++++++++++++++++++++++ proc_dash/utility.py | 49 ----------------------- 3 files changed, 159 insertions(+), 58 deletions(-) create mode 100644 proc_dash/plotting.py diff --git a/proc_dash/app.py b/proc_dash/app.py index 1cad23b..833067a 100644 --- a/proc_dash/app.py +++ b/proc_dash/app.py @@ -8,12 +8,18 @@ from dash.dependencies import Input, Output, State from dash.exceptions import PreventUpdate +import proc_dash.plotting as plot import proc_dash.utility as util from dash import Dash, ctx, dash_table, dcc, html +EMPTY_FIGURE_PROPS = {"data": [], "layout": {}, "frames": []} + app = Dash( __name__, - external_stylesheets=["https://codepen.io/chriddyp/pen/bWLwgP.css"], + external_stylesheets=[ + "https://codepen.io/chriddyp/pen/bWLwgP.css", + dbc.themes.GRID, + ], ) @@ -49,7 +55,10 @@ page_size=50, fixed_rows={"headers": True}, style_table={"height": "300px", "overflowY": "auto"}, - ), # TODO: Treat all columns as strings to standardize filtering syntax? + ), + # NOTE: Could cast columns to strings for the datatable to standardize filtering syntax, + # but this results in undesirable effects (e.g., if there is session 1 and session 11, + # a query for "1" would return both) ], style={"margin-top": "10px", "margin-bottom": "10px"}, ), @@ -93,7 +102,22 @@ ), ] ), - dcc.Graph(id="pipeline-completion", style={"display": "none"}), + dbc.Row( + [ + # NOTE: Legend displayed for both graphs so that user can toggle visibility of status data + dbc.Col( + dcc.Graph( + id="fig-pipeline-status", style={"display": "none"} + ) + ), + dbc.Col( + dcc.Graph( + id="fig-pipeline-status-all-ses", + style={"display": "none"}, + ) + ), + ], + ), ] ) @@ -182,20 +206,55 @@ def reset_table(contents, filename): @app.callback( [ - Output("pipeline-completion", "figure"), - Output("pipeline-completion", "style"), + Output("fig-pipeline-status-all-ses", "figure"), + Output("fig-pipeline-status-all-ses", "style"), + ], + Input("upload-data", "contents"), + State("upload-data", "filename"), + prevent_initial_call=True, +) +def generate_overview_status_fig_for_participants(contents, filename): + """ + If new dataset uploaded, generate stacked bar plot of pipeline_complete statuses per session, + grouped by pipeline. Provides overview of the number of participants with each status in a given session, + per processing pipeline. + """ + if contents is None: + raise PreventUpdate + data, total_subjects, sessions, upload_error = util.parse_csv_contents( + contents=contents, filename=filename + ) + if upload_error is not None: + return EMPTY_FIGURE_PROPS, {"display": "none"} + + return plot.plot_pipeline_status_by_participants(data), { + "display": "block" + } + + +@app.callback( + [ + Output("fig-pipeline-status", "figure"), + Output("fig-pipeline-status", "style"), ], Input( "interactive-datatable", "data" ), # Input not triggered by datatable frontend filtering prevent_initial_call=True, ) -def update_overview_status_fig(data): +def update_overview_status_fig_for_records(data): + """ + When visible data in the overview datatable is updated (excluding built-in frontend datatable filtering + but including component filtering for multiple sessions), generate stacked bar plot of pipeline_complete + statuses aggregated by pipeline. Counts of statuses in plot thus correspond to unique records (unique + participant-session combinations). 
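+    For example, a participant with two available sessions contributes two records
+    to each pipeline's status counts.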
+ """ if data is not None: - df = pd.DataFrame.from_dict(data) - return util.create_overview_status_fig(df), {"display": "block"} + return plot.plot_pipeline_status_by_records( + pd.DataFrame.from_dict(data) + ), {"display": "block"} - return {"data": [], "layout": {}, "frames": []}, {"display": "none"} + return EMPTY_FIGURE_PROPS, {"display": "none"} if __name__ == "__main__": diff --git a/proc_dash/plotting.py b/proc_dash/plotting.py new file mode 100644 index 0000000..e5a929c --- /dev/null +++ b/proc_dash/plotting.py @@ -0,0 +1,91 @@ +import pandas as pd +import plotly.express as px + +STATUS_CMAP = px.colors.qualitative.Bold +STATUS_COLORS = { + "SUCCESS": STATUS_CMAP[5], + "FAIL": STATUS_CMAP[9], + "INCOMPLETE": STATUS_CMAP[3], + "UNAVAILABLE": STATUS_CMAP[10], +} +PIPELINE_STATUS_ORDER = ["SUCCESS", "FAIL", "INCOMPLETE", "UNAVAILABLE"] +LAYOUTS = { + "margin": {"l": 30, "r": 30, "t": 60, "b": 30}, # margins of chart + "title": { # figure title position properties + "yref": "container", + "y": 1, + "yanchor": "top", + "pad": {"t": 20}, + }, +} + + +def plot_pipeline_status_by_participants(data: pd.DataFrame): + long_data = pd.melt( + data, + id_vars=["participant_id", "session"], + var_name="pipeline_name", + value_name="pipeline_complete", + ) + status_counts = ( + long_data.groupby(["pipeline_name", "pipeline_complete", "session"]) + .size() + .reset_index(name="participants") + ) + + fig = px.bar( + status_counts, + x="session", + y="participants", + color="pipeline_complete", + text_auto=True, + facet_col="pipeline_name", + category_orders={"pipeline_complete": PIPELINE_STATUS_ORDER}, + color_discrete_map=STATUS_COLORS, + labels={ + "pipeline_name": "Pipeline", + "participants": "Participants (n)", + "pipeline_complete": "Processing status", + "session": "Session", + }, + title="Overview: Participant pipeline statuses by session", + ) + # Treat session labels as categorical in plot to avoid a continuous x-axis + fig.update_xaxes(type="category") + fig.update_layout(margin=LAYOUTS["margin"], title=LAYOUTS["title"]) + + return fig + + +def plot_pipeline_status_by_records(data: pd.DataFrame): + long_data = pd.melt( + data, + id_vars=["participant_id", "session"], + var_name="pipeline_name", + value_name="pipeline_complete", + ) + status_counts = ( + long_data.groupby(["pipeline_name", "pipeline_complete"]) + .size() + .reset_index(name="records") + ) + + fig = px.bar( + status_counts, + x="pipeline_name", + y="records", + color="pipeline_complete", + text_auto=True, + category_orders={"pipeline_complete": PIPELINE_STATUS_ORDER}, + color_discrete_map=STATUS_COLORS, + labels={ + "pipeline_name": "Pipeline", + "records": "Records (n)", + "pipeline_complete": "Processing status", + }, + title="Selected sessions: Pipeline statuses of unique matching records (default: all)" + # alternative title: "Pipeline statuses of unique records for selected sessions (default: all)" + ) + fig.update_layout(margin=LAYOUTS["margin"], title=LAYOUTS["title"]) + + return fig diff --git a/proc_dash/utility.py b/proc_dash/utility.py index 556ee76..82f9d85 100644 --- a/proc_dash/utility.py +++ b/proc_dash/utility.py @@ -5,18 +5,9 @@ from typing import Optional, Tuple import pandas as pd -import plotly.express as px SCHEMAS_PATH = Path(__file__).absolute().parents[1] / "schemas" -STATUS_CMAP = px.colors.qualitative.Bold -STATUS_COLORS = { - "SUCCESS": STATUS_CMAP[5], - "FAIL": STATUS_CMAP[9], - "INCOMPLETE": STATUS_CMAP[3], - "UNAVAILABLE": STATUS_CMAP[10], -} - def get_required_bagel_columns() -> list: 
"""Returns names of required columns from the bagel schema.""" @@ -185,43 +176,3 @@ def filter_by_sessions( data = data[data["session"].isin(session_values)] return data - - -def create_overview_status_fig(data: pd.DataFrame): - long_data = pd.melt( - data, - id_vars="participant_id", - value_vars=list(data.columns[2:]), - var_name="pipeline_name", - value_name="pipeline_complete", - ) - status_counts = ( - long_data.groupby(["pipeline_name", "pipeline_complete"]) - .size() - .reset_index(name="records") - ) - - fig = px.bar( - status_counts, - x="pipeline_name", - y="records", - color="pipeline_complete", - text_auto=True, - category_orders={ - "pipeline_complete": [ - "SUCCESS", - "FAIL", - "INCOMPLETE", - "UNAVAILABLE", - ] - }, - color_discrete_map=STATUS_COLORS, - labels={ - "pipeline_name": "Pipeline", - "records": "Unique records", - "pipeline_complete": "Processing status", - }, - ) - fig.update_traces(textposition="outside", cliponaxis=False) - - return fig From ae853cd4e2de937cc03c1b1d620efa817c962e17 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Sat, 8 Apr 2023 00:41:57 -0400 Subject: [PATCH 3/8] add function to construct string contents of pipeline status legend component --- proc_dash/utility.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/proc_dash/utility.py b/proc_dash/utility.py index 82f9d85..8d5317e 100644 --- a/proc_dash/utility.py +++ b/proc_dash/utility.py @@ -9,6 +9,15 @@ SCHEMAS_PATH = Path(__file__).absolute().parents[1] / "schemas" +def construct_legend_str(status_desc: dict) -> str: + """From a dictionary, constructs a legend-style string with multiple lines in the format of key: value.""" + legend_str = "" + for status, desc in status_desc.items(): + legend_str += status + ": " + desc + "\n" + + return legend_str + + def get_required_bagel_columns() -> list: """Returns names of required columns from the bagel schema.""" with open(SCHEMAS_PATH / "bagel_schema.json", "r") as file: From 718c2a6dae0fefeb0f49f68605eda8d1da1393dd Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Sat, 8 Apr 2023 00:50:20 -0400 Subject: [PATCH 4/8] Add component for descriptive legend of pipeline_complete statuses - primary stylesheet for app changed to dbc theme to use card and layout components --- proc_dash/app.py | 132 +++++++++++++++++++++++++++++++---------------- 1 file changed, 88 insertions(+), 44 deletions(-) diff --git a/proc_dash/app.py b/proc_dash/app.py index 833067a..6d54f4b 100644 --- a/proc_dash/app.py +++ b/proc_dash/app.py @@ -14,13 +14,14 @@ EMPTY_FIGURE_PROPS = {"data": [], "layout": {}, "frames": []} -app = Dash( - __name__, - external_stylesheets=[ - "https://codepen.io/chriddyp/pen/bWLwgP.css", - dbc.themes.GRID, - ], -) +PIPE_COMPLETE_STATUS_SHORT_DESC = { + "SUCCESS": "All stages of pipeline finished successfully (all expected output files present).", + "FAIL": "At least one stage of the pipeline failed.", + "INCOMPLETE": "Pipeline has not yet been run or at least one stage is unfinished/still running.", + "UNAVAILABLE": "Relevant data modality for pipeline not available.", +} + +app = Dash(__name__, external_stylesheets=[dbc.themes.FLATLY]) app.layout = html.Div( @@ -28,14 +29,16 @@ html.H2(children="Neuroimaging Derivatives Status Dashboard"), dcc.Upload( id="upload-data", - children=html.Button("Drag and Drop or Select .csv File"), + children=dbc.Button( + "Drag and Drop or Select .csv File", color="secondary" + ), # TODO: Constrain click responsive area of button style={"margin-top": "10px", "margin-bottom": "10px"}, multiple=False, ), 
html.Div( id="output-data-upload", children=[ - html.H6(id="input-filename"), + html.H4(id="input-filename"), html.Div( children=[ html.Div(id="total-participants"), @@ -55,6 +58,9 @@ page_size=50, fixed_rows={"headers": True}, style_table={"height": "300px", "overflowY": "auto"}, + style_cell={ + "fontSize": 13 # accounts for font size inflation by dbc theme + }, ), # NOTE: Could cast columns to strings for the datatable to standardize filtering syntax, # but this results in undesirable effects (e.g., if there is session 1 and session 11, @@ -62,43 +68,80 @@ ], style={"margin-top": "10px", "margin-bottom": "10px"}, ), - dbc.Card( + dbc.Row( [ - # TODO: Put label and dropdown in same row - html.Div( - [ - dbc.Label("Filter by multiple sessions:"), - dcc.Dropdown( - id="session-dropdown", - options=[], - multi=True, - placeholder="Select one or more available sessions to filter by", - # TODO: Can set `disabled=True` here to prevent any user interaction before file is uploaded - ), - ] + dbc.Col( + dbc.Form( + [ + # TODO: Put label and dropdown in same row + html.Div( + [ + dbc.Label( + "Filter by multiple sessions:", + html_for="session-dropdown", + className="mb-0", + ), + dcc.Dropdown( + id="session-dropdown", + options=[], + multi=True, + placeholder="Select one or more available sessions to filter by", + # TODO: Can set `disabled=True` here to prevent any user interaction before file is uploaded + ), + ], + className="mb-2", # Add margin to keep dropdowns spaced apart + ), + html.Div( + [ + dbc.Label( + "Selection operator:", + html_for="select-operator", + className="mb-0", + ), + dcc.Dropdown( + id="select-operator", + options=[ + { + "label": "AND", + "value": "AND", + "title": "Show only participants with all selected sessions.", + }, + { + "label": "OR", + "value": "OR", + "title": "Show participants with any of the selected sessions.", + }, + ], + value="AND", + clearable=False, + # TODO: Can set `disabled=True` here to prevent any user interaction before file is uploaded + ), + ], + className="mb-2", + ), + ], + ) ), - html.Div( - [ - dbc.Label("Selection operator:"), - dcc.Dropdown( - id="select-operator", - options=[ - { - "label": "AND", - "value": "AND", - "title": "Show only participants with all selected sessions.", - }, - { - "label": "OR", - "value": "OR", - "title": "Show participants with any of the selected sessions.", - }, - ], - value="AND", - clearable=False, - # TODO: Can set `disabled=True` here to prevent any user interaction before file is uploaded + dbc.Col( + dbc.Card( + dbc.CardBody( + [ + html.H5( + "Legend: Processing status", + className="card-title", + ), + html.P( + children=util.construct_legend_str( + PIPE_COMPLETE_STATUS_SHORT_DESC + ), + style={ + "whiteSpace": "pre" # preserve newlines + }, + className="card-text", + ), + ] ), - ] + ) ), ] ), @@ -118,7 +161,8 @@ ), ], ), - ] + ], + style={"padding": "10px 10px 10px 10px"}, ) From c3ee12e0ee46b78affda09b80f07fda33213509b Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Sat, 8 Apr 2023 00:50:54 -0400 Subject: [PATCH 5/8] minor title change to graph --- proc_dash/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proc_dash/plotting.py b/proc_dash/plotting.py index e5a929c..c83e0d1 100644 --- a/proc_dash/plotting.py +++ b/proc_dash/plotting.py @@ -83,7 +83,7 @@ def plot_pipeline_status_by_records(data: pd.DataFrame): "records": "Records (n)", "pipeline_complete": "Processing status", }, - title="Selected sessions: Pipeline statuses of unique matching records (default: all)" + 
title="Selected sessions: Pipeline statuses of matching records (default: all)" # alternative title: "Pipeline statuses of unique records for selected sessions (default: all)" ) fig.update_layout(margin=LAYOUTS["margin"], title=LAYOUTS["title"]) From dbb72032bc88fe84cfaf1f5bd413083cded8cca8 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Fri, 14 Apr 2023 13:19:35 -0400 Subject: [PATCH 6/8] refactor plotting functions --- proc_dash/plotting.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/proc_dash/plotting.py b/proc_dash/plotting.py index c83e0d1..7c716f7 100644 --- a/proc_dash/plotting.py +++ b/proc_dash/plotting.py @@ -20,15 +20,19 @@ } -def plot_pipeline_status_by_participants(data: pd.DataFrame): - long_data = pd.melt( +def transform_data_to_long(data: pd.DataFrame) -> pd.DataFrame: + return pd.melt( data, id_vars=["participant_id", "session"], var_name="pipeline_name", value_name="pipeline_complete", ) + + +def plot_pipeline_status_by_participants(data: pd.DataFrame): status_counts = ( - long_data.groupby(["pipeline_name", "pipeline_complete", "session"]) + transform_data_to_long(data) + .groupby(["pipeline_name", "pipeline_complete", "session"]) .size() .reset_index(name="participants") ) @@ -58,14 +62,9 @@ def plot_pipeline_status_by_participants(data: pd.DataFrame): def plot_pipeline_status_by_records(data: pd.DataFrame): - long_data = pd.melt( - data, - id_vars=["participant_id", "session"], - var_name="pipeline_name", - value_name="pipeline_complete", - ) status_counts = ( - long_data.groupby(["pipeline_name", "pipeline_complete"]) + transform_data_to_long(data) + .groupby(["pipeline_name", "pipeline_complete"]) .size() .reset_index(name="records") ) From 237f146f56ebad0f50da3a7b8ec6d16e808da635 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Fri, 14 Apr 2023 13:24:57 -0400 Subject: [PATCH 7/8] remove exception raising by util functions, handle all user-facing errors together --- proc_dash/utility.py | 46 ++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/proc_dash/utility.py b/proc_dash/utility.py index 8d5317e..fadbcc3 100644 --- a/proc_dash/utility.py +++ b/proc_dash/utility.py @@ -11,11 +11,9 @@ def construct_legend_str(status_desc: dict) -> str: """From a dictionary, constructs a legend-style string with multiple lines in the format of key: value.""" - legend_str = "" - for status, desc in status_desc.items(): - legend_str += status + ": " + desc + "\n" - - return legend_str + return "\n".join( + [f"{status}: {desc}" for status, desc in status_desc.items()] + ) def get_required_bagel_columns() -> list: @@ -34,17 +32,14 @@ def get_required_bagel_columns() -> list: # TODO: When possible values per column have been finalized (waiting on mr_proc), # validate that each column only has acceptable values -def check_required_columns(bagel: pd.DataFrame): +def get_missing_required_columns(bagel: pd.DataFrame) -> set: """Returns error if required columns in bagel schema are missing.""" missing_req_columns = set(get_required_bagel_columns()).difference( bagel.columns ) # TODO: Check if there are any missing values in the `participant_id` column - if len(missing_req_columns) > 0: - raise LookupError( - f"The selected .csv is missing the following required metadata columns: {missing_req_columns}." 
- ) + return missing_req_columns def extract_pipelines(bagel: pd.DataFrame) -> dict: @@ -64,7 +59,7 @@ def extract_pipelines(bagel: pd.DataFrame) -> dict: return pipelines_dict -def check_num_subjects(bagel: pd.DataFrame): +def are_subjects_same_across_pipelines(bagel: pd.DataFrame) -> bool: """Returns error if subjects and sessions are different across pipelines in the input.""" pipelines_dict = extract_pipelines(bagel) @@ -73,13 +68,10 @@ def check_num_subjects(bagel: pd.DataFrame): for df in pipelines_dict.values() ] - if not all( + return all( pipeline.equals(pipeline_subject_sessions[0]) for pipeline in pipeline_subject_sessions - ): - raise LookupError( - "The pipelines in bagel.csv do not have the same number of subjects and sessions." - ) + ) def count_unique_subjects(data: pd.DataFrame) -> int: @@ -95,9 +87,6 @@ def get_pipelines_overview(bagel: pd.DataFrame) -> pd.DataFrame: Constructs a dataframe containing global statuses of pipelines in bagel.csv (based on "pipeline_complete" column) for each participant and session. """ - check_required_columns(bagel) - check_num_subjects(bagel) - pipeline_complete_df = bagel.pivot( index=["participant_id", "session"], columns=["pipeline_name", "pipeline_version"], @@ -137,19 +126,18 @@ def parse_csv_contents( decoded = base64.b64decode(content_string) error_msg = None - try: - if ".csv" in filename: - bagel = pd.read_csv(io.StringIO(decoded.decode("utf-8"))) + if ".csv" in filename: + bagel = pd.read_csv(io.StringIO(decoded.decode("utf-8"))) + if len(missing_req_cols := get_missing_required_columns(bagel)) > 0: + error_msg = f"The selected .csv is missing the following required metadata columns: {missing_req_cols}." + elif not are_subjects_same_across_pipelines(bagel): + error_msg = "The pipelines in bagel.csv do not have the same number of subjects and sessions." + else: overview_df = get_pipelines_overview(bagel=bagel) total_subjects = count_unique_subjects(overview_df) sessions = overview_df["session"].sort_values().unique().tolist() - else: - error_msg = "Input file is not a .csv file." - except LookupError as err: - error_msg = str(err) - except Exception as exc: - print(exc) - error_msg = "Something went wrong while processing this file." + else: + error_msg = "Input file is not a .csv file." 
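+    # NOTE: Anticipated input problems are returned as an error message rather than
+    # raised, so that the app can handle all user-facing errors in one place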
if error_msg is not None: return None, None, None, error_msg From 846b0c43d80df71187a9b280447609447d6ad256 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Fri, 14 Apr 2023 16:57:44 -0400 Subject: [PATCH 8/8] refactor csv content parsing ops into separate callback, update utility.py constants + docstrings --- proc_dash/app.py | 89 ++++++++++++++++++++++++++------------------ proc_dash/utility.py | 10 ++++- 2 files changed, 60 insertions(+), 39 deletions(-) diff --git a/proc_dash/app.py b/proc_dash/app.py index 6d54f4b..69e5d6a 100644 --- a/proc_dash/app.py +++ b/proc_dash/app.py @@ -14,19 +14,13 @@ EMPTY_FIGURE_PROPS = {"data": [], "layout": {}, "frames": []} -PIPE_COMPLETE_STATUS_SHORT_DESC = { - "SUCCESS": "All stages of pipeline finished successfully (all expected output files present).", - "FAIL": "At least one stage of the pipeline failed.", - "INCOMPLETE": "Pipeline has not yet been run or at least one stage is unfinished/still running.", - "UNAVAILABLE": "Relevant data modality for pipeline not available.", -} - app = Dash(__name__, external_stylesheets=[dbc.themes.FLATLY]) app.layout = html.Div( children=[ html.H2(children="Neuroimaging Derivatives Status Dashboard"), + dcc.Store(id="memory"), dcc.Upload( id="upload-data", children=dbc.Button( @@ -132,7 +126,7 @@ ), html.P( children=util.construct_legend_str( - PIPE_COMPLETE_STATUS_SHORT_DESC + util.PIPE_COMPLETE_STATUS_SHORT_DESC ), style={ "whiteSpace": "pre" # preserve newlines @@ -168,28 +162,55 @@ @app.callback( [ - Output("interactive-datatable", "columns"), - Output("interactive-datatable", "data"), + Output("memory", "data"), Output("total-participants", "children"), Output("session-dropdown", "options"), ], [ Input("upload-data", "contents"), State("upload-data", "filename"), - Input("session-dropdown", "value"), - Input("select-operator", "value"), ], ) -def update_outputs(contents, filename, session_values, operator_value): +def process_bagel(contents, filename): + """ + From the contents of a correctly-formatted uploaded .csv file, parse and store the pipeline overview + data as a dataframe and update the session dropdown options and displayed total participants count. + Returns any errors encountered during input file processing as a user-friendly message. + """ if contents is None: - return None, None, "Upload a CSV file to begin.", [] - - data, total_subjects, sessions, upload_error = util.parse_csv_contents( - contents=contents, filename=filename - ) + return None, "Upload a CSV file to begin.", [] + try: + data, total_subjects, sessions, upload_error = util.parse_csv_contents( + contents=contents, filename=filename + ) + except Exception: + upload_error = "Something went wrong while processing this file." 
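+    # Any error captured above is reported back to the user below, in place of
+    # the dataset summary and session dropdown options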
if upload_error is not None: - return None, None, f"Error: {upload_error} Please try again.", [] + return None, f"Error: {upload_error} Please try again.", [] + + report_total_subjects = f"Total number of participants: {total_subjects}" + session_opts = [{"label": ses, "value": ses} for ses in sessions] + + return data.to_dict("records"), report_total_subjects, session_opts + + +@app.callback( + [ + Output("interactive-datatable", "columns"), + Output("interactive-datatable", "data"), + ], + [ + Input("memory", "data"), + Input("session-dropdown", "value"), + Input("select-operator", "value"), + ], +) +def update_outputs(parsed_data, session_values, operator_value): + if parsed_data is None: + return None, None + + data = pd.DataFrame.from_dict(parsed_data) if session_values: data = util.filter_by_sessions( @@ -197,13 +218,10 @@ def update_outputs(contents, filename, session_values, operator_value): session_values=session_values, operator_value=operator_value, ) - tbl_columns = [{"name": i, "id": i} for i in data.columns] tbl_data = data.to_dict("records") - tbl_total_subjects = f"Total number of participants: {total_subjects}" - session_opts = [{"label": ses, "value": ses} for ses in sessions] - return tbl_columns, tbl_data, tbl_total_subjects, session_opts + return tbl_columns, tbl_data @app.callback( @@ -240,8 +258,11 @@ def update_matching_participants(columns, virtual_data): State("upload-data", "filename"), prevent_initial_call=True, ) -def reset_table(contents, filename): - """If file contents change (i.e., new CSV uploaded), reset file name and filter selection values.""" +def reset_selections(contents, filename): + """ + If file contents change (i.e., selected new CSV for upload), reset displayed file name and dropdown filter + selection values. Reset will occur regardless of whether there is an issue processing the selected file. + """ if ctx.triggered_id == "upload-data": return f"Input file: {filename}", "", "" @@ -253,27 +274,21 @@ def reset_table(contents, filename): Output("fig-pipeline-status-all-ses", "figure"), Output("fig-pipeline-status-all-ses", "style"), ], - Input("upload-data", "contents"), - State("upload-data", "filename"), + Input("memory", "data"), prevent_initial_call=True, ) -def generate_overview_status_fig_for_participants(contents, filename): +def generate_overview_status_fig_for_participants(parsed_data): """ If new dataset uploaded, generate stacked bar plot of pipeline_complete statuses per session, grouped by pipeline. Provides overview of the number of participants with each status in a given session, per processing pipeline. 
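+    This figure reflects the full dataset: it is regenerated only when the stored
+    data changes (i.e., on a new upload), not when sessions are filtered.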
""" - if contents is None: - raise PreventUpdate - data, total_subjects, sessions, upload_error = util.parse_csv_contents( - contents=contents, filename=filename - ) - if upload_error is not None: + if parsed_data is None: return EMPTY_FIGURE_PROPS, {"display": "none"} - return plot.plot_pipeline_status_by_participants(data), { - "display": "block" - } + return plot.plot_pipeline_status_by_participants( + pd.DataFrame.from_dict(parsed_data) + ), {"display": "block"} @app.callback( diff --git a/proc_dash/utility.py b/proc_dash/utility.py index fadbcc3..e9b75c4 100644 --- a/proc_dash/utility.py +++ b/proc_dash/utility.py @@ -7,6 +7,12 @@ import pandas as pd SCHEMAS_PATH = Path(__file__).absolute().parents[1] / "schemas" +PIPE_COMPLETE_STATUS_SHORT_DESC = { + "SUCCESS": "All stages of pipeline finished successfully (all expected output files present).", + "FAIL": "At least one stage of the pipeline failed.", + "INCOMPLETE": "Pipeline has not yet been run or at least one stage is unfinished/still running.", + "UNAVAILABLE": "Relevant data modality for pipeline not available.", +} def construct_legend_str(status_desc: dict) -> str: @@ -33,7 +39,7 @@ def get_required_bagel_columns() -> list: # TODO: When possible values per column have been finalized (waiting on mr_proc), # validate that each column only has acceptable values def get_missing_required_columns(bagel: pd.DataFrame) -> set: - """Returns error if required columns in bagel schema are missing.""" + """Identifies any missing required columns in bagel schema.""" missing_req_columns = set(get_required_bagel_columns()).difference( bagel.columns ) @@ -60,7 +66,7 @@ def extract_pipelines(bagel: pd.DataFrame) -> dict: def are_subjects_same_across_pipelines(bagel: pd.DataFrame) -> bool: - """Returns error if subjects and sessions are different across pipelines in the input.""" + """Checks if subjects and sessions are the same across pipelines in the input.""" pipelines_dict = extract_pipelines(bagel) pipeline_subject_sessions = [