From 8e9eaba49047125ce11db448fee95af67fac5bca Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 6 Apr 2023 13:34:40 -0400 Subject: [PATCH 1/8] Add plotting function and callback for generating pipeline status bar graph --- proc_dash/app.py | 19 ++++++++++++++++ proc_dash/utility.py | 52 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/proc_dash/app.py b/proc_dash/app.py index b758052..1cad23b 100644 --- a/proc_dash/app.py +++ b/proc_dash/app.py @@ -93,6 +93,7 @@ ), ] ), + dcc.Graph(id="pipeline-completion", style={"display": "none"}), ] ) @@ -179,5 +180,23 @@ def reset_table(contents, filename): raise PreventUpdate +@app.callback( + [ + Output("pipeline-completion", "figure"), + Output("pipeline-completion", "style"), + ], + Input( + "interactive-datatable", "data" + ), # Input not triggered by datatable frontend filtering + prevent_initial_call=True, +) +def update_overview_status_fig(data): + if data is not None: + df = pd.DataFrame.from_dict(data) + return util.create_overview_status_fig(df), {"display": "block"} + + return {"data": [], "layout": {}, "frames": []}, {"display": "none"} + + if __name__ == "__main__": app.run_server(debug=True) diff --git a/proc_dash/utility.py b/proc_dash/utility.py index f0aad52..556ee76 100644 --- a/proc_dash/utility.py +++ b/proc_dash/utility.py @@ -5,9 +5,18 @@ from typing import Optional, Tuple import pandas as pd +import plotly.express as px SCHEMAS_PATH = Path(__file__).absolute().parents[1] / "schemas" +STATUS_CMAP = px.colors.qualitative.Bold +STATUS_COLORS = { + "SUCCESS": STATUS_CMAP[5], + "FAIL": STATUS_CMAP[9], + "INCOMPLETE": STATUS_CMAP[3], + "UNAVAILABLE": STATUS_CMAP[10], +} + def get_required_bagel_columns() -> list: """Returns names of required columns from the bagel schema.""" @@ -99,6 +108,9 @@ def get_pipelines_overview(bagel: pd.DataFrame) -> pd.DataFrame: "-".join(tup) for tup in pipeline_complete_df.columns.to_flat_index() ] + pipeline_complete_df = pipeline_complete_df.reindex( + sorted(pipeline_complete_df.columns), axis=1 + ) pipeline_complete_df.reset_index(inplace=True) return pipeline_complete_df @@ -173,3 +185,43 @@ def filter_by_sessions( data = data[data["session"].isin(session_values)] return data + + +def create_overview_status_fig(data: pd.DataFrame): + long_data = pd.melt( + data, + id_vars="participant_id", + value_vars=list(data.columns[2:]), + var_name="pipeline_name", + value_name="pipeline_complete", + ) + status_counts = ( + long_data.groupby(["pipeline_name", "pipeline_complete"]) + .size() + .reset_index(name="records") + ) + + fig = px.bar( + status_counts, + x="pipeline_name", + y="records", + color="pipeline_complete", + text_auto=True, + category_orders={ + "pipeline_complete": [ + "SUCCESS", + "FAIL", + "INCOMPLETE", + "UNAVAILABLE", + ] + }, + color_discrete_map=STATUS_COLORS, + labels={ + "pipeline_name": "Pipeline", + "records": "Unique records", + "pipeline_complete": "Processing status", + }, + ) + fig.update_traces(textposition="outside", cliponaxis=False) + + return fig From 2385b2a296fdddb13851233d8f2388463861481f Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Fri, 7 Apr 2023 04:00:52 -0400 Subject: [PATCH 2/8] Implement graph component for participant status counts by session, grouped by pipeline - Plotting utils refactored into separate module - Created constant for returning an empty `figure` property - Added dbc GRID stylesheet to organize graphs --- proc_dash/app.py | 77 +++++++++++++++++++++++++++++++----- proc_dash/plotting.py | 91 
+++++++++++++++++++++++++++++++++++++++++++ proc_dash/utility.py | 49 ----------------------- 3 files changed, 159 insertions(+), 58 deletions(-) create mode 100644 proc_dash/plotting.py diff --git a/proc_dash/app.py b/proc_dash/app.py index 1cad23b..833067a 100644 --- a/proc_dash/app.py +++ b/proc_dash/app.py @@ -8,12 +8,18 @@ from dash.dependencies import Input, Output, State from dash.exceptions import PreventUpdate +import proc_dash.plotting as plot import proc_dash.utility as util from dash import Dash, ctx, dash_table, dcc, html +EMPTY_FIGURE_PROPS = {"data": [], "layout": {}, "frames": []} + app = Dash( __name__, - external_stylesheets=["https://codepen.io/chriddyp/pen/bWLwgP.css"], + external_stylesheets=[ + "https://codepen.io/chriddyp/pen/bWLwgP.css", + dbc.themes.GRID, + ], ) @@ -49,7 +55,10 @@ page_size=50, fixed_rows={"headers": True}, style_table={"height": "300px", "overflowY": "auto"}, - ), # TODO: Treat all columns as strings to standardize filtering syntax? + ), + # NOTE: Could cast columns to strings for the datatable to standardize filtering syntax, + # but this results in undesirable effects (e.g., if there is session 1 and session 11, + # a query for "1" would return both) ], style={"margin-top": "10px", "margin-bottom": "10px"}, ), @@ -93,7 +102,22 @@ ), ] ), - dcc.Graph(id="pipeline-completion", style={"display": "none"}), + dbc.Row( + [ + # NOTE: Legend displayed for both graphs so that user can toggle visibility of status data + dbc.Col( + dcc.Graph( + id="fig-pipeline-status", style={"display": "none"} + ) + ), + dbc.Col( + dcc.Graph( + id="fig-pipeline-status-all-ses", + style={"display": "none"}, + ) + ), + ], + ), ] ) @@ -182,20 +206,55 @@ def reset_table(contents, filename): @app.callback( [ - Output("pipeline-completion", "figure"), - Output("pipeline-completion", "style"), + Output("fig-pipeline-status-all-ses", "figure"), + Output("fig-pipeline-status-all-ses", "style"), + ], + Input("upload-data", "contents"), + State("upload-data", "filename"), + prevent_initial_call=True, +) +def generate_overview_status_fig_for_participants(contents, filename): + """ + If new dataset uploaded, generate stacked bar plot of pipeline_complete statuses per session, + grouped by pipeline. Provides overview of the number of participants with each status in a given session, + per processing pipeline. + """ + if contents is None: + raise PreventUpdate + data, total_subjects, sessions, upload_error = util.parse_csv_contents( + contents=contents, filename=filename + ) + if upload_error is not None: + return EMPTY_FIGURE_PROPS, {"display": "none"} + + return plot.plot_pipeline_status_by_participants(data), { + "display": "block" + } + + +@app.callback( + [ + Output("fig-pipeline-status", "figure"), + Output("fig-pipeline-status", "style"), ], Input( "interactive-datatable", "data" ), # Input not triggered by datatable frontend filtering prevent_initial_call=True, ) -def update_overview_status_fig(data): +def update_overview_status_fig_for_records(data): + """ + When visible data in the overview datatable is updated (excluding built-in frontend datatable filtering + but including component filtering for multiple sessions), generate stacked bar plot of pipeline_complete + statuses aggregated by pipeline. Counts of statuses in plot thus correspond to unique records (unique + participant-session combinations). 
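+    For example, a participant with two available sessions contributes two records
+    to each pipeline's status counts.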
+ """ if data is not None: - df = pd.DataFrame.from_dict(data) - return util.create_overview_status_fig(df), {"display": "block"} + return plot.plot_pipeline_status_by_records( + pd.DataFrame.from_dict(data) + ), {"display": "block"} - return {"data": [], "layout": {}, "frames": []}, {"display": "none"} + return EMPTY_FIGURE_PROPS, {"display": "none"} if __name__ == "__main__": diff --git a/proc_dash/plotting.py b/proc_dash/plotting.py new file mode 100644 index 0000000..e5a929c --- /dev/null +++ b/proc_dash/plotting.py @@ -0,0 +1,91 @@ +import pandas as pd +import plotly.express as px + +STATUS_CMAP = px.colors.qualitative.Bold +STATUS_COLORS = { + "SUCCESS": STATUS_CMAP[5], + "FAIL": STATUS_CMAP[9], + "INCOMPLETE": STATUS_CMAP[3], + "UNAVAILABLE": STATUS_CMAP[10], +} +PIPELINE_STATUS_ORDER = ["SUCCESS", "FAIL", "INCOMPLETE", "UNAVAILABLE"] +LAYOUTS = { + "margin": {"l": 30, "r": 30, "t": 60, "b": 30}, # margins of chart + "title": { # figure title position properties + "yref": "container", + "y": 1, + "yanchor": "top", + "pad": {"t": 20}, + }, +} + + +def plot_pipeline_status_by_participants(data: pd.DataFrame): + long_data = pd.melt( + data, + id_vars=["participant_id", "session"], + var_name="pipeline_name", + value_name="pipeline_complete", + ) + status_counts = ( + long_data.groupby(["pipeline_name", "pipeline_complete", "session"]) + .size() + .reset_index(name="participants") + ) + + fig = px.bar( + status_counts, + x="session", + y="participants", + color="pipeline_complete", + text_auto=True, + facet_col="pipeline_name", + category_orders={"pipeline_complete": PIPELINE_STATUS_ORDER}, + color_discrete_map=STATUS_COLORS, + labels={ + "pipeline_name": "Pipeline", + "participants": "Participants (n)", + "pipeline_complete": "Processing status", + "session": "Session", + }, + title="Overview: Participant pipeline statuses by session", + ) + # Treat session labels as categorical in plot to avoid a continuous x-axis + fig.update_xaxes(type="category") + fig.update_layout(margin=LAYOUTS["margin"], title=LAYOUTS["title"]) + + return fig + + +def plot_pipeline_status_by_records(data: pd.DataFrame): + long_data = pd.melt( + data, + id_vars=["participant_id", "session"], + var_name="pipeline_name", + value_name="pipeline_complete", + ) + status_counts = ( + long_data.groupby(["pipeline_name", "pipeline_complete"]) + .size() + .reset_index(name="records") + ) + + fig = px.bar( + status_counts, + x="pipeline_name", + y="records", + color="pipeline_complete", + text_auto=True, + category_orders={"pipeline_complete": PIPELINE_STATUS_ORDER}, + color_discrete_map=STATUS_COLORS, + labels={ + "pipeline_name": "Pipeline", + "records": "Records (n)", + "pipeline_complete": "Processing status", + }, + title="Selected sessions: Pipeline statuses of unique matching records (default: all)" + # alternative title: "Pipeline statuses of unique records for selected sessions (default: all)" + ) + fig.update_layout(margin=LAYOUTS["margin"], title=LAYOUTS["title"]) + + return fig diff --git a/proc_dash/utility.py b/proc_dash/utility.py index 556ee76..82f9d85 100644 --- a/proc_dash/utility.py +++ b/proc_dash/utility.py @@ -5,18 +5,9 @@ from typing import Optional, Tuple import pandas as pd -import plotly.express as px SCHEMAS_PATH = Path(__file__).absolute().parents[1] / "schemas" -STATUS_CMAP = px.colors.qualitative.Bold -STATUS_COLORS = { - "SUCCESS": STATUS_CMAP[5], - "FAIL": STATUS_CMAP[9], - "INCOMPLETE": STATUS_CMAP[3], - "UNAVAILABLE": STATUS_CMAP[10], -} - def get_required_bagel_columns() -> list: 
"""Returns names of required columns from the bagel schema.""" @@ -185,43 +176,3 @@ def filter_by_sessions( data = data[data["session"].isin(session_values)] return data - - -def create_overview_status_fig(data: pd.DataFrame): - long_data = pd.melt( - data, - id_vars="participant_id", - value_vars=list(data.columns[2:]), - var_name="pipeline_name", - value_name="pipeline_complete", - ) - status_counts = ( - long_data.groupby(["pipeline_name", "pipeline_complete"]) - .size() - .reset_index(name="records") - ) - - fig = px.bar( - status_counts, - x="pipeline_name", - y="records", - color="pipeline_complete", - text_auto=True, - category_orders={ - "pipeline_complete": [ - "SUCCESS", - "FAIL", - "INCOMPLETE", - "UNAVAILABLE", - ] - }, - color_discrete_map=STATUS_COLORS, - labels={ - "pipeline_name": "Pipeline", - "records": "Unique records", - "pipeline_complete": "Processing status", - }, - ) - fig.update_traces(textposition="outside", cliponaxis=False) - - return fig From ae853cd4e2de937cc03c1b1d620efa817c962e17 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Sat, 8 Apr 2023 00:41:57 -0400 Subject: [PATCH 3/8] add function to construct string contents of pipeline status legend component --- proc_dash/utility.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/proc_dash/utility.py b/proc_dash/utility.py index 82f9d85..8d5317e 100644 --- a/proc_dash/utility.py +++ b/proc_dash/utility.py @@ -9,6 +9,15 @@ SCHEMAS_PATH = Path(__file__).absolute().parents[1] / "schemas" +def construct_legend_str(status_desc: dict) -> str: + """From a dictionary, constructs a legend-style string with multiple lines in the format of key: value.""" + legend_str = "" + for status, desc in status_desc.items(): + legend_str += status + ": " + desc + "\n" + + return legend_str + + def get_required_bagel_columns() -> list: """Returns names of required columns from the bagel schema.""" with open(SCHEMAS_PATH / "bagel_schema.json", "r") as file: From 718c2a6dae0fefeb0f49f68605eda8d1da1393dd Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Sat, 8 Apr 2023 00:50:20 -0400 Subject: [PATCH 4/8] Add component for descriptive legend of pipeline_complete statuses - primary stylesheet for app changed to dbc theme to use card and layout components --- proc_dash/app.py | 132 +++++++++++++++++++++++++++++++---------------- 1 file changed, 88 insertions(+), 44 deletions(-) diff --git a/proc_dash/app.py b/proc_dash/app.py index 833067a..6d54f4b 100644 --- a/proc_dash/app.py +++ b/proc_dash/app.py @@ -14,13 +14,14 @@ EMPTY_FIGURE_PROPS = {"data": [], "layout": {}, "frames": []} -app = Dash( - __name__, - external_stylesheets=[ - "https://codepen.io/chriddyp/pen/bWLwgP.css", - dbc.themes.GRID, - ], -) +PIPE_COMPLETE_STATUS_SHORT_DESC = { + "SUCCESS": "All stages of pipeline finished successfully (all expected output files present).", + "FAIL": "At least one stage of the pipeline failed.", + "INCOMPLETE": "Pipeline has not yet been run or at least one stage is unfinished/still running.", + "UNAVAILABLE": "Relevant data modality for pipeline not available.", +} + +app = Dash(__name__, external_stylesheets=[dbc.themes.FLATLY]) app.layout = html.Div( @@ -28,14 +29,16 @@ html.H2(children="Neuroimaging Derivatives Status Dashboard"), dcc.Upload( id="upload-data", - children=html.Button("Drag and Drop or Select .csv File"), + children=dbc.Button( + "Drag and Drop or Select .csv File", color="secondary" + ), # TODO: Constrain click responsive area of button style={"margin-top": "10px", "margin-bottom": "10px"}, multiple=False, ), 
html.Div( id="output-data-upload", children=[ - html.H6(id="input-filename"), + html.H4(id="input-filename"), html.Div( children=[ html.Div(id="total-participants"), @@ -55,6 +58,9 @@ page_size=50, fixed_rows={"headers": True}, style_table={"height": "300px", "overflowY": "auto"}, + style_cell={ + "fontSize": 13 # accounts for font size inflation by dbc theme + }, ), # NOTE: Could cast columns to strings for the datatable to standardize filtering syntax, # but this results in undesirable effects (e.g., if there is session 1 and session 11, @@ -62,43 +68,80 @@ ], style={"margin-top": "10px", "margin-bottom": "10px"}, ), - dbc.Card( + dbc.Row( [ - # TODO: Put label and dropdown in same row - html.Div( - [ - dbc.Label("Filter by multiple sessions:"), - dcc.Dropdown( - id="session-dropdown", - options=[], - multi=True, - placeholder="Select one or more available sessions to filter by", - # TODO: Can set `disabled=True` here to prevent any user interaction before file is uploaded - ), - ] + dbc.Col( + dbc.Form( + [ + # TODO: Put label and dropdown in same row + html.Div( + [ + dbc.Label( + "Filter by multiple sessions:", + html_for="session-dropdown", + className="mb-0", + ), + dcc.Dropdown( + id="session-dropdown", + options=[], + multi=True, + placeholder="Select one or more available sessions to filter by", + # TODO: Can set `disabled=True` here to prevent any user interaction before file is uploaded + ), + ], + className="mb-2", # Add margin to keep dropdowns spaced apart + ), + html.Div( + [ + dbc.Label( + "Selection operator:", + html_for="select-operator", + className="mb-0", + ), + dcc.Dropdown( + id="select-operator", + options=[ + { + "label": "AND", + "value": "AND", + "title": "Show only participants with all selected sessions.", + }, + { + "label": "OR", + "value": "OR", + "title": "Show participants with any of the selected sessions.", + }, + ], + value="AND", + clearable=False, + # TODO: Can set `disabled=True` here to prevent any user interaction before file is uploaded + ), + ], + className="mb-2", + ), + ], + ) ), - html.Div( - [ - dbc.Label("Selection operator:"), - dcc.Dropdown( - id="select-operator", - options=[ - { - "label": "AND", - "value": "AND", - "title": "Show only participants with all selected sessions.", - }, - { - "label": "OR", - "value": "OR", - "title": "Show participants with any of the selected sessions.", - }, - ], - value="AND", - clearable=False, - # TODO: Can set `disabled=True` here to prevent any user interaction before file is uploaded + dbc.Col( + dbc.Card( + dbc.CardBody( + [ + html.H5( + "Legend: Processing status", + className="card-title", + ), + html.P( + children=util.construct_legend_str( + PIPE_COMPLETE_STATUS_SHORT_DESC + ), + style={ + "whiteSpace": "pre" # preserve newlines + }, + className="card-text", + ), + ] ), - ] + ) ), ] ), @@ -118,7 +161,8 @@ ), ], ), - ] + ], + style={"padding": "10px 10px 10px 10px"}, ) From c3ee12e0ee46b78affda09b80f07fda33213509b Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Sat, 8 Apr 2023 00:50:54 -0400 Subject: [PATCH 5/8] minor title change to graph --- proc_dash/plotting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/proc_dash/plotting.py b/proc_dash/plotting.py index e5a929c..c83e0d1 100644 --- a/proc_dash/plotting.py +++ b/proc_dash/plotting.py @@ -83,7 +83,7 @@ def plot_pipeline_status_by_records(data: pd.DataFrame): "records": "Records (n)", "pipeline_complete": "Processing status", }, - title="Selected sessions: Pipeline statuses of unique matching records (default: all)" + 
title="Selected sessions: Pipeline statuses of matching records (default: all)" # alternative title: "Pipeline statuses of unique records for selected sessions (default: all)" ) fig.update_layout(margin=LAYOUTS["margin"], title=LAYOUTS["title"]) From dbb72032bc88fe84cfaf1f5bd413083cded8cca8 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Fri, 14 Apr 2023 13:19:35 -0400 Subject: [PATCH 6/8] refactor plotting functions --- proc_dash/plotting.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/proc_dash/plotting.py b/proc_dash/plotting.py index c83e0d1..7c716f7 100644 --- a/proc_dash/plotting.py +++ b/proc_dash/plotting.py @@ -20,15 +20,19 @@ } -def plot_pipeline_status_by_participants(data: pd.DataFrame): - long_data = pd.melt( +def transform_data_to_long(data: pd.DataFrame) -> pd.DataFrame: + return pd.melt( data, id_vars=["participant_id", "session"], var_name="pipeline_name", value_name="pipeline_complete", ) + + +def plot_pipeline_status_by_participants(data: pd.DataFrame): status_counts = ( - long_data.groupby(["pipeline_name", "pipeline_complete", "session"]) + transform_data_to_long(data) + .groupby(["pipeline_name", "pipeline_complete", "session"]) .size() .reset_index(name="participants") ) @@ -58,14 +62,9 @@ def plot_pipeline_status_by_participants(data: pd.DataFrame): def plot_pipeline_status_by_records(data: pd.DataFrame): - long_data = pd.melt( - data, - id_vars=["participant_id", "session"], - var_name="pipeline_name", - value_name="pipeline_complete", - ) status_counts = ( - long_data.groupby(["pipeline_name", "pipeline_complete"]) + transform_data_to_long(data) + .groupby(["pipeline_name", "pipeline_complete"]) .size() .reset_index(name="records") ) From 237f146f56ebad0f50da3a7b8ec6d16e808da635 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Fri, 14 Apr 2023 13:24:57 -0400 Subject: [PATCH 7/8] remove exception raising by util functions, handle all user-facing errors together --- proc_dash/utility.py | 46 ++++++++++++++++---------------------------- 1 file changed, 17 insertions(+), 29 deletions(-) diff --git a/proc_dash/utility.py b/proc_dash/utility.py index 8d5317e..fadbcc3 100644 --- a/proc_dash/utility.py +++ b/proc_dash/utility.py @@ -11,11 +11,9 @@ def construct_legend_str(status_desc: dict) -> str: """From a dictionary, constructs a legend-style string with multiple lines in the format of key: value.""" - legend_str = "" - for status, desc in status_desc.items(): - legend_str += status + ": " + desc + "\n" - - return legend_str + return "\n".join( + [f"{status}: {desc}" for status, desc in status_desc.items()] + ) def get_required_bagel_columns() -> list: @@ -34,17 +32,14 @@ def get_required_bagel_columns() -> list: # TODO: When possible values per column have been finalized (waiting on mr_proc), # validate that each column only has acceptable values -def check_required_columns(bagel: pd.DataFrame): +def get_missing_required_columns(bagel: pd.DataFrame) -> set: """Returns error if required columns in bagel schema are missing.""" missing_req_columns = set(get_required_bagel_columns()).difference( bagel.columns ) # TODO: Check if there are any missing values in the `participant_id` column - if len(missing_req_columns) > 0: - raise LookupError( - f"The selected .csv is missing the following required metadata columns: {missing_req_columns}." 
- ) + return missing_req_columns def extract_pipelines(bagel: pd.DataFrame) -> dict: @@ -64,7 +59,7 @@ def extract_pipelines(bagel: pd.DataFrame) -> dict: return pipelines_dict -def check_num_subjects(bagel: pd.DataFrame): +def are_subjects_same_across_pipelines(bagel: pd.DataFrame) -> bool: """Returns error if subjects and sessions are different across pipelines in the input.""" pipelines_dict = extract_pipelines(bagel) @@ -73,13 +68,10 @@ def check_num_subjects(bagel: pd.DataFrame): for df in pipelines_dict.values() ] - if not all( + return all( pipeline.equals(pipeline_subject_sessions[0]) for pipeline in pipeline_subject_sessions - ): - raise LookupError( - "The pipelines in bagel.csv do not have the same number of subjects and sessions." - ) + ) def count_unique_subjects(data: pd.DataFrame) -> int: @@ -95,9 +87,6 @@ def get_pipelines_overview(bagel: pd.DataFrame) -> pd.DataFrame: Constructs a dataframe containing global statuses of pipelines in bagel.csv (based on "pipeline_complete" column) for each participant and session. """ - check_required_columns(bagel) - check_num_subjects(bagel) - pipeline_complete_df = bagel.pivot( index=["participant_id", "session"], columns=["pipeline_name", "pipeline_version"], @@ -137,19 +126,18 @@ def parse_csv_contents( decoded = base64.b64decode(content_string) error_msg = None - try: - if ".csv" in filename: - bagel = pd.read_csv(io.StringIO(decoded.decode("utf-8"))) + if ".csv" in filename: + bagel = pd.read_csv(io.StringIO(decoded.decode("utf-8"))) + if len(missing_req_cols := get_missing_required_columns(bagel)) > 0: + error_msg = f"The selected .csv is missing the following required metadata columns: {missing_req_cols}." + elif not are_subjects_same_across_pipelines(bagel): + error_msg = "The pipelines in bagel.csv do not have the same number of subjects and sessions." + else: overview_df = get_pipelines_overview(bagel=bagel) total_subjects = count_unique_subjects(overview_df) sessions = overview_df["session"].sort_values().unique().tolist() - else: - error_msg = "Input file is not a .csv file." - except LookupError as err: - error_msg = str(err) - except Exception as exc: - print(exc) - error_msg = "Something went wrong while processing this file." + else: + error_msg = "Input file is not a .csv file." 
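+    # NOTE: Anticipated input problems are returned as an error message rather than
+    # raised, so that the app can handle all user-facing errors in one place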
if error_msg is not None: return None, None, None, error_msg From 846b0c43d80df71187a9b280447609447d6ad256 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Fri, 14 Apr 2023 16:57:44 -0400 Subject: [PATCH 8/8] refactor csv content parsing ops into separate callback, update utility.py constants + docstrings --- proc_dash/app.py | 89 ++++++++++++++++++++++++++------------------ proc_dash/utility.py | 10 ++++- 2 files changed, 60 insertions(+), 39 deletions(-) diff --git a/proc_dash/app.py b/proc_dash/app.py index 6d54f4b..69e5d6a 100644 --- a/proc_dash/app.py +++ b/proc_dash/app.py @@ -14,19 +14,13 @@ EMPTY_FIGURE_PROPS = {"data": [], "layout": {}, "frames": []} -PIPE_COMPLETE_STATUS_SHORT_DESC = { - "SUCCESS": "All stages of pipeline finished successfully (all expected output files present).", - "FAIL": "At least one stage of the pipeline failed.", - "INCOMPLETE": "Pipeline has not yet been run or at least one stage is unfinished/still running.", - "UNAVAILABLE": "Relevant data modality for pipeline not available.", -} - app = Dash(__name__, external_stylesheets=[dbc.themes.FLATLY]) app.layout = html.Div( children=[ html.H2(children="Neuroimaging Derivatives Status Dashboard"), + dcc.Store(id="memory"), dcc.Upload( id="upload-data", children=dbc.Button( @@ -132,7 +126,7 @@ ), html.P( children=util.construct_legend_str( - PIPE_COMPLETE_STATUS_SHORT_DESC + util.PIPE_COMPLETE_STATUS_SHORT_DESC ), style={ "whiteSpace": "pre" # preserve newlines @@ -168,28 +162,55 @@ @app.callback( [ - Output("interactive-datatable", "columns"), - Output("interactive-datatable", "data"), + Output("memory", "data"), Output("total-participants", "children"), Output("session-dropdown", "options"), ], [ Input("upload-data", "contents"), State("upload-data", "filename"), - Input("session-dropdown", "value"), - Input("select-operator", "value"), ], ) -def update_outputs(contents, filename, session_values, operator_value): +def process_bagel(contents, filename): + """ + From the contents of a correctly-formatted uploaded .csv file, parse and store the pipeline overview + data as a dataframe and update the session dropdown options and displayed total participants count. + Returns any errors encountered during input file processing as a user-friendly message. + """ if contents is None: - return None, None, "Upload a CSV file to begin.", [] - - data, total_subjects, sessions, upload_error = util.parse_csv_contents( - contents=contents, filename=filename - ) + return None, "Upload a CSV file to begin.", [] + try: + data, total_subjects, sessions, upload_error = util.parse_csv_contents( + contents=contents, filename=filename + ) + except Exception: + upload_error = "Something went wrong while processing this file." 
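+    # Any error captured above is reported back to the user below, in place of
+    # the dataset summary and session dropdown options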
if upload_error is not None: - return None, None, f"Error: {upload_error} Please try again.", [] + return None, f"Error: {upload_error} Please try again.", [] + + report_total_subjects = f"Total number of participants: {total_subjects}" + session_opts = [{"label": ses, "value": ses} for ses in sessions] + + return data.to_dict("records"), report_total_subjects, session_opts + + +@app.callback( + [ + Output("interactive-datatable", "columns"), + Output("interactive-datatable", "data"), + ], + [ + Input("memory", "data"), + Input("session-dropdown", "value"), + Input("select-operator", "value"), + ], +) +def update_outputs(parsed_data, session_values, operator_value): + if parsed_data is None: + return None, None + + data = pd.DataFrame.from_dict(parsed_data) if session_values: data = util.filter_by_sessions( @@ -197,13 +218,10 @@ def update_outputs(contents, filename, session_values, operator_value): session_values=session_values, operator_value=operator_value, ) - tbl_columns = [{"name": i, "id": i} for i in data.columns] tbl_data = data.to_dict("records") - tbl_total_subjects = f"Total number of participants: {total_subjects}" - session_opts = [{"label": ses, "value": ses} for ses in sessions] - return tbl_columns, tbl_data, tbl_total_subjects, session_opts + return tbl_columns, tbl_data @app.callback( @@ -240,8 +258,11 @@ def update_matching_participants(columns, virtual_data): State("upload-data", "filename"), prevent_initial_call=True, ) -def reset_table(contents, filename): - """If file contents change (i.e., new CSV uploaded), reset file name and filter selection values.""" +def reset_selections(contents, filename): + """ + If file contents change (i.e., selected new CSV for upload), reset displayed file name and dropdown filter + selection values. Reset will occur regardless of whether there is an issue processing the selected file. + """ if ctx.triggered_id == "upload-data": return f"Input file: {filename}", "", "" @@ -253,27 +274,21 @@ def reset_table(contents, filename): Output("fig-pipeline-status-all-ses", "figure"), Output("fig-pipeline-status-all-ses", "style"), ], - Input("upload-data", "contents"), - State("upload-data", "filename"), + Input("memory", "data"), prevent_initial_call=True, ) -def generate_overview_status_fig_for_participants(contents, filename): +def generate_overview_status_fig_for_participants(parsed_data): """ If new dataset uploaded, generate stacked bar plot of pipeline_complete statuses per session, grouped by pipeline. Provides overview of the number of participants with each status in a given session, per processing pipeline. 
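+    This figure reflects the full dataset: it is regenerated only when the stored
+    data changes (i.e., on a new upload), not when sessions are filtered.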
""" - if contents is None: - raise PreventUpdate - data, total_subjects, sessions, upload_error = util.parse_csv_contents( - contents=contents, filename=filename - ) - if upload_error is not None: + if parsed_data is None: return EMPTY_FIGURE_PROPS, {"display": "none"} - return plot.plot_pipeline_status_by_participants(data), { - "display": "block" - } + return plot.plot_pipeline_status_by_participants( + pd.DataFrame.from_dict(parsed_data) + ), {"display": "block"} @app.callback( diff --git a/proc_dash/utility.py b/proc_dash/utility.py index fadbcc3..e9b75c4 100644 --- a/proc_dash/utility.py +++ b/proc_dash/utility.py @@ -7,6 +7,12 @@ import pandas as pd SCHEMAS_PATH = Path(__file__).absolute().parents[1] / "schemas" +PIPE_COMPLETE_STATUS_SHORT_DESC = { + "SUCCESS": "All stages of pipeline finished successfully (all expected output files present).", + "FAIL": "At least one stage of the pipeline failed.", + "INCOMPLETE": "Pipeline has not yet been run or at least one stage is unfinished/still running.", + "UNAVAILABLE": "Relevant data modality for pipeline not available.", +} def construct_legend_str(status_desc: dict) -> str: @@ -33,7 +39,7 @@ def get_required_bagel_columns() -> list: # TODO: When possible values per column have been finalized (waiting on mr_proc), # validate that each column only has acceptable values def get_missing_required_columns(bagel: pd.DataFrame) -> set: - """Returns error if required columns in bagel schema are missing.""" + """Identifies any missing required columns in bagel schema.""" missing_req_columns = set(get_required_bagel_columns()).difference( bagel.columns ) @@ -60,7 +66,7 @@ def extract_pipelines(bagel: pd.DataFrame) -> dict: def are_subjects_same_across_pipelines(bagel: pd.DataFrame) -> bool: - """Returns error if subjects and sessions are different across pipelines in the input.""" + """Checks if subjects and sessions are the same across pipelines in the input.""" pipelines_dict = extract_pipelines(bagel) pipeline_subject_sessions = [