From e991de176e12207ad85331ff83a2bbdffce162a8 Mon Sep 17 00:00:00 2001
From: Lu Zhu <zhu.lu@hotmail.com>
Date: Mon, 6 May 2024 23:15:46 -0400
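Subject: [PATCH] remove unused code

Remove unused and duplicated imports and delete commented-out code.
Also use the `not in` operator for the dependency check in the curator
and drop a redundant f-string prefix from a log message that has no
placeholders.
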
---
 auroris/curation/_curator.py              |   2 +-
 auroris/curation/actions/_discretize.py   |  15 ---
 auroris/curation/actions/_distribution.py |   3 +-
 auroris/report/broadcaster/_html.py       |   7 --
 auroris/types.py                          |   1 -
 auroris/utils.py                          |   1 -
 auroris/visualization/_distribution.py    | 140 +---------------------
 7 files changed, 5 insertions(+), 164 deletions(-)
diff --git a/auroris/curation/_curator.py b/auroris/curation/_curator.py
index c42b193..d0b16e3 100644
--- a/auroris/curation/_curator.py
+++ b/auroris/curation/_curator.py
@@ -46,7 +46,7 @@ def transform(self, dataset: pd.DataFrame) -> Tuple[pd.DataFrame, CurationReport
         dataset = dataset.copy(deep=True)
         for action in self.steps:
             logger.info(f"Performing step: {action.name}")
-            if action._dep_action and not action._dep_action in self.state:
+            if action._dep_action and action._dep_action not in self.state:
                 raise RuntimeError(f"{action._dep_action} should be called before {action.name}.")
             with report.section(action.name):
                 kwargs = {}
diff --git a/auroris/curation/actions/_discretize.py b/auroris/curation/actions/_discretize.py
index 7780fd7..d5be312 100644
--- a/auroris/curation/actions/_discretize.py
+++ b/auroris/curation/actions/_discretize.py
@@ -7,7 +7,6 @@
 from auroris.curation.actions._base import BaseAction
 from auroris.report import CurationReport
 from auroris.types import VerbosityLevel
-from auroris.visualization._distribution import detailed_distributions_plots
 
 
 def discretize(
@@ -105,19 +104,5 @@ def transform(
 
         if report is not None:
             report.log_new_column(column_name)
-            # sections = []
-            # low = -np.inf
-            # high = np.inf
-
-            # for i, threshold in enumerate(self.thresholds + [high]):
-            #     if self.label_order == "descending":
-            #         i = len(self.thresholds) - i
-            #     pct = 100 * sum(X == i) / len(X)
-            #     sections.append(
-            #         {"label": f"{column_name} = {i}: {pct:.1f} %", "start": low, "end": threshold, "pct": pct}
-            #     )
-            #     low = threshold
-            # fig = detailed_distributions_plots(data=dataset[self.input_column], label_name=self.input_column, sections=sections)
-            # report.log_image(fig, title="Data class distribution")
 
         return dataset
diff --git a/auroris/curation/actions/_distribution.py b/auroris/curation/actions/_distribution.py
index c0e5310..e2cddd3 100644
--- a/auroris/curation/actions/_distribution.py
+++ b/auroris/curation/actions/_distribution.py
@@ -1,12 +1,11 @@
 from typing import Dict, List, Optional
 import pandas as pd
-from pydantic import Field, PrivateAttr
+from pydantic import Field
 import numpy as np
 
 from auroris.curation.actions._base import BaseAction
 from auroris.report import CurationReport
 from auroris.types import VerbosityLevel
-from auroris.curation.actions._discretize import Discretization
 from auroris.visualization import detailed_distributions_plots
 
 
diff --git a/auroris/report/broadcaster/_html.py b/auroris/report/broadcaster/_html.py
index be2346a..09beb99 100644
--- a/auroris/report/broadcaster/_html.py
+++ b/auroris/report/broadcaster/_html.py
@@ -1,10 +1,6 @@
-import io
 import fsspec
 import base64
 from typing import Optional
-from PIL import Image as PILImage
-from PIL.Image import Image as ImageType
-from IPython.core.display import Image as IPy_Image
 
 import datamol as dm
 from auroris.report import CurationReport, Section
@@ -121,9 +117,6 @@ def on_report_start(self, report: CurationReport):
                     <p>Version: {report.auroris_version}</p>
                 </header>"""
         )
-        # self._file.write("<h1>Curation Report</h1>")
-        # self._file.write(f"<p>Time: {report.time_stamp.strftime('%Y-%m-%d %H:%M:%S')}</p>")
-        # self._file.write(f"<p>Version: {report.auroris_version}</p>")
 
     def on_section_start(self, section: Section):
         self._file.write(f"<h2>{section.title}</h2>")
diff --git a/auroris/types.py b/auroris/types.py
index 9879e68..013135c 100644
--- a/auroris/types.py
+++ b/auroris/types.py
@@ -1,5 +1,4 @@
 from enum import IntEnum
-from PIL.Image import Image
 
 
 class VerbosityLevel(IntEnum):
diff --git a/auroris/utils.py b/auroris/utils.py
index f56a421..c5ed03b 100644
--- a/auroris/utils.py
+++ b/auroris/utils.py
@@ -3,7 +3,6 @@
 from PIL import Image
 from PIL.Image import Image as ImageType
 from sklearn.utils.multiclass import type_of_target
-import matplotlib.pyplot as plt
 from matplotlib.backends.backend_agg import FigureCanvasAgg
 from io import BytesIO
 
diff --git a/auroris/visualization/_distribution.py b/auroris/visualization/_distribution.py
index 4282b00..d99d8a1 100644
--- a/auroris/visualization/_distribution.py
+++ b/auroris/visualization/_distribution.py
@@ -1,17 +1,14 @@
-from typing import Callable, Dict, List, Optional, Tuple
+from typing import List, Optional
 
 import numpy as np
 import pandas as pd
 import seaborn as sns
 from loguru import logger
 from scipy import stats
+import matplotlib.pyplot as plt
 
 from auroris.visualization.utils import create_figure
 
-import seaborn as sns
-import matplotlib.pyplot as plt
-import numpy as np
-
 
 def detailed_distributions_plots(
     data: pd.DataFrame, label_name: str, sections: Optional[List[dict]] = None, log_scale: bool = False
@@ -33,7 +30,7 @@ def detailed_distributions_plots(
         logger.exception(e)
         if log_scale:
             logger.exception(
-                f"The current error is likely due to the `log_scale` was enabled. Please disable the `log_scale` and try again."
+                "The current error is likely due to the `log_scale` was enabled. Please disable the `log_scale` and try again."
             )
 
     # Fill the sections under the KDE curve
@@ -51,137 +48,6 @@ def detailed_distributions_plots(
     return fig.figure
 
 
-# def detailed_distributions_plots(
-#     df: pd.DataFrame,
-#     thresholds: Optional[Dict[str, Tuple[int, Callable]]] = None,
-#     discretizer: Optional[callable] = None,
-#     label_names: List[str] = None,
-#     log_scale_mapping: Dict[str, bool] = None,
-#     positive_color: str = "#3db371",
-#     negative_color: str = "#a9a9a9",
-#     n_cols: int = 3,
-#     fig_base_size: float = 8,
-#     w_h_ratio: float = 0.5,
-#     legend_fontsize: int = 18,
-#     ticks_fontsize: int = 18,
-#     title_fontsize: int = 18,
-#     gridsize: int = 1000,
-#     dpi: int = 150,
-#     seaborn_theme: Optional[str] = "whitegrid",
-# ):
-#     """Plot the detailed distribution of the columns in `df`. Also, color the part of the
-#     "positive" distribution using `thresholds`.
-
-#     Args:
-#         df: A dataframe with binarized readouts only. NaN are allowed.
-#         thresholds: A dict mapping of the `df` column. Value is a tuple where the first
-#             element is the threshold value and the second element is a callable deciding wether
-#             a datapoint meets the criterai or not (something like `np.less` or np.greater`).
-#         label_names: Name of the labels (same order as the columns in `df`). If not set
-#             the name of the columns are used.
-#         log_scale_mapping: A dict mapping of the `df` column. If True,
-#             the plot for this readout will be log scaled.
-#         positive_color: Color for `True` or `1`.
-#         negative_color: Color for `False` or `0`.
-#         n_cols: Number of columns in the subplots.
-#         fig_base_size: Base size of the plots.
-#         w_h_ratio: Width/height ratio.
-#         legend_fontsize: Font size of the legend.
-#         ticks_fontsize: Font size of the x ticks and x label.
-#         title_fontsize: Font size of the title.
-#         gridsize: Gridsize for the kernel density estimate (KDE).
-#         dpi: DPI value of the figure.
-#         seaborn_theme: Seaborn theme.
-#     """
-
-#     # NOTE: the `thresholds` API is not super nice, consider an alternative.
-#     # NOTE: we could eventually add support for multiclass here if we need it.
-#     if thresholds is None:
-#         thresholds = {}
-
-#     if log_scale_mapping is None:
-#         log_scale_mapping = {}
-
-#     if label_names is None:
-#         label_names = df.columns.tolist()
-
-#     # Check all columns are numeric
-#     numerics = df.apply(lambda x: x.dtype.kind in "biufc")
-#     if not numerics.all():
-#         raise ValueError(f"Not all columns are numeric: {numerics[~numerics].to_dict()}")
-
-#     n_plots = len(df.columns)
-
-#     # Create the figure
-#     with create_figure(
-#         n_plots=n_plots,
-#         n_cols=n_cols,
-#         dpi=dpi,
-#         fig_base_size=fig_base_size,
-#         w_h_ratio=w_h_ratio,
-#         seaborn_theme=seaborn_theme,
-#     ) as (fig, axes):
-#         for ax, readout, label_name in zip(axes, df.columns, label_names):
-#             values = df[readout].dropna()
-
-#             # Get threshold value and function
-#             threshold_value, threshold_fn = None, None
-#             threshold = thresholds.get(readout, None)
-#             if threshold is not None:
-#                 threshold_value, threshold_fn = threshold
-
-#             # Whether to log scale
-#             log_scale = log_scale_mapping.get(readout, False)
-
-#             # Draw distribution and kde plot
-#             kde_kws = {}
-#             kde_kws["clip"] = values.min(), values.max()
-#             kde_kws["gridsize"] = gridsize
-#             kplot = sns.histplot(
-#                 values,
-#                 kde=True,
-#                 ax=ax,
-#                 color=negative_color,
-#                 kde_kws=kde_kws,
-#                 log_scale=log_scale,
-#             )
-
-#             # Label
-#             ax.set_title(label_name, fontsize=title_fontsize)
-#             ax.set_xlabel(None)
-#             ax.set_ylabel("Count", fontsize=ticks_fontsize)
-
-#             ax.xaxis.set_tick_params(labelsize=ticks_fontsize)
-#             ax.yaxis.set_tick_params(labelsize=ticks_fontsize)
-
-#             if threshold_value is not None and threshold_fn is not None:
-#                 # Fill between on active values
-#                 x, y = kplot.get_lines()[0].get_data()
-#                 ax.fill_between(
-#                     x,
-#                     y,
-#                     where=threshold_fn(x, threshold_value),
-#                     facecolor=positive_color,
-#                     alpha=0.8,
-#                 )
-
-#                 # Active ratio text box
-#                 positive_ratio = threshold_fn(values, threshold_value).sum() / len(values) * 100
-#                 ax.text(
-#                     0.85,
-#                     0.95,
-#                     f"{positive_ratio:.1f} %",
-#                     transform=ax.transAxes,
-#                     fontsize=legend_fontsize,
-#                     verticalalignment="top",
-#                     bbox=dict(boxstyle="round", facecolor="wheat", alpha=0.5),
-#                 )
-#             else:
-#                 logger.warning(f"Threshold not available for readout '{readout}'")
-
-#     return fig
-
-
 def visualize_distribution_with_outliers(
     values: np.ndarray,
     is_outlier: Optional[List[bool]] = None,