Skip to content

Commit

Permalink
remove unused code
Browse files Browse the repository at this point in the history
  • Loading branch information
zhu0619 committed May 7, 2024
1 parent 1580695 commit e991de1
Show file tree
Hide file tree
Showing 7 changed files with 5 additions and 164 deletions.
2 changes: 1 addition & 1 deletion auroris/curation/_curator.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def transform(self, dataset: pd.DataFrame) -> Tuple[pd.DataFrame, CurationReport
dataset = dataset.copy(deep=True)
for action in self.steps:
logger.info(f"Performing step: {action.name}")
if action._dep_action and not action._dep_action in self.state:
if action._dep_action and action._dep_action not in self.state:
raise RuntimeError(f"{action._dep_action} should be called before {action.name}.")
with report.section(action.name):
kwargs = {}
Expand Down
15 changes: 0 additions & 15 deletions auroris/curation/actions/_discretize.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from auroris.curation.actions._base import BaseAction
from auroris.report import CurationReport
from auroris.types import VerbosityLevel
from auroris.visualization._distribution import detailed_distributions_plots


def discretize(
Expand Down Expand Up @@ -105,19 +104,5 @@ def transform(

if report is not None:
report.log_new_column(column_name)
# sections = []
# low = -np.inf
# high = np.inf

# for i, threshold in enumerate(self.thresholds + [high]):
# if self.label_order == "descending":
# i = len(self.thresholds) - i
# pct = 100 * sum(X == i) / len(X)
# sections.append(
# {"label": f"{column_name} = {i}: {pct:.1f} %", "start": low, "end": threshold, "pct": pct}
# )
# low = threshold
# fig = detailed_distributions_plots(data=dataset[self.input_column], label_name=self.input_column, sections=sections)
# report.log_image(fig, title="Data class distribution")

return dataset
3 changes: 1 addition & 2 deletions auroris/curation/actions/_distribution.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from typing import Dict, List, Optional
import pandas as pd
from pydantic import Field, PrivateAttr
from pydantic import Field
import numpy as np

from auroris.curation.actions._base import BaseAction
from auroris.report import CurationReport
from auroris.types import VerbosityLevel
from auroris.curation.actions._discretize import Discretization
from auroris.visualization import detailed_distributions_plots


Expand Down
7 changes: 0 additions & 7 deletions auroris/report/broadcaster/_html.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
import io
import fsspec
import base64
from typing import Optional
from PIL import Image as PILImage
from PIL.Image import Image as ImageType
from IPython.core.display import Image as IPy_Image

import datamol as dm
from auroris.report import CurationReport, Section
Expand Down Expand Up @@ -121,9 +117,6 @@ def on_report_start(self, report: CurationReport):
<p>Version: {report.auroris_version}</p>
</header>"""
)
# self._file.write("<h1>Curation Report</h1>")
# self._file.write(f"<p>Time: {report.time_stamp.strftime('%Y-%m-%d %H:%M:%S')}</p>")
# self._file.write(f"<p>Version: {report.auroris_version}</p>")

def on_section_start(self, section: Section):
self._file.write(f"<h2>{section.title}</h2>")
Expand Down
1 change: 0 additions & 1 deletion auroris/types.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from enum import IntEnum
from PIL.Image import Image


class VerbosityLevel(IntEnum):
Expand Down
1 change: 0 additions & 1 deletion auroris/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from PIL import Image
from PIL.Image import Image as ImageType
from sklearn.utils.multiclass import type_of_target
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg
from io import BytesIO

Expand Down
140 changes: 3 additions & 137 deletions auroris/visualization/_distribution.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,14 @@
from typing import Callable, Dict, List, Optional, Tuple
from typing import List, Optional

import numpy as np
import pandas as pd
import seaborn as sns
from loguru import logger
from scipy import stats
import matplotlib.pyplot as plt

from auroris.visualization.utils import create_figure

import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np


def detailed_distributions_plots(
data: pd.DataFrame, label_name: str, sections: Optional[List[dict]] = None, log_scale: bool = False
Expand All @@ -33,7 +30,7 @@ def detailed_distributions_plots(
logger.exception(e)
if log_scale:
logger.exception(
f"The current error is likely due to the `log_scale` was enabled. Please disable the `log_scale` and try again."
"The current error is likely due to the `log_scale` was enabled. Please disable the `log_scale` and try again."
)

# Fill the sections under the KDE curve
Expand All @@ -51,137 +48,6 @@ def detailed_distributions_plots(
return fig.figure


# def detailed_distributions_plots(
# df: pd.DataFrame,
# thresholds: Optional[Dict[str, Tuple[int, Callable]]] = None,
# discretizer: Optional[callable] = None,
# label_names: List[str] = None,
# log_scale_mapping: Dict[str, bool] = None,
# positive_color: str = "#3db371",
# negative_color: str = "#a9a9a9",
# n_cols: int = 3,
# fig_base_size: float = 8,
# w_h_ratio: float = 0.5,
# legend_fontsize: int = 18,
# ticks_fontsize: int = 18,
# title_fontsize: int = 18,
# gridsize: int = 1000,
# dpi: int = 150,
# seaborn_theme: Optional[str] = "whitegrid",
# ):
# """Plot the detailed distribution of the columns in `df`. Also, color the part of the
# "positive" distribution using `thresholds`.

# Args:
# df: A dataframe with binarized readouts only. NaN are allowed.
# thresholds: A dict keyed by `df` column name. Each value is a tuple where the first
# element is the threshold value and the second element is a callable deciding whether
# a datapoint meets the criteria or not (something like `np.less` or `np.greater`).
# label_names: Names of the labels (same order as the columns in `df`). If not set,
# the column names are used.
# log_scale_mapping: A dict keyed by `df` column name. If the value is True,
# the plot for that readout will be log scaled.
# positive_color: Color for `True` or `1`.
# negative_color: Color for `False` or `0`.
# n_cols: Number of columns in the subplots.
# fig_base_size: Base size of the plots.
# w_h_ratio: Width/height ratio.
# legend_fontsize: Font size of the legend.
# ticks_fontsize: Font size of the x ticks and x label.
# title_fontsize: Font size of the title.
# gridsize: Gridsize for the kernel density estimate (KDE).
# dpi: DPI value of the figure.
# seaborn_theme: Seaborn theme.
# """

# # NOTE: the `thresholds` API is not super nice, consider an alternative.
# # NOTE: we could eventually add support for multiclass here if we need it.
# if thresholds is None:
# thresholds = {}

# if log_scale_mapping is None:
# log_scale_mapping = {}

# if label_names is None:
# label_names = df.columns.tolist()

# # Check all columns are numeric
# numerics = df.apply(lambda x: x.dtype.kind in "biufc")
# if not numerics.all():
# raise ValueError(f"Not all columns are numeric: {numerics[~numerics].to_dict()}")

# n_plots = len(df.columns)

# # Create the figure
# with create_figure(
# n_plots=n_plots,
# n_cols=n_cols,
# dpi=dpi,
# fig_base_size=fig_base_size,
# w_h_ratio=w_h_ratio,
# seaborn_theme=seaborn_theme,
# ) as (fig, axes):
# for ax, readout, label_name in zip(axes, df.columns, label_names):
# values = df[readout].dropna()

# # Get threshold value and function
# threshold_value, threshold_fn = None, None
# threshold = thresholds.get(readout, None)
# if threshold is not None:
# threshold_value, threshold_fn = threshold

# # Whether to log scale
# log_scale = log_scale_mapping.get(readout, False)

# # Draw distribution and kde plot
# kde_kws = {}
# kde_kws["clip"] = values.min(), values.max()
# kde_kws["gridsize"] = gridsize
# kplot = sns.histplot(
# values,
# kde=True,
# ax=ax,
# color=negative_color,
# kde_kws=kde_kws,
# log_scale=log_scale,
# )

# # Label
# ax.set_title(label_name, fontsize=title_fontsize)
# ax.set_xlabel(None)
# ax.set_ylabel("Count", fontsize=ticks_fontsize)

# ax.xaxis.set_tick_params(labelsize=ticks_fontsize)
# ax.yaxis.set_tick_params(labelsize=ticks_fontsize)

# if threshold_value is not None and threshold_fn is not None:
# # Fill between on active values
# x, y = kplot.get_lines()[0].get_data()
# ax.fill_between(
# x,
# y,
# where=threshold_fn(x, threshold_value),
# facecolor=positive_color,
# alpha=0.8,
# )

# # Active ratio text box
# positive_ratio = threshold_fn(values, threshold_value).sum() / len(values) * 100
# ax.text(
# 0.85,
# 0.95,
# f"{positive_ratio:.1f} %",
# transform=ax.transAxes,
# fontsize=legend_fontsize,
# verticalalignment="top",
# bbox=dict(boxstyle="round", facecolor="wheat", alpha=0.5),
# )
# else:
# logger.warning(f"Threshold not available for readout '{readout}'")

# return fig


def visualize_distribution_with_outliers(
values: np.ndarray,
is_outlier: Optional[List[bool]] = None,
Expand Down

0 comments on commit e991de1

Please sign in to comment.