Skip to content

Commit

Permalink
add find_outliers percentage of total (#81)
Browse files Browse the repository at this point in the history
  • Loading branch information
d33bs authored Sep 3, 2024
1 parent 2d66b5a commit 6122320
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 199 deletions.
218 changes: 23 additions & 195 deletions docs/src/examples/cosmicqc_in_a_nutshell.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions docs/src/examples/cosmicqc_in_a_nutshell.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@
# set a path for the parquet-based dataset
# (in this case, CellProfiler SQLite data processed by CytoTable)
data_path = (
- "../../tests/data/cytotable/NF1_cellpainting_data/Plate_2_with_image_data.parquet"
+ "../../../tests/data/cytotable/NF1_cellpainting_data/"
+ "Plate_2_with_image_data.parquet"
)

# set a context directory for images associated with the dataset
Expand Down Expand Up @@ -76,7 +77,6 @@
# show histogram reports on the outliers and inliers
# for each threshold set in the new columns
labeled_scdf.show_report()

# show cropped images through CytoDataFrame from the dataset to help analyze outliers
labeled_scdf.sort_values(by="cqc.large_nuclei.is_outlier", ascending=False)[
[
Expand Down
6 changes: 5 additions & 1 deletion src/cosmicqc/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,11 @@ def find_outliers(
]

# Print outliers count and range for each feature
- print("Number of outliers:", outliers_df.shape[0])
+ print(
+ "Number of outliers:",
+ outliers_df.shape[0],
+ f"({'{:.2f}'.format((outliers_df.shape[0] / df.shape[0])*100)}%)",
+ )
print("Outliers Range:")
for feature in feature_thresholds:
print(f"{feature} Min:", outliers_df[feature].min())
Expand Down
2 changes: 1 addition & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def test_cli_find_outliers(tmp_path: pathlib.Path, basic_outlier_csv: str):
assert returncode == 0
assert (
stdout.strip()
- == """Number of outliers: 2
+ == """Number of outliers: 2 (20.00%)
Outliers Range:
example_feature Min: 9
example_feature Max: 10
Expand Down

0 comments on commit 6122320

Please sign in to comment.