From b15d744bc446cdea0bdd3995295c68afa7eb1e6a Mon Sep 17 00:00:00 2001 From: d33bs Date: Tue, 23 Jul 2024 16:10:35 -0600 Subject: [PATCH] linting --- .pre-commit-config.yaml | 3 +- .../create_image_and_parquet_data.py | 75 ------------------- .../create_mask_data.py | 4 +- 3 files changed, 4 insertions(+), 78 deletions(-) delete mode 100644 tests/data/cytotable/NF1_cellpainting_data_shrunken/create_image_and_parquet_data.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f5f266..f2b1cb5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,7 +23,8 @@ repos: (?x)^( .*\.lock | .*\.json | - .*\.ipynb + .*\.ipynb | + .*\.cppipe )$ - repo: https://github.com/executablebooks/mdformat rev: 0.7.17 diff --git a/tests/data/cytotable/NF1_cellpainting_data_shrunken/create_image_and_parquet_data.py b/tests/data/cytotable/NF1_cellpainting_data_shrunken/create_image_and_parquet_data.py deleted file mode 100644 index 9451a4f..0000000 --- a/tests/data/cytotable/NF1_cellpainting_data_shrunken/create_image_and_parquet_data.py +++ /dev/null @@ -1,75 +0,0 @@ -""" -Creates and shrunken dataset for testing puproses -based on coSMicQC/tests/data/cytotable/NF1_cellpainting_data (Plate 2) - -This file may be processed using the following command from the root -of the project repository: -`poetry run python \ -tests/data/cytotable/NF1_cellpainting_data_shrunken/create_image_and_parquet_data.py` -""" - - -import pathlib -import shutil - -import pandas as pd - -source_data_path = "tests/data/cytotable/NF1_cellpainting_data/" -target_data_path = "tests/data/cytotable/NF1_cellpainting_data_shrunken/" -source_image_data_path = source_data_path + "Plate_2_images" -source_parquet_path = source_data_path + "Plate_2_with_image_data.parquet" -target_image_data_path = target_data_path + "Plate_2_images" -target_parquet_path = target_data_path + "Plate_2_with_image_data_shrunken.parquet" - -# create target image dir -pathlib.Path(target_image_data_path).mkdir(exist_ok=True) - -# read source data -source_df = pd.read_parquet(source_parquet_path) - -# get a sample of 5 from the source data -sampled_df = source_df.sample(n=5) - -# send the sampled df to parquet file -sampled_df.to_parquet(target_parquet_path) - - -def check_and_copy_file(filename: str): - """ - Checks for files in target dir and copies them if they don't already exist - """ - source_path = f"{source_image_data_path}/{filename}" - target_path = f"{target_image_data_path}/{filename}" - - # Check if the file already exists in the target directory - if not pathlib.Path(target_path).is_file(): - # Copy the file if it doesn't exist - shutil.copy(source_path, target_path) - return f"Copied {filename} to {target_image_data_path}" - else: - return f"{filename} already exists in {target_image_data_path}" - - -# apply the file copy and collect status information -sampled_df["Image_FileName_DAPI_status"] = sampled_df["Image_FileName_DAPI"].apply( - check_and_copy_file -) -sampled_df["Image_FileName_GFP_status"] = sampled_df["Image_FileName_GFP"].apply( - check_and_copy_file -) -sampled_df["Image_FileName_RFP_status"] = sampled_df["Image_FileName_RFP"].apply( - check_and_copy_file -) - -sampled_df - -# show the results using a mask on the dataframe for status -print( - sampled_df[ - [ - "Image_FileName_DAPI_status", - "Image_FileName_GFP_status", - "Image_FileName_RFP_status", - ] - ] -) diff --git a/tests/data/cytotable/NF1_cellpainting_data_shrunken/create_mask_data.py b/tests/data/cytotable/NF1_cellpainting_data_shrunken/create_mask_data.py index f16eee3..d360508 100644 --- a/tests/data/cytotable/NF1_cellpainting_data_shrunken/create_mask_data.py +++ b/tests/data/cytotable/NF1_cellpainting_data_shrunken/create_mask_data.py @@ -10,9 +10,9 @@ tests/data/cytotable/NF1_cellpainting_data_shrunken/create_mask_data.py` """ -import subprocess -import pathlib import os +import pathlib +import subprocess # create a dir for segmentation masks pathlib.Path("tests/data/cytotable/NF1_cellpainting_data_shrunken/Plate_2_masks").mkdir(