diff --git a/.cruft.json b/.cruft.json
index 38c173e..0f6f474 100644
--- a/.cruft.json
+++ b/.cruft.json
@@ -1,7 +1,7 @@
 {
   "template": "https://github.com/scverse/cookiecutter-scverse",
-  "commit": "8e96abb5c3e2d5078c44713958da672711cf2a48",
-  "checkout": null,
+  "commit": "87a407a65408d75a949c0b54b19fd287475a56f8",
+  "checkout": "v0.4.0",
   "context": {
     "cookiecutter": {
       "project_name": "PopV",
@@ -13,7 +13,8 @@
       "project_repo": "https://github.com/YosefLab/PopV.git",
       "license": "MIT License",
       "_copy_without_render": [
-        ".github/workflows/**.yaml",
+        ".github/workflows/build.yaml",
+        ".github/workflows/test.yaml",
         "docs/_templates/autosummary/**.rst"
       ],
       "_render_devdocs": false,
diff --git a/.gitignore b/.gitignore
index de87789..c0f512d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,7 @@ __pycache__/
 /.pytest_cache/
 /.cache/
 /data/
+/node_modules/
 
 # docs
 /docs/generated/
diff --git a/.pre-commit-config.yaml.rej b/.pre-commit-config.yaml.rej
new file mode 100644
index 0000000..5f9ac18
--- /dev/null
+++ b/.pre-commit-config.yaml.rej
@@ -0,0 +1,34 @@
+diff a/.pre-commit-config.yaml b/.pre-commit-config.yaml	(rejected hunks)
+@@ -6,29 +6,18 @@ default_stages:
+   - push
+ minimum_pre_commit_version: 2.16.0
+ repos:
+-  - repo: https://github.com/psf/black
+-    rev: "24.4.2"
+-    hooks:
+-      - id: black
+-  - repo: https://github.com/asottile/blacken-docs
+-    rev: 1.16.0
+-    hooks:
+-      - id: blacken-docs
+   - repo: https://github.com/pre-commit/mirrors-prettier
+     rev: v4.0.0-alpha.8
+     hooks:
+       - id: prettier
+-        # Newer versions of node don't work on systems that have an older version of GLIBC
+-        # (in particular Ubuntu 18.04 and Centos 7)
+-        # EOL of Centos 7 is in 2024-06, we can probably get rid of this then.
+-        # See https://github.com/scverse/cookiecutter-scverse/issues/143 and
+-        # https://github.com/jupyterlab/jupyterlab/issues/12675
+-        language_version: "17.9.1"
+   - repo: https://github.com/astral-sh/ruff-pre-commit
+     rev: v0.4.4
+     hooks:
+       - id: ruff
++        types_or: [python, pyi, jupyter]
+         args: [--fix, --exit-non-zero-on-fix]
++      - id: ruff-format
++        types_or: [python, pyi, jupyter]
+   - repo: https://github.com/pre-commit/pre-commit-hooks
+     rev: v4.6.0
+     hooks:
diff --git a/README.md.rej b/README.md.rej
new file mode 100644
index 0000000..9d5babc
--- /dev/null
+++ b/README.md.rej
@@ -0,0 +1,10 @@
+diff a/README.md b/README.md	(rejected hunks)
+@@ -17,7 +17,7 @@ Please refer to the [documentation][link-docs]. In particular, the
+
+ ## Installation
+
+-You need to have Python 3.9 or newer installed on your system. If you don't have
++You need to have Python 3.10 or newer installed on your system. If you don't have
+ Python installed, we recommend installing [Mambaforge](https://github.com/conda-forge/miniforge#mambaforge).
+
+ There are several alternative options to install PopV:
diff --git a/docs/conf.py b/docs/conf.py
index 8ba3576..c57a743 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,5 +1,5 @@
 # Configuration file for the Sphinx documentation builder.
-#
+
 # This file only contains a selection of the most common options. For a full
 # list see the documentation:
 # https://www.sphinx-doc.org/en/master/usage/configuration.html
@@ -45,10 +45,10 @@
 
 html_context = {
     "display_github": True,  # Integrate GitHub
-    "github_user": "cane11",  # Username
-    "github_repo": project_name,  # Repo name
-    "github_version": "main",  # Version
-    "conf_py_path": "/docs/",  # Path in the checkout to the docs root
+    "github_user": "cane11",
+    "github_repo": "https://github.com/YosefLab/PopV.git",
+    "github_version": "main",
+    "conf_py_path": "/docs/",
 }
 
 # -- General configuration ---------------------------------------------------
diff --git a/docs/contributing.md b/docs/contributing.md
index 0f210e3..9fc2252 100644
--- a/docs/contributing.md
+++ b/docs/contributing.md
@@ -51,7 +51,7 @@ and [prettier][prettier-editors].
 ## Writing tests
 
 ```{note}
-Remember to first install the package with `pip install '-e[dev,test]'`
+Remember to first install the package with `pip install -e '.[dev,test]'`
 ```
 
 This package uses the [pytest][] for automated testing. Please [write tests][scanpy-test-docs] for every function added
@@ -93,7 +93,7 @@ Before making a release, you need to update the version number in the `pyproject
 > Additional labels for pre-release and build metadata are available as extensions to the MAJOR.MINOR.PATCH format.
 
 Once you are done, commit and push your changes and navigate to the "Releases" page of this project on GitHub.
-Specify `vX.X.X` as a tag name and create a release. For more information, see [managing Github releases][]. This will automatically create a git tag and trigger a Github workflow that creates a release on PyPI.
+Specify `vX.X.X` as a tag name and create a release. For more information, see [managing GitHub releases][]. This will automatically create a git tag and trigger a Github workflow that creates a release on PyPI.
 
 ## Writing documentation
 
@@ -157,3 +157,4 @@ open _build/html/index.html
 [numpydoc]: https://numpydoc.readthedocs.io/en/latest/format.html
 [sphinx autodoc typehints]: https://github.com/tox-dev/sphinx-autodoc-typehints
 [pypi]: https://pypi.org/
+[managing GitHub releases]: https://docs.github.com/en/repositories/releasing-projects-on-github/managing-releases-in-a-repository
diff --git a/popv/_settings.py b/popv/_settings.py
index 06bd242..b587517 100644
--- a/popv/_settings.py
+++ b/popv/_settings.py
@@ -101,7 +101,9 @@ def verbosity(self, level: str | int):
             console = Console(force_terminal=True)
             if console.is_jupyter is True:
                 console.is_jupyter = False
-            ch = RichHandler(level=level, show_path=False, console=console, show_time=False)
+            ch = RichHandler(
+                level=level, show_path=False, console=console, show_time=False
+            )
             formatter = logging.Formatter("%(message)s")
             ch.setFormatter(formatter)
             popv_logger.addHandler(ch)
diff --git a/popv/_utils.py b/popv/_utils.py
index c6fa62c..98ac74a 100644
--- a/popv/_utils.py
+++ b/popv/_utils.py
@@ -49,7 +49,9 @@ def subsample_dataset(
         if labels_counts[label] < n_samples_per_label:
             sample_idx.append(label_locs)
         else:
-            label_subset = np.random.choice(label_locs, n_samples_per_label, replace=False)
+            label_subset = np.random.choice(
+                label_locs, n_samples_per_label, replace=False
+            )
             sample_idx.append(label_subset)
     sample_idx = np.concatenate(sample_idx)
     return adata.obs_names[sample_idx]
@@ -79,7 +81,9 @@ def check_genes_is_subset(ref_genes, query_genes):
         logging.info("All ref genes are in query dataset. Can use pretrained models.")
         is_subset = True
     else:
-        logging.info("Not all reference genes are in query dataset. Set 'prediction_mode' to 'retrain'.")
+        logging.info(
+            "Not all reference genes are in query dataset. Set 'prediction_mode' to 'retrain'."
+        )
         is_subset = False
     return is_subset
 
@@ -95,7 +99,9 @@ def make_batch_covariate(adata, batch_keys, new_batch_key):
     batch_keys
         List of keys in adat.obs corresponding to batches
     """
-    adata.obs[new_batch_key] = adata.obs[batch_keys].astype(str).sum(1).astype("category")
+    adata.obs[new_batch_key] = (
+        adata.obs[batch_keys].astype(str).sum(1).astype("category")
+    )
 
 
 def calculate_depths(g):
@@ -142,7 +148,9 @@ def make_ontology_dag(obofile, lowercase=False):
     """
     co = obonet.read_obo(obofile, encoding="utf-8")
     id_to_name = {id_: data.get("name") for id_, data in co.nodes(data=True)}
-    name_to_id = {data["name"]: id_ for id_, data in co.nodes(data=True) if ("name" in data)}
+    name_to_id = {
+        data["name"]: id_ for id_, data in co.nodes(data=True) if ("name" in data)
+    }
 
     # get all node ids that are celltypes (start with CL)
     cl_ids = {id_: True for _, id_ in name_to_id.items() if id_.startswith("CL:")}
@@ -160,7 +168,11 @@ def make_ontology_dag(obofile, lowercase=False):
     for node in co.nodes():
         if node in cl_ids:
             for child, parent, key in co.out_edges(node, keys=True):
-                if child.startswith("CL:") and parent.startswith("CL:") and key == "is_a":
+                if (
+                    child.startswith("CL:")
+                    and parent.startswith("CL:")
+                    and key == "is_a"
+                ):
                     childname = id_to_name[child]
                     parentname = id_to_name[parent]
                     g.add_edge(childname, parentname, key=key)
diff --git a/popv/algorithms/_bbknn.py b/popv/algorithms/_bbknn.py
index 422fac8..aa6b090 100644
--- a/popv/algorithms/_bbknn.py
+++ b/popv/algorithms/_bbknn.py
@@ -86,7 +86,9 @@ def predict(self, adata):
             ]
         )
         if smallest_neighbor_graph < 15:
-            logging.warning(f"BBKNN found only {smallest_neighbor_graph} neighbors. Reduced neighbors in KNN.")
+            logging.warning(
+                f"BBKNN found only {smallest_neighbor_graph} neighbors. Reduced neighbors in KNN."
+            )
             self.classifier_dict["n_neighbors"] = smallest_neighbor_graph
 
         knn = KNeighborsClassifier(metric="precomputed", **self.classifier_dict)
@@ -95,9 +97,15 @@ def predict(self, adata):
         adata.obs[self.result_key] = knn.predict(test_distances)
 
         if adata.uns["_return_probabilities"]:
-            adata.obs[self.result_key + "_probabilities"] = np.max(knn.predict_proba(test_distances), axis=1)
+            adata.obs[self.result_key + "_probabilities"] = np.max(
+                knn.predict_proba(test_distances), axis=1
+            )
 
     def compute_embedding(self, adata):
         if adata.uns["_compute_embedding"]:
-            logging.info(f'Saving UMAP of bbknn results to adata.obs["{self.embedding_key}"]')
-            adata.obsm[self.embedding_key] = sc.tl.umap(adata, copy=True, **self.embedding_dict).obsm["X_umap"]
+            logging.info(
+                f'Saving UMAP of bbknn results to adata.obs["{self.embedding_key}"]'
+            )
+            adata.obsm[self.embedding_key] = sc.tl.umap(
+                adata, copy=True, **self.embedding_dict
+            ).obsm["X_umap"]
diff --git a/popv/algorithms/_celltypist.py b/popv/algorithms/_celltypist.py
index 295577a..da3258b 100644
--- a/popv/algorithms/_celltypist.py
+++ b/popv/algorithms/_celltypist.py
@@ -63,12 +63,16 @@ def predict(self, adata):
             **self.classifier_dict,
         )
         out_column = (
-            "majority_voting" if "majority_voting" in predictions.predicted_labels.columns else "predicted_labels"
+            "majority_voting"
+            if "majority_voting" in predictions.predicted_labels.columns
+            else "predicted_labels"
         )
 
         adata.obs[self.result_key] = predictions.predicted_labels[out_column]
         if adata.uns["_return_probabilities"]:
-            adata.obs[self.result_key + "_probabilities"] = predictions.probability_matrix.max(axis=1).values
+            adata.obs[self.result_key + "_probabilities"] = (
+                predictions.probability_matrix.max(axis=1).values
+            )
 
     def compute_embedding(self, adata):
         pass
diff --git a/popv/algorithms/_harmony.py b/popv/algorithms/_harmony.py
index 15d1857..cb56136 100644
--- a/popv/algorithms/_harmony.py
+++ b/popv/algorithms/_harmony.py
@@ -61,7 +61,9 @@ def __init__(
     def compute_integration(self, adata):
         logging.info("Integrating data with harmony")
 
-        adata.obsm["X_pca_harmony"] = harmonize(adata.obsm["X_pca"], adata.obs, batch_key=self.batch_key)
+        adata.obsm["X_pca_harmony"] = harmonize(
+            adata.obsm["X_pca"], adata.obs, batch_key=self.batch_key
+        )
 
     def predict(self, adata, result_key="popv_knn_on_harmony_prediction"):
         logging.info(f'Saving knn on harmony results to adata.obs["{result_key}"]')
@@ -75,7 +77,9 @@ def predict(self, adata, result_key="popv_knn_on_harmony_prediction"):
                 n_neighbors=self.classifier_dict["n_neighbors"],
                 parallel_batch_queries=True,
             ),
-            KNeighborsClassifier(metric="precomputed", weights=self.classifier_dict["weights"]),
+            KNeighborsClassifier(
+                metric="precomputed", weights=self.classifier_dict["weights"]
+            ),
         )
 
         knn.fit(train_X, train_Y)
@@ -91,6 +95,10 @@ def predict(self, adata, result_key="popv_knn_on_harmony_prediction"):
 
     def compute_embedding(self, adata):
         if adata.uns["_compute_embedding"]:
-            logging.info(f'Saving UMAP of harmony results to adata.obs["{self.embedding_key}"]')
+            logging.info(
+                f'Saving UMAP of harmony results to adata.obs["{self.embedding_key}"]'
+            )
             sc.pp.neighbors(adata, use_rep="X_pca_harmony")
-            adata.obsm[self.embedding_key] = sc.tl.umap(adata, copy=True, **self.embedding_dict).obsm["X_umap"]
+            adata.obsm[self.embedding_key] = sc.tl.umap(
+                adata, copy=True, **self.embedding_dict
+            ).obsm["X_umap"]
diff --git a/popv/algorithms/_onclass.py b/popv/algorithms/_onclass.py
index 259cc43..6ffa1bf 100644
--- a/popv/algorithms/_onclass.py
+++ b/popv/algorithms/_onclass.py
@@ -106,10 +106,12 @@ def compute_integration(self, adata):
         pass
 
     def predict(self, adata):
-        logging.info(f'Computing Onclass. Storing prediction in adata.obs["{self.result_key}"]')
-        adata.obs.loc[adata.obs["_dataset"] == "query", self.cell_ontology_obs_key] = adata.uns[
-            "unknown_celltype_label"
-        ]
+        logging.info(
+            f'Computing Onclass. Storing prediction in adata.obs["{self.result_key}"]'
+        )
+        adata.obs.loc[adata.obs["_dataset"] == "query", self.cell_ontology_obs_key] = (
+            adata.uns["unknown_celltype_label"]
+        )
 
         train_idx = adata.obs["_dataset"] == "ref"
 
@@ -127,10 +129,14 @@ def predict(self, adata):
         cl_ontology_file = adata.uns["_cl_ontology_file"]
         nlp_emb_file = adata.uns["_nlp_emb_file"]
 
-        celltype_dict, clid_2_name = self.make_celltype_to_cell_ontology_id_dict(cl_obo_file)
+        celltype_dict, clid_2_name = self.make_celltype_to_cell_ontology_id_dict(
+            cl_obo_file
+        )
         self.make_cell_ontology_id(adata, celltype_dict, self.cell_ontology_obs_key)
 
-        train_model = OnClassModel(cell_type_nlp_emb_file=nlp_emb_file, cell_type_network_file=cl_ontology_file)
+        train_model = OnClassModel(
+            cell_type_nlp_emb_file=nlp_emb_file, cell_type_network_file=cl_ontology_file
+        )
 
         if adata.uns["_save_path_trained_models"] is not None:
             model_path = adata.uns["_save_path_trained_models"] + "/OnClass"
@@ -175,13 +181,17 @@ def predict(self, adata):
         )
 
         if adata.uns["_prediction_mode"] == "fast":
-            onclass_seen = np.argmax(train_model.model.predict(corr_test_feature), axis=1)
+            onclass_seen = np.argmax(
+                train_model.model.predict(corr_test_feature), axis=1
+            )
             pred_label = [train_model.i2co[ind] for ind in onclass_seen]
             pred_label_str = [clid_2_name[ind] for ind in pred_label]
             adata.obs[self.result_key] = pred_label_str
             adata.obs[self.seen_result_key] = pred_label_str
         else:
-            onclass_pred = train_model.Predict(corr_test_feature, use_normalize=False, refine=True, unseen_ratio=-1.0)
+            onclass_pred = train_model.Predict(
+                corr_test_feature, use_normalize=False, refine=True, unseen_ratio=-1.0
+            )
             pred_label = [train_model.i2co[ind] for ind in onclass_pred[2]]
             pred_label_str = [clid_2_name[ind] for ind in pred_label]
             adata.obs[self.result_key] = pred_label_str
@@ -192,9 +202,15 @@ def predict(self, adata):
             adata.obs[self.seen_result_key] = pred_label_str
 
             if adata.uns["_return_probabilities"]:
-                adata.obs[self.result_key + "_probabilities"] = np.max(onclass_pred[1], axis=1) / onclass_pred[1].sum(1)
-                adata.obsm["onclass_probabilities"] = onclass_pred[1] / onclass_pred[1].sum(1, keepdims=True)
-                adata.obs["popv_onclass_seen" + "_probabilities"] = np.max(onclass_pred[0], axis=1)
+                adata.obs[self.result_key + "_probabilities"] = np.max(
+                    onclass_pred[1], axis=1
+                ) / onclass_pred[1].sum(1)
+                adata.obsm["onclass_probabilities"] = onclass_pred[1] / onclass_pred[
+                    1
+                ].sum(1, keepdims=True)
+                adata.obs["popv_onclass_seen" + "_probabilities"] = np.max(
+                    onclass_pred[0], axis=1
+                )
 
     def compute_embedding(self, adata):
         return None
diff --git a/popv/algorithms/_rf.py b/popv/algorithms/_rf.py
index 612731f..ca88ac7 100644
--- a/popv/algorithms/_rf.py
+++ b/popv/algorithms/_rf.py
@@ -48,13 +48,19 @@ def compute_integration(self, adata):
         pass
 
     def predict(self, adata):
-        logging.info(f'Computing random forest classifier. Storing prediction in adata.obs["{self.result_key}"]')
+        logging.info(
+            f'Computing random forest classifier. Storing prediction in adata.obs["{self.result_key}"]'
+        )
 
         test_x = adata.layers[self.layers_key] if self.layers_key else adata.X
 
         if adata.uns["_prediction_mode"] == "retrain":
             train_idx = adata.obs["_ref_subsample"]
-            train_x = adata[train_idx].layers[self.layers_key] if self.layers_key else adata[train_idx].X
+            train_x = (
+                adata[train_idx].layers[self.layers_key]
+                if self.layers_key
+                else adata[train_idx].X
+            )
             train_y = adata[train_idx].obs[self.labels_key].to_numpy()
             rf = RandomForestClassifier(**self.classifier_dict)
             rf.fit(train_x, train_y)
@@ -67,10 +73,14 @@ def predict(self, adata):
                     ),
                 )
         else:
-            rf = pickle.load(open(adata.uns["_save_path_trained_models"] + "rf_classifier.pkl", "rb"))
+            rf = pickle.load(
+                open(adata.uns["_save_path_trained_models"] + "rf_classifier.pkl", "rb")
+            )
         adata.obs[self.result_key] = rf.predict(test_x)
         if adata.uns["_return_probabilities"]:
-            adata.obs[self.result_key + "_probabilities"] = np.max(rf.predict_proba(test_x), axis=1)
+            adata.obs[self.result_key + "_probabilities"] = np.max(
+                rf.predict_proba(test_x), axis=1
+            )
 
     def compute_embedding(self, adata):
         pass
diff --git a/popv/algorithms/_scaffold_algorithm.py b/popv/algorithms/_scaffold_algorithm.py
index 5f90dc7..477c9e6 100644
--- a/popv/algorithms/_scaffold_algorithm.py
+++ b/popv/algorithms/_scaffold_algorithm.py
@@ -69,12 +69,16 @@ def compute_integration(self, adata):
         # adata.obsm["X_new_method"] = embedded_data
 
     def predict(self, adata):
-        logging.info(f'Computing new classifier method. Storing prediction in adata.obs["{self.result_key}"]')
+        logging.info(
+            f'Computing new classifier method. Storing prediction in adata.obs["{self.result_key}"]'
+        )
         # adata.obs[self.result_key] = classifier_results
 
     def compute_embedding(self, adata):
         if adata.uns["_compute_embedding"]:
-            logging.info(f'Saving UMAP of new integration method to adata.obs["{self.embedding_key}"]')
+            logging.info(
+                f'Saving UMAP of new integration method to adata.obs["{self.embedding_key}"]'
+            )
             # sc.pp.neighbors(adata, use_rep="embedding_space")
             # adata.obsm[self.embedding_key] = sc.tl.umap(
             #     adata, copy=True, **self.embedding_dict
diff --git a/popv/algorithms/_scanorama.py b/popv/algorithms/_scanorama.py
index f403156..afd6f51 100644
--- a/popv/algorithms/_scanorama.py
+++ b/popv/algorithms/_scanorama.py
@@ -62,7 +62,10 @@ def __init__(
     def compute_integration(self, adata):
         logging.info("Integrating data with scanorama")
 
-        _adatas = [adata[adata.obs[self.batch_key] == i] for i in np.unique(adata.obs[self.batch_key])]
+        _adatas = [
+            adata[adata.obs[self.batch_key] == i]
+            for i in np.unique(adata.obs[self.batch_key])
+        ]
         scanorama.integrate_scanpy(_adatas, **self.method_dict)
         tmp_adata = anndata.concat(_adatas)
         adata.obsm["X_scanorama"] = tmp_adata[adata.obs_names].obsm["X_scanorama"]
@@ -79,7 +82,9 @@ def predict(self, adata, result_key="popv_knn_on_scanorama_prediction"):
                 n_neighbors=self.classifier_dict["n_neighbors"],
                 parallel_batch_queries=True,
             ),
-            KNeighborsClassifier(metric="precomputed", weights=self.classifier_dict["weights"]),
+            KNeighborsClassifier(
+                metric="precomputed", weights=self.classifier_dict["weights"]
+            ),
         )
 
         knn.fit(train_X, train_Y)
@@ -89,10 +94,16 @@ def predict(self, adata, result_key="popv_knn_on_scanorama_prediction"):
         adata.obs[result_key] = knn_pred
 
         if adata.uns["_return_probabilities"]:
-            adata.obs[self.result_key + "_probabilities"] = np.max(knn.predict_proba(adata.obsm["X_scanorama"]), axis=1)
+            adata.obs[self.result_key + "_probabilities"] = np.max(
+                knn.predict_proba(adata.obsm["X_scanorama"]), axis=1
+            )
 
     def compute_embedding(self, adata):
         if adata.uns["_compute_embedding"]:
-            logging.info(f'Saving UMAP of scanorama results to adata.obs["{self.embedding_key}"]')
+            logging.info(
+                f'Saving UMAP of scanorama results to adata.obs["{self.embedding_key}"]'
+            )
             sc.pp.neighbors(adata, use_rep="X_scanorama")
-            adata.obsm[self.embedding_key] = sc.tl.umap(adata, copy=True, **self.embedding_dict).obsm["X_umap"]
+            adata.obsm[self.embedding_key] = sc.tl.umap(
+                adata, copy=True, **self.embedding_dict
+            ).obsm["X_umap"]
diff --git a/popv/algorithms/_scanvi.py b/popv/algorithms/_scanvi.py
index 804888a..6faf11b 100644
--- a/popv/algorithms/_scanvi.py
+++ b/popv/algorithms/_scanvi.py
@@ -84,9 +84,13 @@ def compute_integration(self, adata):
         if "subsampled_labels" not in adata.obs.columns:
             adata.obs["subsampled_labels"] = [
                 label if subsampled else adata.uns["unknown_celltype_label"]
-                for label, subsampled in zip(adata.obs["_labels_annotation"], adata.obs["_ref_subsample"])
+                for label, subsampled in zip(
+                    adata.obs["_labels_annotation"], adata.obs["_ref_subsample"]
+                )
             ]
-        adata.obs["subsampled_labels"] = adata.obs["subsampled_labels"].astype("category")
+        adata.obs["subsampled_labels"] = adata.obs["subsampled_labels"].astype(
+            "category"
+        )
         yprior = torch.tensor(
             [
                 adata.obs["_labels_annotation"].value_counts()[i] / adata.n_obs
@@ -96,11 +100,15 @@ def compute_integration(self, adata):
         )
 
         if self.n_epochs_unsupervised is None:
-            self.n_epochs_unsupervised = round(min(round((10000 / adata.n_obs) * 200), 200))
+            self.n_epochs_unsupervised = round(
+                min(round((10000 / adata.n_obs) * 200), 200)
+            )
 
         if adata.uns["_prediction_mode"] == "retrain":
             if adata.uns["_pretrained_scvi_path"] is not None:
-                scvi_model = scvi.model.SCVI.load(adata.uns["_save_path_trained_models"] + "/scvi", adata=adata)
+                scvi_model = scvi.model.SCVI.load(
+                    adata.uns["_save_path_trained_models"] + "/scvi", adata=adata
+                )
             else:
                 scvi.model.SCVI.setup_anndata(
                     adata,
@@ -164,15 +172,23 @@ def compute_integration(self, adata):
                 )
 
     def predict(self, adata):
-        logging.info(f'Saving scanvi label prediction to adata.obs["{self.result_key}"]')
+        logging.info(
+            f'Saving scanvi label prediction to adata.obs["{self.result_key}"]'
+        )
 
         adata.obs[self.result_key] = self.model.predict(adata)
         if adata.uns["_return_probabilities"]:
-            adata.obs[self.result_key + "_probabilities"] = np.max(self.model.predict(adata, soft=True), axis=1)
+            adata.obs[self.result_key + "_probabilities"] = np.max(
+                self.model.predict(adata, soft=True), axis=1
+            )
 
     def compute_embedding(self, adata):
         if adata.uns["_compute_embedding"]:
-            logging.info(f'Saving UMAP of scanvi results to adata.obs["{self.embedding_key}"]')
+            logging.info(
+                f'Saving UMAP of scanvi results to adata.obs["{self.embedding_key}"]'
+            )
             adata.obsm["X_scanvi"] = self.model.get_latent_representation(adata)
             sc.pp.neighbors(adata, use_rep="X_scanvi")
-            adata.obsm[self.embedding_key] = sc.tl.umap(adata, copy=True, **self.embedding_dict).obsm["X_umap"]
+            adata.obsm[self.embedding_key] = sc.tl.umap(
+                adata, copy=True, **self.embedding_dict
+            ).obsm["X_umap"]
diff --git a/popv/algorithms/_scvi.py b/popv/algorithms/_scvi.py
index 968c37c..3d75dd4 100644
--- a/popv/algorithms/_scvi.py
+++ b/popv/algorithms/_scvi.py
@@ -83,9 +83,13 @@ def compute_integration(self, adata):
         if "subsampled_labels" not in adata.obs.columns:
             adata.obs["subsampled_labels"] = [
                 label if subsampled else adata.uns["unknown_celltype_label"]
-                for label, subsampled in zip(adata.obs["_labels_annotation"], adata.obs["_ref_subsample"])
+                for label, subsampled in zip(
+                    adata.obs["_labels_annotation"], adata.obs["_ref_subsample"]
+                )
             ]
-        adata.obs["subsampled_labels"] = adata.obs["subsampled_labels"].astype("category")
+        adata.obs["subsampled_labels"] = adata.obs["subsampled_labels"].astype(
+            "category"
+        )
 
         if adata.uns["_pretrained_scvi_path"] is None:
             SCVI.setup_anndata(
@@ -122,9 +126,14 @@ def compute_integration(self, adata):
                 plan_kwargs={"n_epochs_kl_warmup": min(20, self.max_epochs)},
             )
 
-            if adata.uns["_save_path_trained_models"] is not None and adata.uns["_prediction_mode"] == "retrain":
+            if (
+                adata.uns["_save_path_trained_models"] is not None
+                and adata.uns["_prediction_mode"] == "retrain"
+            ):
                 # Update scvi for scanvi.
-                adata.uns["_pretrained_scvi_path"] = adata.uns["_save_path_trained_models"] + "/scvi"
+                adata.uns["_pretrained_scvi_path"] = (
+                    adata.uns["_save_path_trained_models"] + "/scvi"
+                )
                 model.save(
                     adata.uns["_save_path_trained_models"] + "/scvi",
                     save_anndata=False,
@@ -145,14 +154,17 @@ def predict(self, adata):
                     n_neighbors=self.classifier_dict["n_neighbors"],
                     parallel_batch_queries=True,
                 ),
-                KNeighborsClassifier(metric="precomputed", weights=self.classifier_dict["weights"]),
+                KNeighborsClassifier(
+                    metric="precomputed", weights=self.classifier_dict["weights"]
+                ),
             )
             knn.fit(train_X, train_Y)
             if adata.uns["_save_path_trained_models"]:
                 pickle.dump(
                     knn,
                     open(
-                        adata.uns["_save_path_trained_models"] + "scvi_knn_classifier.pkl",
+                        adata.uns["_save_path_trained_models"]
+                        + "scvi_knn_classifier.pkl",
                         "wb",
                     ),
                 )
@@ -169,10 +181,16 @@ def predict(self, adata):
         # save_results
         adata.obs[self.result_key] = knn_pred
         if adata.uns["_return_probabilities"]:
-            adata.obs[self.result_key + "_probabilities"] = np.max(knn.predict_proba(adata.obsm["X_scvi"]), axis=1)
+            adata.obs[self.result_key + "_probabilities"] = np.max(
+                knn.predict_proba(adata.obsm["X_scvi"]), axis=1
+            )
 
     def compute_embedding(self, adata):
         if adata.uns["_compute_embedding"]:
-            logging.info(f'Saving UMAP of scvi results to adata.obs["{self.embedding_key}"]')
+            logging.info(
+                f'Saving UMAP of scvi results to adata.obs["{self.embedding_key}"]'
+            )
             sc.pp.neighbors(adata, use_rep="X_scvi")
-            adata.obsm[self.embedding_key] = sc.tl.umap(adata, copy=True, **self.embedding_dict).obsm["X_umap"]
+            adata.obsm[self.embedding_key] = sc.tl.umap(
+                adata, copy=True, **self.embedding_dict
+            ).obsm["X_umap"]
diff --git a/popv/algorithms/_svm.py b/popv/algorithms/_svm.py
index e18c464..f9a424b 100644
--- a/popv/algorithms/_svm.py
+++ b/popv/algorithms/_svm.py
@@ -50,12 +50,18 @@ def compute_integration(self, adata):
         pass
 
     def predict(self, adata):
-        logging.info(f'Computing support vector machine. Storing prediction in adata.obs["{self.result_key}"]')
+        logging.info(
+            f'Computing support vector machine. Storing prediction in adata.obs["{self.result_key}"]'
+        )
         test_x = adata.layers[self.layers_key] if self.layers_key else adata.X
 
         if adata.uns["_prediction_mode"] == "retrain":
             train_idx = adata.obs["_ref_subsample"]
-            train_x = adata[train_idx].layers[self.layers_key] if self.layers_key else adata[train_idx].X
+            train_x = (
+                adata[train_idx].layers[self.layers_key]
+                if self.layers_key
+                else adata[train_idx].X
+            )
             train_y = adata[train_idx].obs[self.labels_key].to_numpy()
             clf = CalibratedClassifierCV(svm.LinearSVC(**self.classifier_dict))
             clf.fit(train_x, train_y)
@@ -68,12 +74,18 @@ def predict(self, adata):
                     ),
                 )
         else:
-            clf = pickle.load(open(adata.uns["_save_path_trained_models"] + "svm_classifier.pkl", "rb"))
+            clf = pickle.load(
+                open(
+                    adata.uns["_save_path_trained_models"] + "svm_classifier.pkl", "rb"
+                )
+            )
 
         adata.obs[self.result_key] = clf.predict(test_x)
 
         if adata.uns["_return_probabilities"]:
-            adata.obs[self.result_key + "_probabilities"] = np.max(clf.predict_proba(test_x), axis=1)
+            adata.obs[self.result_key + "_probabilities"] = np.max(
+                clf.predict_proba(test_x), axis=1
+            )
 
         adata.obs[self.result_key]
 
diff --git a/popv/annotation.py b/popv/annotation.py
index a99ce3d..87ca81a 100644
--- a/popv/annotation.py
+++ b/popv/annotation.py
@@ -118,7 +118,9 @@ def compute_consensus(adata: anndata.AnnData, prediction_keys: list) -> None:
     Saves the consensus percentage between methods in adata.obs['popv_majority_vote_score']
 
     """
-    consensus_prediction = adata.obs[prediction_keys].apply(_utils.majority_vote, axis=1)
+    consensus_prediction = adata.obs[prediction_keys].apply(
+        _utils.majority_vote, axis=1
+    )
     adata.obs["popv_majority_vote_prediction"] = consensus_prediction
 
     agreement = adata.obs[prediction_keys].apply(_utils.majority_count, axis=1)
@@ -151,9 +153,13 @@ def ontology_vote_onclass(
     if adata.uns["_prediction_mode"] == "retrain":
         G = _utils.make_ontology_dag(adata.uns["_cl_obo_file"])
         if adata.uns["_save_path_trained_models"] is not None:
-            pickle.dump(G, open(adata.uns["_save_path_trained_models"] + "obo_dag.pkl", "wb"))
+            pickle.dump(
+                G, open(adata.uns["_save_path_trained_models"] + "obo_dag.pkl", "wb")
+            )
     else:
-        G = pickle.load(open(adata.uns["_save_path_trained_models"] + "obo_dag.pkl", "rb"))
+        G = pickle.load(
+            open(adata.uns["_save_path_trained_models"] + "obo_dag.pkl", "rb")
+        )
 
     cell_type_root_to_node = {}
     aggregate_ontology_pred = [None] * adata.n_obs
@@ -176,7 +182,9 @@ def ontology_vote_onclass(
                     cell_type_root_to_node[cell_type] = root_to_node
                     depth[cell_type] = len(nx.shortest_path(G, cell_type, "cell"))
                     for ancestor_cell_type in root_to_node:
-                        depth[ancestor_cell_type] = len(nx.shortest_path(G, ancestor_cell_type, "cell"))
+                        depth[ancestor_cell_type] = len(
+                            nx.shortest_path(G, ancestor_cell_type, "cell")
+                        )
                 if pred_key == "popv_onclass_prediction":
                     onclass_depth[ind] = depth[cell_type]
                     for ancestor_cell_type in root_to_node:
@@ -199,11 +207,17 @@ def ontology_vote_onclass(
     adata.obs[save_key] = aggregate_ontology_pred
     adata.obs[save_key + "_score"] = scores
     adata.obs[save_key + "_depth"] = depths
-    adata.obs[save_key + "_onclass_relative_depth"] = np.array(onclass_depth) - adata.obs[save_key + "_depth"]
+    adata.obs[save_key + "_onclass_relative_depth"] = (
+        np.array(onclass_depth) - adata.obs[save_key + "_depth"]
+    )
     # Change numeric values to categoricals.
-    adata.obs[[save_key + "_score", save_key + "_depth", save_key + "_onclass_relative_depth"]] = adata.obs[
+    adata.obs[
         [save_key + "_score", save_key + "_depth", save_key + "_onclass_relative_depth"]
-    ].astype("category")
+    ] = adata.obs[
+        [save_key + "_score", save_key + "_depth", save_key + "_onclass_relative_depth"]
+    ].astype(
+        "category"
+    )
     return adata
 
 
@@ -236,9 +250,13 @@ def ontology_parent_onclass(
     if adata.uns["_prediction_mode"] == "retrain":
         G = _utils.make_ontology_dag(adata.uns["_cl_obo_file"])
         if adata.uns["_save_path_trained_models"] is not None:
-            pickle.dump(G, open(adata.uns["_save_path_trained_models"] + "obo_dag.pkl", "wb"))
+            pickle.dump(
+                G, open(adata.uns["_save_path_trained_models"] + "obo_dag.pkl", "wb")
+            )
     else:
-        G = pickle.load(open(adata.uns["_save_path_trained_models"] + "obo_dag.pkl", "rb"))
+        G = pickle.load(
+            open(adata.uns["_save_path_trained_models"] + "obo_dag.pkl", "rb")
+        )
 
     cell_type_root_to_node = {}
     aggregate_ontology_pred = []
@@ -257,11 +275,16 @@ def ontology_parent_onclass(
                     cell_type_root_to_node[cell_type] = root_to_node
                     depth[cell_type] = len(nx.shortest_path(G, cell_type, "cell"))
                     for ancestor_cell_type in root_to_node:
-                        depth[ancestor_cell_type] = len(nx.shortest_path(G, ancestor_cell_type, "cell"))
+                        depth[ancestor_cell_type] = len(
+                            nx.shortest_path(G, ancestor_cell_type, "cell")
+                        )
                 for ancestor_cell_type in list(root_to_node) + [cell_type]:
                     score[ancestor_cell_type] += 1
                 score_popv[cell_type] += 1
-        score = {key: min(len(prediction_keys) - allowed_errors, value) for key, value in score.items()}
+        score = {
+            key: min(len(prediction_keys) - allowed_errors, value)
+            for key, value in score.items()
+        }
 
         # Find ancestor most present and deepest across all classifiers.
         # If tie, then highest in original classifier.
diff --git a/popv/preprocessing.py b/popv/preprocessing.py
index e9c2732..3c169a6 100644
--- a/popv/preprocessing.py
+++ b/popv/preprocessing.py
@@ -147,7 +147,9 @@ def __init__(
                 set(query_adata.var_names)
             ), "Query dataset misses genes that were used for reference model training. Retrain reference model, set mode='retrain'"
             self.query_adata = query_adata[:, self.genes].copy()
-            assert hvg is None, "Highly variable gene selection is not available if using trained reference model."
+            assert (
+                hvg is None
+            ), "Highly variable gene selection is not available if using trained reference model."
         else:
             self.query_adata = query_adata.copy()
         if query_layers_key is not None:
@@ -164,15 +166,23 @@ def __init__(
         self.compute_embedding = compute_embedding
 
         if cl_obo_folder is None:
-            self.cl_obo_file = os.path.dirname(os.path.dirname(__file__)) + "/ontology/cl.obo"
-            self.cl_ontology_file = os.path.dirname(os.path.dirname(__file__)) + "/ontology/cl.ontology"
-            self.nlp_emb_file = os.path.dirname(os.path.dirname(__file__)) + "/ontology/cl.ontology.nlp.emb"
+            self.cl_obo_file = (
+                os.path.dirname(os.path.dirname(__file__)) + "/ontology/cl.obo"
+            )
+            self.cl_ontology_file = (
+                os.path.dirname(os.path.dirname(__file__)) + "/ontology/cl.ontology"
+            )
+            self.nlp_emb_file = (
+                os.path.dirname(os.path.dirname(__file__))
+                + "/ontology/cl.ontology.nlp.emb"
+            )
             if not os.path.exists(self.nlp_emb_file):
                 subprocess.call(
                     [
                         "tar",
                         "-czf",
-                        os.path.dirname(os.path.dirname(__file__)) + "/ontology/nlp.emb.tar.gz",
+                        os.path.dirname(os.path.dirname(__file__))
+                        + "/ontology/nlp.emb.tar.gz",
                         "cl.ontology.nlp.emb",
                     ]
                 )
@@ -211,7 +221,9 @@ def __init__(
         self._preprocess()
 
     def _check_validity_anndata(self, adata, input_type):
-        assert check_nonnegative_integers(adata.X), f"Make sure input {input_type} adata contains raw_counts"
+        assert check_nonnegative_integers(
+            adata.X
+        ), f"Make sure input {input_type} adata contains raw_counts"
         assert len(set(adata.var_names)) == len(
             adata.var_names
         ), f"{input_type} dataset contains multiple genes with same gene name."
@@ -220,17 +232,25 @@ def _check_validity_anndata(self, adata, input_type):
 
     def _setup_dataset(self, adata, key, add_meta=""):
         if isinstance(self.batch_key[key], list):
-            adata.obs["_batch_annotation"] = adata.obs[self.batch_key[key]].astype(str).sum(1).astype("category")
+            adata.obs["_batch_annotation"] = (
+                adata.obs[self.batch_key[key]].astype(str).sum(1).astype("category")
+            )
         elif isinstance(self.batch_key[key], str):
             adata.obs["_batch_annotation"] = adata.obs[self.batch_key[key]]
         else:
             adata.obs["_batch_annotation"] = self.unknown_celltype_label
-        adata.obs["_batch_annotation"] = adata.obs["_batch_annotation"].astype(str) + add_meta
-        adata.obs["_batch_annotation"] = adata.obs["_batch_annotation"].astype("category")
+        adata.obs["_batch_annotation"] = (
+            adata.obs["_batch_annotation"].astype(str) + add_meta
+        )
+        adata.obs["_batch_annotation"] = adata.obs["_batch_annotation"].astype(
+            "category"
+        )
 
         adata.obs["_labels_annotation"] = self.unknown_celltype_label
         if self.labels_key[key] is not None:
-            adata.obs["_labels_annotation"] = adata.obs[self.labels_key[key]].astype("category")
+            adata.obs["_labels_annotation"] = adata.obs[self.labels_key[key]].astype(
+                "category"
+            )
 
         # subsample the reference cells used for training certain models
         if key == "reference":
@@ -250,8 +270,12 @@ def _setup_dataset(self, adata, key, add_meta=""):
 
     def _preprocess(self):
         if self.genes is None:
-            self.ref_adata = self.ref_adata[:, np.intersect1d(self.ref_adata.var_names, self.query_adata.var_names)]
-            self.query_adata = self.query_adata[:, np.intersect1d(self.ref_adata.var_names, self.query_adata.var_names)]
+            self.ref_adata = self.ref_adata[
+                :, np.intersect1d(self.ref_adata.var_names, self.query_adata.var_names)
+            ]
+            self.query_adata = self.query_adata[
+                :, np.intersect1d(self.ref_adata.var_names, self.query_adata.var_names)
+            ]
 
         if self.prediction_mode == "fast":
             self.adata = self.query_adata
@@ -272,22 +296,28 @@ def _preprocess(self):
             self.adata = self.adata[
                 self.adata.obs["_batch_annotation"].isin(
                     self.adata.obs["_batch_annotation"]
-                    .value_counts()[self.adata.obs["_batch_annotation"].value_counts() > 8]
+                    .value_counts()[
+                        self.adata.obs["_batch_annotation"].value_counts() > 8
+                    ]
                     .index
                 )
             ].copy()
-            difference_batches = set(self.adata.obs["_batch_annotation"]) - batch_before_filtering
+            difference_batches = (
+                set(self.adata.obs["_batch_annotation"]) - batch_before_filtering
+            )
             if difference_batches:
                 logging.warning(
                     f"The following batches will be excluded from annotation because they have less than 9 cells:{difference_batches}."
                 )
 
             # Sort data based on batch for efficiency downstream during SCANORAMA
-            self.adata = self.adata[self.adata.obs.sort_values(by="_batch_annotation").index].copy()
+            self.adata = self.adata[
+                self.adata.obs.sort_values(by="_batch_annotation").index
+            ].copy()
 
-            self.adata.obs[self.labels_key["reference"]] = self.adata.obs[self.labels_key["reference"]].astype(
-                "category"
-            )
+            self.adata.obs[self.labels_key["reference"]] = self.adata.obs[
+                self.labels_key["reference"]
+            ].astype("category")
 
         # Remove any cell with expression below 10 counts.
         zero_cell_names = self.adata[self.adata.X.sum(1) < 10].obs_names
@@ -312,23 +342,29 @@ def _preprocess(self):
                     inplace=False,
                     batch_key="_batch_annotation",
                 )["highly_variable"]
-            except ValueError:  # seurat_v3 tends to error with singularities then use Poisson hvg.
-                self.adata.var["highly_variable"] = sc.experimental.pp.highly_variable_genes(
-                    self.adata[self.adata.obs["_dataset"] == "ref"].copy(),
-                    n_top_genes=self.hvg,
-                    subset=False,
-                    layer="scvi_counts",
-                    flavor="pearson_residuals",
-                    inplace=False,
-                    batch_key="_batch_annotation",
-                )["highly_variable"]
+            except (
+                ValueError
+            ):  # seurat_v3 tends to error with singularities then use Poisson hvg.
+                self.adata.var["highly_variable"] = (
+                    sc.experimental.pp.highly_variable_genes(
+                        self.adata[self.adata.obs["_dataset"] == "ref"].copy(),
+                        n_top_genes=self.hvg,
+                        subset=False,
+                        layer="scvi_counts",
+                        flavor="pearson_residuals",
+                        inplace=False,
+                        batch_key="_batch_annotation",
+                    )["highly_variable"]
+                )
             self.adata = self.adata[:, self.adata.var["highly_variable"]].copy()
 
         sc.pp.normalize_total(self.adata, target_sum=1e4)
         sc.pp.log1p(self.adata)
         self.adata.layers["scaled_counts"] = self.adata.X.copy()
         if self.prediction_mode != "fast":
-            sc.pp.scale(self.adata, max_value=10, zero_center=False, layer="scaled_counts")
+            sc.pp.scale(
+                self.adata, max_value=10, zero_center=False, layer="scaled_counts"
+            )
             self.adata.obsm["X_pca"] = sc.tl.pca(self.adata.layers["scaled_counts"])
 
         # Store values as default for current popv in adata
diff --git a/popv/reproducibility/_accuracy.py b/popv/reproducibility/_accuracy.py
index 8728e4f..cf6a3fb 100644
--- a/popv/reproducibility/_accuracy.py
+++ b/popv/reproducibility/_accuracy.py
@@ -30,7 +30,9 @@ def match_type(n1, n2):
         else:
             return "no match"
 
-    adata.obs[save_key] = adata.obs.apply(lambda x: match_type(x[pred_key], x[gt_key]), axis=1)
+    adata.obs[save_key] = adata.obs.apply(
+        lambda x: match_type(x[pred_key], x[gt_key]), axis=1
+    )
 
 
 def _fine_ontology_sibling_accuracy(adata, obofile, pred_key, gt_key, save_key=None):
@@ -43,7 +45,9 @@ def _fine_ontology_sibling_accuracy(adata, obofile, pred_key, gt_key, save_key=N
 
     ontology_distance_dict = {}
 
-    for name, pred_ct, gt_ct in zip(adata.obs_names, adata.obs[pred_key], adata.obs[gt_key]):
+    for name, pred_ct, gt_ct in zip(
+        adata.obs_names, adata.obs[pred_key], adata.obs[gt_key]
+    ):
         score = None
         combination = f"{pred_ct}_{gt_ct}"
         if combination in ontology_distance_dict:
@@ -57,7 +61,9 @@ def _fine_ontology_sibling_accuracy(adata, obofile, pred_key, gt_key, save_key=N
                 score = nx.shortest_path_length(dag, source=gt_ct, target=pred_ct) - 1
                 score *= -1
             else:
-                paths = nx.algorithms.simple_paths.shortest_simple_paths(nx.Graph(dag), source=pred_ct, target=gt_ct)
+                paths = nx.algorithms.simple_paths.shortest_simple_paths(
+                    nx.Graph(dag), source=pred_ct, target=gt_ct
+                )
                 path = next(paths, None)
                 if path is None:
                     score = 1000
diff --git a/popv/reproducibility/_alluvial.py b/popv/reproducibility/_alluvial.py
index a614ca6..09e4515 100644
--- a/popv/reproducibility/_alluvial.py
+++ b/popv/reproducibility/_alluvial.py
@@ -17,7 +17,15 @@ def plot(input_data, *args, **kwargs):
 
 
 class AlluvialTool:
-    def __init__(self, input_data=(), x_range=(0, 1), res=20, h_gap_frac=0.03, v_gap_frac=0.03, **kwargs):
+    def __init__(
+        self,
+        input_data=(),
+        x_range=(0, 1),
+        res=20,
+        h_gap_frac=0.03,
+        v_gap_frac=0.03,
+        **kwargs,
+    ):
         self.input = input_data
         self.x_range = x_range
         self.res = res  # defines the resolution of the splines for all veins
@@ -30,7 +38,14 @@ def __init__(self, input_data=(), x_range=(0, 1), res=20, h_gap_frac=0.03, v_gap
         self.h_gap = x_range[1] * h_gap_frac
         self.v_gap_frac = v_gap_frac
         self.v_gap = (
-            sum([width for b_item_counter in self.data_dic.values() for width in b_item_counter.values()]) * v_gap_frac
+            sum(
+                [
+                    width
+                    for b_item_counter in self.data_dic.values()
+                    for width in b_item_counter.values()
+                ]
+            )
+            * v_gap_frac
         )
         self.group_widths = self.get_group_widths()
         self.item_coord_dic = self.make_item_coordinate_dic()
@@ -104,7 +119,11 @@ def get_item_groups(self, a_sort=None, b_sort=None, **kwargs):
         _ = kwargs
         b_members = (
             sorted(
-                {b_item for b_item_counter in self.data_dic.values() for b_item in b_item_counter},
+                {
+                    b_item
+                    for b_item_counter in self.data_dic.values()
+                    for b_item in b_item_counter
+                },
                 key=lambda x: self.item_widths_dic[x],
             )
             if not b_sort
@@ -121,7 +140,9 @@ def get_item_groups(self, a_sort=None, b_sort=None, **kwargs):
         return a_members, b_members
 
     def get_group_widths(self):
-        return [self.get_group_width(group) for group in (self.a_members, self.b_members)]
+        return [
+            self.get_group_width(group) for group in (self.a_members, self.b_members)
+        ]
 
     def make_item_coordinate_dic(
         self,
@@ -139,7 +160,10 @@ def make_item_coordinate_dic(
         return item_coord_dic
 
     def get_group_width(self, group):
-        return sum([self.item_widths_dic[item] for item in group]) + (len(group) - 1) * self.v_gap
+        return (
+            sum([self.item_widths_dic[item] for item in group])
+            + (len(group) - 1) * self.v_gap
+        )
 
     def generate_alluvial_vein(self, a_item, b_item):
         width = self.data_dic[a_item][b_item]
@@ -214,14 +238,20 @@ def plot(self, figsize=(10, 15), alpha=0.5, **kwargs):
         ax.autoscale()
         return ax
 
-    def get_color_array(self, colors=None, color_side=0, rand_seed=1, cmap=None, **kwargs):
+    def get_color_array(
+        self, colors=None, color_side=0, rand_seed=1, cmap=None, **kwargs
+    ):
         _ = kwargs
         color_items = self.b_members if color_side else self.a_members
         lci = len(color_items)
         if rand_seed is not None:
             np.random.seed(rand_seed)
         cmap = cmap if cmap is not None else matplotlib.cm.get_cmap("hsv", lci * 10**3)
-        color_array = colors if colors is not None else [cmap(item) for ind, item in enumerate(np.random.rand(lci))]
+        color_array = (
+            colors
+            if colors is not None
+            else [cmap(item) for ind, item in enumerate(np.random.rand(lci))]
+        )
         ind_dic = {item: ind for ind, item in enumerate(color_items)}
         polygon_colors = []
         for (
@@ -256,7 +286,15 @@ def auto_label_veins(self, fontname="Arial", **kwargs):
                 fontname=fontname,
             )
 
-    def label_sides(self, labels=None, label_shift=0, disp_width=False, wdisp_sep=7 * " ", fontname="Arial", **kwargs):
+    def label_sides(
+        self,
+        labels=None,
+        label_shift=0,
+        disp_width=False,
+        wdisp_sep=7 * " ",
+        fontname="Arial",
+        **kwargs,
+    ):
         if labels is not None:
             _ = kwargs
             y = max(self.group_widths) / 2
@@ -264,7 +302,9 @@ def label_sides(self, labels=None, label_shift=0, disp_width=False, wdisp_sep=7
             for side, sign in enumerate((-1, 1)):
                 plt.text(
                     self.x_range[side]
-                    + sign * (label_shift + itl + int(disp_width) * (len(wdisp_sep) + wtl)) * self.h_gap_frac,
+                    + sign
+                    * (label_shift + itl + int(disp_width) * (len(wdisp_sep) + wtl))
+                    * self.h_gap_frac,
                     y,
                     labels[side],
                     # bidi.algorithm.get_display(labels[side]),  # RTL languages
@@ -275,7 +315,9 @@ def label_sides(self, labels=None, label_shift=0, disp_width=False, wdisp_sep=7
                     rotation=90 - 180 * side,
                 )
 
-    def item_text(self, item, side, disp_width=False, wdisp_sep=7 * " ", width_in=True, **kwargs):
+    def item_text(
+        self, item, side, disp_width=False, wdisp_sep=7 * " ", width_in=True, **kwargs
+    ):
         _ = kwargs
         f_item = item
         # f_item = bidi.algorithm.get_display(item)  # for RTL languages
diff --git a/popv/visualization.py b/popv/visualization.py
index 437975c..749958e 100755
--- a/popv/visualization.py
+++ b/popv/visualization.py
@@ -17,14 +17,18 @@
 
 def _sample_report(adata, cell_type_key, score_key, pred_keys):
     adata.obs["counts"] = np.zeros(len(adata.obs))
-    _counts_adata = adata.obs.groupby([cell_type_key, score_key]).count()[["counts"]].reset_index()
+    _counts_adata = (
+        adata.obs.groupby([cell_type_key, score_key]).count()[["counts"]].reset_index()
+    )
     counts_adata = _counts_adata.pivot(cell_type_key, score_key, "counts")
     counts_adata = counts_adata.dropna()
     np_counts = counts_adata.dropna().to_numpy()
     row_sums = np_counts.sum(axis=1)
     new_matrix = np_counts / row_sums[:, np.newaxis]
     ax = (
-        pd.DataFrame(data=new_matrix, index=counts_adata.index, columns=counts_adata.columns)
+        pd.DataFrame(
+            data=new_matrix, index=counts_adata.index, columns=counts_adata.columns
+        )
         .sort_values(7)
         .plot(kind="bar", stacked=True, figsize=(20, 7))
     )
@@ -38,16 +42,31 @@ def _sample_report(adata, cell_type_key, score_key, pred_keys):
     ax.bar_label(ax.containers[0])
     plt.show()
     for key in pred_keys:
-        counts_adata = adata.obs.groupby([key, cell_type_key]).count().reset_index().pivot(key, cell_type_key, "counts")
+        counts_adata = (
+            adata.obs.groupby([key, cell_type_key])
+            .count()
+            .reset_index()
+            .pivot(key, cell_type_key, "counts")
+        )
 
         np_counts = counts_adata.dropna().to_numpy()
         row_sums = np_counts.sum(axis=0)
         new_matrix = np_counts / row_sums[np.newaxis, :]
-        new_index = [counts_adata.index[r] + " " + str(np.sum(np_counts[r])) for r in range(new_matrix.shape[0])]
-        new_columns = [counts_adata.columns[c] + " " + str(np.sum(np_counts[:, c])) for c in range(new_matrix.shape[1])]
-        input_data = pd.DataFrame(data=new_matrix, index=new_index, columns=new_columns).to_dict()
+        new_index = [
+            counts_adata.index[r] + " " + str(np.sum(np_counts[r]))
+            for r in range(new_matrix.shape[0])
+        ]
+        new_columns = [
+            counts_adata.columns[c] + " " + str(np.sum(np_counts[:, c]))
+            for c in range(new_matrix.shape[1])
+        ]
+        input_data = pd.DataFrame(
+            data=new_matrix, index=new_index, columns=new_columns
+        ).to_dict()
         cmap = matplotlib.cm.get_cmap("jet")
-        sorted_index = np.array(new_index)[sorted(range(new_matrix.shape[0]), key=lambda r: np.sum(np_counts[r]))]
+        sorted_index = np.array(new_index)[
+            sorted(range(new_matrix.shape[0]), key=lambda r: np.sum(np_counts[r]))
+        ]
         sorted_columns = np.array(new_columns)[
             sorted(range(new_matrix.shape[1]), key=lambda c: np.sum(np_counts[:, c]))
         ]
@@ -108,7 +127,9 @@ def agreement_score_bar_plot(
         )
         for x in celltypes
     ]
-    mean_agreement = pd.DataFrame([mean_agreement], index=["agreement"], columns=celltypes).T
+    mean_agreement = pd.DataFrame(
+        [mean_agreement], index=["agreement"], columns=celltypes
+    ).T
     mean_agreement.dropna(inplace=True)
     mean_agreement = mean_agreement.sort_values("agreement", ascending=True)
     ax = mean_agreement.plot.bar(y="agreement", figsize=(15, 2), legend=False)
@@ -142,7 +163,13 @@ def prediction_score_bar_plot(
     Returns axis object of corresponding plot.
 
     """
-    ax = adata[adata.obs["_dataset"] == "query"].obs[popv_prediction_score].value_counts().sort_index().plot.bar()
+    ax = (
+        adata[adata.obs["_dataset"] == "query"]
+        .obs[popv_prediction_score]
+        .value_counts()
+        .sort_index()
+        .plot.bar()
+    )
 
     ax.set_xlabel("Score")
     ax.set_ylabel("Frequency")
@@ -188,7 +215,9 @@ def celltype_ratio_bar_plot(
     if normalize:
         prop = prop.div(prop.sum(axis=0), axis=1)
 
-    ax = prop.loc[cell_types].plot(kind="bar", figsize=(len(cell_types) * 0.5, 4), logy=(not normalize))
+    ax = prop.loc[cell_types].plot(
+        kind="bar", figsize=(len(cell_types) * 0.5, 4), logy=(not normalize)
+    )
     ax.set_ylabel("Celltype")
     ax.set_ylabel("Celltype Abundance")
     if save_folder is not None:
diff --git a/pyproject.toml b/pyproject.toml
index 8c20cb6..1f41a21 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -83,12 +83,15 @@ addopts = [
     "--import-mode=importlib",  # allow using test files with same name
 ]
 
-[tool.black]
-line-length = 120
-
 [tool.ruff]
-src = ["src"]
 line-length = 120
+src = ["src"]
+extend-include = ["*.ipynb"]
+
+[tool.ruff.format]
+docstring-code-format = true
+
+[tool.ruff.lint]
 select = [
     "F",  # Errors detected by Pyflakes
     "E",  # Error detected by Pycodestyle
@@ -103,7 +106,7 @@ select = [
     "RUF100",  # Report unused noqa directives
 ]
 ignore = [
-    # line too long -> we accept long comment lines; black gets rid of long code lines
+    # line too long -> we accept long comment lines; formatter gets rid of long code lines
     "E501",
     # Do not assign a lambda expression, use a def -> lambda expression assignments are convenient
     "E731",
@@ -117,7 +120,7 @@ ignore = [
     "D107",
     # Errors from function calls in argument defaults. These are fine when the result is immutable.
     "B008",
-    # __magic__ methods are are often self-explanatory, allow missing docstrings
+    # __magic__ methods are often self-explanatory, allow missing docstrings
     "D105",
     # first line should end with a period [Bug: doesn't work with single-line docstrings]
     "D400",
@@ -130,10 +133,10 @@ ignore = [
     "D213",
 ]
 
-[tool.ruff.pydocstyle]
+[tool.ruff.lint.pydocstyle]
 convention = "numpy"
 
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
 "docs/*" = ["I"]
 "tests/*" = ["D"]
 "*/__init__.py" = ["F401"]
@@ -147,5 +150,5 @@ skip = [
     "docs/changelog.md",
     "docs/references.bib",
     "docs/references.md",
-    "docs/notebooks/example.ipynb"
+    "docs/notebooks/example.ipynb",
 ]
diff --git a/pyproject.toml.rej b/pyproject.toml.rej
new file mode 100644
index 0000000..2536abf
--- /dev/null
+++ b/pyproject.toml.rej
@@ -0,0 +1,33 @@
+diff a/pyproject.toml b/pyproject.toml	(rejected hunks)
+@@ -7,7 +7,7 @@ name = "PopV"
+ version = "0.0.1"
+ description = "Consensus prediction of cell type labels with popV"
+ readme = "README.md"
+-requires-python = ">=3.9"
++requires-python = ">=3.10"
+ license = {file = "LICENSE"}
+ authors = [
+     {name = "Can Ergen"},
+@@ -21,19 +21,19 @@ urls.Home-page = "https://github.com/YosefLab/PopV.git"
+ dependencies = [
+     "anndata",
+     # for debug logging (referenced from the issue template)
+-    "session-info"
++    "session-info",
+ ]
+
+ [project.optional-dependencies]
+ dev = [
+     "pre-commit",
+-    "twine>=4.0.2"
++    "twine>=4.0.2",
+ ]
+ doc = [
+     "docutils>=0.8,!=0.18.*,!=0.19.*",
+     "sphinx>=4",
+     "sphinx-book-theme>=1.0.0",
+-    "myst-nb",
++    "myst-nb>=1.1.0",
+     "sphinxcontrib-bibtex>=1.0.0",
+     "sphinx-autodoc-typehints",
+     "sphinxext-opengraph",
diff --git a/tests/core/test_models.py b/tests/core/test_models.py
index 05f7d04..569012c 100644
--- a/tests/core/test_models.py
+++ b/tests/core/test_models.py
@@ -181,37 +181,57 @@ def test_celltypist():
 def test_annotation():
     """Test Annotation and Plotting pipeline."""
     adata = _get_test_anndata().adata
-    popv.annotation.annotate_data(adata, methods=["svm", "rf"], save_path="tests/tmp_testing/popv_test_results/")
+    popv.annotation.annotate_data(
+        adata, methods=["svm", "rf"], save_path="tests/tmp_testing/popv_test_results/"
+    )
     popv.visualization.agreement_score_bar_plot(adata)
     popv.visualization.prediction_score_bar_plot(adata)
-    popv.visualization.make_agreement_plots(adata, prediction_keys=adata.uns["prediction_keys"], show=False)
+    popv.visualization.make_agreement_plots(
+        adata, prediction_keys=adata.uns["prediction_keys"], show=False
+    )
     popv.visualization.celltype_ratio_bar_plot(adata)
     obo_fn = "resources/ontology/cl.obo"
     _accuracy._ontology_accuracy(
-        adata[adata.obs["_dataset"] == "ref"], obofile=obo_fn, gt_key="cell_ontology_class", pred_key="popv_prediction"
+        adata[adata.obs["_dataset"] == "ref"],
+        obofile=obo_fn,
+        gt_key="cell_ontology_class",
+        pred_key="popv_prediction",
     )
     _accuracy._fine_ontology_sibling_accuracy(
-        adata[adata.obs["_dataset"] == "ref"], obofile=obo_fn, gt_key="cell_ontology_class", pred_key="popv_prediction"
+        adata[adata.obs["_dataset"] == "ref"],
+        obofile=obo_fn,
+        gt_key="cell_ontology_class",
+        pred_key="popv_prediction",
     )
 
     assert "popv_majority_vote_prediction" in adata.obs.columns
     assert not adata.obs["popv_majority_vote_prediction"].isnull().any()
 
     adata = _get_test_anndata(mode="inference").adata
-    popv.annotation.annotate_data(adata, save_path="tests/tmp_testing/popv_test_results/")
+    popv.annotation.annotate_data(
+        adata, save_path="tests/tmp_testing/popv_test_results/"
+    )
 
     adata = _get_test_anndata(mode="fast").adata
-    popv.annotation.annotate_data(adata, save_path="tests/tmp_testing/popv_test_results/")
+    popv.annotation.annotate_data(
+        adata, save_path="tests/tmp_testing/popv_test_results/"
+    )
 
 
 def test_annotation_no_ontology():
     """Test Annotation and Plotting pipeline without ontology."""
     adata = _get_test_anndata(cl_obo_folder=False).adata
-    popv.annotation.annotate_data(adata, methods=["svm", "rf"], save_path="tests/tmp_testing/popv_test_results/")
+    popv.annotation.annotate_data(
+        adata, methods=["svm", "rf"], save_path="tests/tmp_testing/popv_test_results/"
+    )
     popv.visualization.agreement_score_bar_plot(adata)
     popv.visualization.prediction_score_bar_plot(adata)
-    popv.visualization.make_agreement_plots(adata, prediction_keys=adata.uns["prediction_keys"])
-    popv.visualization.celltype_ratio_bar_plot(adata, save_folder="tests/tmp_testing/popv_test_results/")
+    popv.visualization.make_agreement_plots(
+        adata, prediction_keys=adata.uns["prediction_keys"]
+    )
+    popv.visualization.celltype_ratio_bar_plot(
+        adata, save_folder="tests/tmp_testing/popv_test_results/"
+    )
     popv.visualization.celltype_ratio_bar_plot(adata, normalize=False)
     adata.obs["empty_columns"] = "a"
     input_data = adata.obs[["empty_columns", "popv_rf_prediction"]].values.tolist()