From 8786cd7f239a79218f7419c1cb0a8be0800582f0 Mon Sep 17 00:00:00 2001 From: Narges Rezaie Date: Wed, 4 Dec 2024 11:51:00 -0800 Subject: [PATCH] update structure plot --- Topyfic/analysis.py | 20 ++++++++++++++++++-- setup.py | 4 ++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/Topyfic/analysis.py b/Topyfic/analysis.py index 2469187..a1bc1f5 100644 --- a/Topyfic/analysis.py +++ b/Topyfic/analysis.py @@ -180,6 +180,7 @@ def structure_plot(self, width=None, n=2, order_cells=['hierarchy'], + subsample=10000, save=True, show=True, figsize=None, @@ -196,7 +197,7 @@ def structure_plot(self, :type topic_order: list of str :param ascending: for each structure plot on which order you want to sort your data (default is descending for all structure plot) :type ascending: list of bool - :param metaData: if you want to add annotation for each cell add column name of that information (make sure you have that inforamtion in your cell_participation.obs) + :param metaData: if you want to add annotation for each cell add column name of that information (make sure you have that information in your cell_participation.obs) :type metaData: list :param metaData_palette: color palette for each metaData you add :type metaData_palette: dict @@ -206,6 +207,8 @@ def structure_plot(self, :type n: int :param order_cells: determine which kind of sorting options you want to use ('sum', 'hierarchy', sort by metaData); sum: sort cells by sum of top n topics; hierarchy: sort data by doing hierarchical clustring; metaData sort by metaData (default: ['hierarchy']) :type order_cells: list + :param subsample: number of cells/nuclei to subsample the data to be able to render it in a reasonable time (default: 10000) + :type subsample: int :param save: indicate if you want to save the plot or not (default: True) :type save: bool :param show: indicate if you want to show the plot or not (default: True) @@ -217,6 +220,7 @@ def structure_plot(self, :param file_name: name and path of the plot use for save (default: piechart_topicAvgCell) :type file_name: str """ + if category is None: category = self.cell_participation.obs[level].unique().tolist() if figsize is None: @@ -231,7 +235,11 @@ def structure_plot(self, a = [] for i in range(len(category)): - a.append(self.cell_participation.obs[self.cell_participation.obs[level] == category[i]].shape[0]) + n_cells = self.cell_participation.obs[self.cell_participation.obs[level] == category[i]].shape[0] + if n_cells > subsample: + a.append(subsample) + else: + a.append(n_cells) a.append(min(a) / 2) if width is None: width = a @@ -258,6 +266,14 @@ def structure_plot(self, for i in range(len(category)): tissue = self.cell_participation.obs[self.cell_participation.obs[level] == category[i]] tmp = self.cell_participation.to_df().loc[tissue.index, :] + + if tmp.shape[0] > subsample: + print(f"Randomly choose {subsample} out of {tmp.shape[0]} cells/nuclei in {category[i]}") + subsample_index = random.sample(list(range(tmp.shape[0])), subsample) + + tmp = tmp.iloc[subsample_index, :] + tissue = tissue.iloc[subsample_index, :] + if topic_order is None: order = tmp.mean().sort_values(ascending=False).index.tolist() else: diff --git a/setup.py b/setup.py index a9b74ca..01be069 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name='Topyfic', # the name of your package packages=['Topyfic'], # same as above - version='v0.4.16', # version number + version='v0.4.17', # version number license='MIT', # license type description='Topyfic is a Python package designed to identify reproducible latent dirichlet allocation (LDA) ' 'using leiden clustering and harmony for single cell epigenomics data', @@ -11,7 +11,7 @@ author='Narges Rezaie', # your name author_email='nargesrezaie80@gmail.com', # your email url='https://github.com/mortazavilab/Topyfic', # url to your git repo - download_url='https://github.com/mortazavilab/Topyfic/archive/refs/tags/v0.4.16.tar.gz', # link to the tar.gz file associated with this release + download_url='https://github.com/mortazavilab/Topyfic/archive/refs/tags/v0.4.17.tar.gz', # link to the tar.gz file associated with this release keywords=['Cellular Programs', 'Latent Dirichlet allocation', 'single-cell multiome', 'single-cell RNA-seq', 'gene regulatory network', 'Topic Modeling', 'single-nucleus RNA-seq'], # python_requires='>=3.9',