-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompass_analysis.py
107 lines (99 loc) · 4.96 KB
/
compass_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import pandas as pd
import numpy as np
from scipy.stats import wilcoxon, mannwhitneyu, ranksums
from statsmodels.stats.multitest import multipletests
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as hcluster
from scipy.spatial.distance import squareform
def cohens_d(x, y):
pooled_std = np.sqrt(((len(x)-1) * np.var(x, ddof=1)
+ (len(y)-1) * np.var(y, ddof=1)) /
(len(x) + len(y) - 2))
return (np.mean(x) - np.mean(y)) / pooled_std
def wilcoxon_test(consistencies_matrix, group_A_cells, group_B_cells):
"""
Performs an unpaired wilcoxon test (or mann-whitney U test) for each reaction between group_A and group_B
"""
#per reaction/meta-reaction, conduct wilcoxon test between group_A and group_B
group_A = consistencies_matrix.loc[:,group_A_cells]
group_B = consistencies_matrix.loc[:,group_B_cells]
results = pd.DataFrame(index = consistencies_matrix.index, columns = ['wilcox_stat', 'wilcox_pval', 'cohens_d'], dtype='float64')
for rxn in consistencies_matrix.index:
A, B = group_A.loc[rxn].to_numpy().ravel(), group_B.loc[rxn].to_numpy().ravel()
#sometimes there's a solitary value, and we don't want to test then
if len(np.unique(A)) == 1 and len(np.unique(B)) == 1:
if np.unique(A) == np.unique(B):
#we've got no data. set p-value to 1 and skip!
#(p-value needs to be 1 so multipletests doesn't cry)
results.loc[rxn, ['wilcox_pval']] = 1
continue
stat, pval = mannwhitneyu(A, B, alternative='two-sided')
c_d = cohens_d(A, B)
results.loc[rxn, ['wilcox_stat', 'wilcox_pval', 'cohens_d']] = stat, pval, c_d
results['adjusted_pval'] = np.array(multipletests(results['wilcox_pval'], method='fdr_bh')[1], dtype='float64')
return results
def get_reaction_consistencies(compass_reaction_penalties, min_range=1e-3):
"""
Converts the raw penalties outputs of compass into scores per reactions where higher numbers indicate more activity
"""
df = -np.log(compass_reaction_penalties + 1)
df = df[df.max(axis=1) - df.min(axis=1) >= min_range]
df = df - df.min().min()
return df
def get_metareactions(reactions, height=0.02):
"""
Returns an array of metareaction labels for each reaction
Index k in the returned array has the metareaction label for reaction k.
"""
#pairwise_reaction_correlations = reactions.T.corr(method='spearman') #Pandas method here is orders of magnitude slower
pairwise_reaction_correlations = np.corrcoef(reactions.rank(axis=1))
#Unfortunately due to floating point issues, these matrices are not always perfectly symmetric and the diagonal may be slightly off from 1
pairwise_reaction_correlations[np.arange(reactions.shape[0]), np.arange(reactions.shape[0])] = 1.0
pairwise_reaction_correlations = (pairwise_reaction_correlations + pairwise_reaction_correlations.T)/2
assert(np.all(pairwise_reaction_correlations == pairwise_reaction_correlations.T))
Z = hcluster.complete(squareform(1 - pairwise_reaction_correlations))
return hcluster.fcluster(Z, height, criterion='distance')
labeled_reactions = {
"PGM_neg" : "phosphoglycerate mutase (PGAM)",
"LDH_L_neg" : "lactate dehydrogenase",
"PDHm_pos" : "pyruvate dehydrogenase (PDH)",
"TPI_neg" : "triosephosphate isomerase (DHAP forming)",
"FACOAL1821_neg" : "long-chain fatty-acid-CoA ligase",
"r1257_pos" : "long-chain fatty-acid-CoA ligase",
"FACOAL1831_neg" : "long-chain fatty-acid-CoA ligase",
"CSNATr_neg" : "carnitine O-acetyltransferase",
"C160CPT1_pos" : "carnitine O-palmitoyltransferase",
"ACONTm_pos" : "aconitate hydratase",
"SUCOASm_pos" : "succinate-CoA ligase",
"AKGDm_pos" : "alpha-ketoglutarate dehydrogenase",
"SUCD1m_pos" : "succinate dehydrogenase",
"ICDHyrm_pos" : "isocitrate dehydrogenase",
"CK_pos" : "creatine\nkinase",
"PGCD_pos" : "phosphoglycerate dehydrogenase",
"ARGSS_pos" : "arginosuccinate synthase",
"r0281_neg" : "putrescine diamine oxidase",
"SPMDOX_pos" : "spermidine dehydrogenase (spermidine -> GABA)",
"ARGDCm_pos" : "arginine decarboxylase",
"AGMTm_pos" : "agmatinase",
"GHMT2r_pos" : "serine hydroxymethyltransferase",
"AHC_pos" : "adenosylhomocysteinase",
"METAT_pos" : "methionine adenosyltransferase",
"METS_pos" : "methionine\nsynthase",
"ARGN_pos" : "arginase"
}
amino_acid_metab = ["Alanine and aspartate metabolism",
"Arginine and Proline Metabolism",
"beta-Alanine metabolism",
"Cysteine Metabolism",
"D-alanine metabolism",
"Folate metabolism",
"Glutamate metabolism",
"Glycine, serine, alanine and threonine metabolism",
"Histidine metabolism",
"Lysine metabolism",
"Methionine and cysteine metabolism",
"Taurine and hypotaurine metabolism",
"Tryptophan metabolism",
"Tyrosine metabolism",
"Urea cycle",
"Valine, leucine, and isoleucine metabolism"]