-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathExampleUseECM.py
260 lines (212 loc) · 12.4 KB
/
ExampleUseECM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
import os
import cbmpy as cbm
import copy
import pandas as pd
from ecmtool import extract_sbml_stoichiometry, normalize_columns_fraction, get_conversion_cone
from ecmtool.helpers import unsplit_metabolites, print_ecms_direct, unique
import numpy as np
def calc_ECMs(file_path, print_results=True, input_file_path='', print_metabolites_info=True):
"""
Calculates ECMs using ECMtool
:return ecms: np.array
This array contains the ECMs as columns and the metabolites as rows
:param file_path: string
String with path to the SBML-file.
:param reactions_to_tag: list with strings
List with reaction-IDs of reactions that need to be tagged
:param print_results: Boolean
:param hide_metabs: indices of metabolites that should be ignored
"""
"""Step 1: build network and perform some preprocessing steps"""
network = extract_sbml_stoichiometry(file_path, determine_inputs_outputs=True)
external_inds = [ind for ind, metab in enumerate(network.metabolites) if metab.is_external]
# The following are just for checking the inputs to this program.
metab_info_ext = [(ind, metab.id, metab.name, metab.direction) for ind, metab in
enumerate(network.metabolites) if metab.is_external]
# I extract some information about the external metabolites for checking
metab_info_ext_df = pd.DataFrame(metab_info_ext, columns=['metab_ind', 'metab_id', 'metab_name', 'Direction'])
# You can choose to save this information, by uncommenting this line
# metab_info_ext_df.to_csv(path_or_buf='external_info_exampleUseECM.csv', index=False)
"""If an input file is supplied, we set input, output, and hide metabolites from this"""
if input_file_path: # If no input file is supplied, standard detection of ecmtool is used
info_metabs_df = pd.read_csv(input_file_path)
info_metabs_input = info_metabs_df[info_metabs_df.Input == 1]
info_metabs_output = info_metabs_df[info_metabs_df.Output == 1]
info_metabs_hidden = info_metabs_df[info_metabs_df.Hidden == 1]
# Get the indices that correspond to the metabolites that are inputs, outputs, or hidden.
input_inds = list(info_metabs_input.Index.values)
output_inds = list(info_metabs_output.Index.values) + [ind for ind, metab in enumerate(network.metabolites) if
metab.id == 'objective']
hide_inds = list(info_metabs_hidden.Index.values)
prohibit_inds = [ind for ind, metab in enumerate(network.metabolites) if
(metab.is_external) & (not ind in input_inds + output_inds + hide_inds) & (
not metab.id == 'objective')]
both_inds = [ind for ind in range(len(network.metabolites)) if (ind in input_inds) and (ind in output_inds)]
# Use input information to set input, output, hidden, and prohibited metabolites
network.set_inputs(input_inds)
network.set_outputs(output_inds)
network.set_both(both_inds)
network.prohibit(prohibit_inds)
network.hide(hide_inds)
# Print comma-separated lists of input information. These lists can be used for running the same computation
# via command line, for example to use other arguments
print(','.join(map(str, input_inds)))
print(','.join(map(str, output_inds)))
print(','.join(map(str, hide_inds)))
print(','.join(map(str, prohibit_inds)))
if print_metabolites_info:
for index, item in enumerate(network.metabolites):
print(index, item.id, item.name, 'external' if item.is_external else 'internal', item.direction)
# Keep a copy of the full network before compression. This can be nice for later.
full_network = copy.deepcopy(network)
orig_ids = [m.id for m in network.metabolites]
orig_N = network.N
# Putting external_cyles = None is necessary to make the indirect method and direct method compatible
external_cycles = None
# You can choose to print reactions of metabolites by re-setting this boolean.
print_before_compression = False
compress = True
if print_before_compression:
print('\nReactions%s:' % (' before compression' if compress else ''))
for index, item in enumerate(network.reactions):
print(index, item.id, item.name, 'reversible' if item.reversible else 'irreversible')
print('\nMetabolites%s:' % (' before compression' if compress else ''))
for index, item in enumerate(network.metabolites):
print(index, item.id, item.name, 'external' if item.is_external else 'internal', item.direction)
# Split in and out metabolites, to facilitate ECM computation.
network.split_in_out(only_rays=False)
"""Step 2: Compression of the network will speed ecmtool up."""
if compress:
network.compress(verbose=True, SCEI=True, cycle_removal=True, remove_infeasible=True)
# You can choose to print reactions of metabolites by re-setting this boolean.
print_after_compression = False
if print_after_compression and compress:
print('Reactions (after compression):')
for index, item in enumerate(network.reactions):
print(index, item.id, item.name, 'reversible' if item.reversible else 'irreversible')
print('Metabolites (after compression):')
for index, item in enumerate(network.metabolites):
print(index, item.id, item.name, 'external' if item.is_external else 'internal', item.direction)
"""Step 3: Enumerate ECMs"""
# In this script, indirect intersection with polco is used. Use command line options to use direct intersection
cone = get_conversion_cone(network.N, network.external_metabolite_indices(), network.reversible_reaction_indices(),
network.input_metabolite_indices(), network.output_metabolite_indices(),
verbose=True)
# Unsplit metabolites that were inputs and outputs, and were therefore split before the computation.
cone_transpose, ids = unsplit_metabolites(np.transpose(cone), network)
cone = unique(np.transpose(normalize_columns_fraction(cone_transpose)))
print('Found %s ECMs' % cone.shape[0])
if print_results:
print_ecms_direct(np.transpose(cone), ids)
cone = cone.transpose() # columns will be the different ECMs, rows are metabolites
return cone, ids, full_network
def get_efms(N, reversibility, verbose=True, efmtool_path=os.getcwd()):
"""
Uses Matlab and EFMtool (https://csb.ethz.ch/tools/software/efmtool.html) to calculate the EFMs of a model.
:param N: stoichiometry matrix
:param reversibility: list of booleans indicating if reaction is reversible
:param verbose:
:param efmtool_path: Path to where EFM software is installed on machine
:return: matrix with efms
"""
import matlab.engine
engine = matlab.engine.start_matlab()
engine.cd(efmtool_path)
result = engine.CalculateFluxModes(matlab.double([list(row) for row in N]), matlab.logical(reversibility))
if verbose:
print('Fetching calculated EFMs')
size = result['efms'].size
shape = size[1], size[0] # _data is in transposed form w.r.t. the result matrix
efms = np.reshape(np.array(result['efms']._data), shape)
if verbose:
print('Finishing fetching calculated EFMs')
return efms
def check_bijection_csvs(ecms_first_df, ecms_second_df):
"""
:param ecms_first_df: DataFrame
Matrix with ecms as rows, metabolites as cols
colname should give metab_id
:param ecms_second_df: DataFrame
Matrix with ecms as rows, metabolites as cols
colname should give metab_id
:return bijection_YN: Boolean
:return ecms_second_min_ecms_first: np.array
Matrix with as columns the ECMs that were in the second but not in the first set
:return ecms_first_min_ecms_second: np.array
Matrix with as columns the ECMs that were in the first but not in the second set
"""
# We first remove duplicates from both
metab_ids_first = list(ecms_first_df.columns)
metab_ids_second = list(ecms_second_df.columns)
ecms_first = np.transpose(ecms_first_df.values)
ecms_second = np.transpose(ecms_second_df.values)
n_ecms_first_non_unique = ecms_first.shape[1]
n_ecms_second_non_unique = ecms_second.shape[1]
ecms_first = np.transpose(unique(np.transpose(ecms_first)))
ecms_second = np.transpose(unique(np.transpose(ecms_second)))
n_ecms_first = ecms_first.shape[1]
n_ecms_second = ecms_second.shape[1]
# Find matching of metab_ids
matching_inds = np.zeros(len(metab_ids_first))
for id_ind, id in enumerate(metab_ids_first):
matching_inds[id_ind] = [id_ind_sec for id_ind_sec, id_sec in enumerate(metab_ids_second) if id_sec == id][0]
# Make sure that second ecms metabolites are in the same order
matching_inds = matching_inds.astype(int)
ecms_second = ecms_second[matching_inds, :]
if n_ecms_first_non_unique - n_ecms_first > 0:
print("Watch out. The first set of ECMs has duplicates")
if n_ecms_second_non_unique - n_ecms_second > 0:
print("Watch out. The second set of ECMs has duplicates")
# Normalize both sets of ECMs
sum_columns_first = np.sum(np.abs(ecms_first), axis=0)
sum_columns_first = sum_columns_first[np.newaxis, :]
ecms_first = ecms_first / np.repeat(sum_columns_first, ecms_first.shape[0], axis=0)
sum_columns_second = np.sum(np.abs(ecms_second), axis=0)
sum_columns_second = sum_columns_second[np.newaxis, :]
ecms_second = ecms_second / np.repeat(sum_columns_second, ecms_second.shape[0], axis=0)
found_match_ecms_first = [False] * n_ecms_first
no_match_ecms_second = list(range(n_ecms_second))
for ecm_first_ind in range(n_ecms_first):
if ecm_first_ind % 100 == 0:
print('%d/%d ECMs checked for matches' % (ecm_first_ind, n_ecms_first))
ecm_first = ecms_first[:, ecm_first_ind]
for index, ecm_second_ind in enumerate(no_match_ecms_second):
ecm_second = ecms_second[:, ecm_second_ind]
if max(ecm_first - ecm_second) <= 10 ** -3:
found_match_ecms_first[ecm_first_ind] = True
del no_match_ecms_second[index]
break
ecms_first_min_ecms_second_inds = np.where([not found for found in found_match_ecms_first])[0]
ecms_second_min_ecms_first_inds = no_match_ecms_second
ecms_first_min_ecms_second = ecms_first[:, ecms_first_min_ecms_second_inds]
ecms_second_min_ecms_first = ecms_second[:, ecms_second_min_ecms_first_inds]
if not (ecms_first_min_ecms_second.shape[1] > 0 or ecms_second_min_ecms_first.shape[1] > 0):
bijection_YN = True
else:
bijection_YN = False
return bijection_YN, ecms_first_min_ecms_second, ecms_second_min_ecms_first
"""CONSTANTS"""
model_name = "e_coli_core"
# input_file_name = "bacteroid_ECMinputSmaller.csv"
# For a bigger computation, try:
# input_file_name = "bacteroid_ECMinputAll.csv"
# Define directories for finding models
model_dir = os.path.join(os.getcwd(), "examples_and_results/models")
model_path = os.path.join(model_dir, model_name + ".xml")
mod = cbm.readSBML3FBC(model_path)
# input_file_path = os.path.join(os.getcwd(), "examples_and_results\input_files", input_file_name)
# ecms_matrix, metab_ids, full_network = calc_ECMs(model_path, print_results=True, input_file_path=input_file_path)
ecms_matrix, metab_ids, full_network = calc_ECMs(model_path, print_results=True, )
# Create dataframe with the ecms as columns and the metabolites as index
ecms_df = pd.DataFrame(np.transpose(ecms_matrix), columns=metab_ids)
ext_metab_inds = []
ext_metab_ids = []
for ind_id, metab_id in enumerate(metab_ids):
ext_metab_bool = [metab.is_external for ind, metab in enumerate(full_network.metabolites) if metab.id == metab_id]
if len(ext_metab_bool) > 0 and ext_metab_bool[0]:
ext_metab_inds.append(ind_id)
ext_metab_ids.append(metab_id)
ecms_matrix_ext = ecms_matrix[ext_metab_inds, :]
ecms_df_ext = pd.DataFrame(np.transpose(ecms_matrix_ext), columns=ext_metab_ids)
ecms_df_ext.to_csv(
path_or_buf=os.path.join(os.getcwd(), "examples_and_results", "result_files", 'ecms_ExampleUseECM.csv'), index=True)