-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathparse_network.py
More file actions
251 lines (157 loc) · 7.07 KB
/
parse_network.py
File metadata and controls
251 lines (157 loc) · 7.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
#!/usr/bin/env pyhton
# -*- coding: UTF-8 -*-
__author__ = 'Chao Wu'
__date__ = '2/14/2019'
__version__ = '1.0'
import numpy as np
import pandas as pd
def parse_network(reactionFile, iniMetabs = [], finMetabs = [], exBalMetabs = [], exOptMetabs = []):
'''
Parameters
reactionFile: str, reaction list file
iniMetabs: lst, metabolites as initial substrates
finMetabs: lst, metabolites as end products
exBalMetabs: lst, metabolites excluded from mass balance
exOptMetabs: lst, metabolites excluded from optimization
Returns
S4Bal: df, stoichiometric matrix for mass balance, metabolite in rows, reaction in columns (same order with S). negative for substrates, positive for products
S4Opt: df, stoichiometric matrix for optimization, metabolite in rows, reaction in columns (same order with S). negative for substrates, positive for products
enzymeInfo: df, reaction in rows (same order with S)
metabInfo: ser, metabolite in index (same order with S), values are 0 (inner metabolite) or -1 (initial substrate) or 1 (final product)
'''
import re
from constants import R, T, deftMW, deftKcat, deftKm, deftKcatRelBnds, deftKmRelBnds, deftKeqRelBnds
def parse_reactantStr(reaStr):
'''
Parameters
reaStr: str
Returns
coes: lst, reactant coefficients
reas: lst, reactants
'''
coe_reas = re.findall(r'(\d+\.?\d*|)(\w+)', reaStr)
coes = [1.0 if not i[0] else float(i[0]) for i in coe_reas]
reas = [i[1] for i in coe_reas]
return coes, reas
def parse_kineticStr(kinStr, deftVal, deftRelBnds):
'''
Parameters
kinStr: str
deftVal: float, default values
deftRelBnds: lst, default lower and upper bound
Returns
infos: lst of tuple (value, lb, ub)
'''
infosRaw = [re.match(r'(\d+\.?\d*)?\(?(\d+\.?\d*)?,?(\d+\.?\d*)?\)?', item).groups() for item in kinStr.split(';')]
infos = []
for val, lb, ub in infosRaw:
val = float(val) if val else deftVal
lb = float(lb) if lb else val * deftRelBnds[0]
ub = float(ub) if ub else val * deftRelBnds[1]
infos.append([val, lb, ub])
return infos
inputs = pd.read_csv(reactionFile, sep = '\t', header = None, index_col = 0, names = ['id', 'rev', 'deltaGm', 'subs', 'pros', 'subsKm', 'prosKm', 'kcat', 'MW'], comment = '#', na_filter = False, dtype = str)
# get S and enzymeInfo
S = pd.DataFrame(columns = inputs.index)
for enzyme in inputs.index:
subsStr, prosStr = inputs.loc[enzyme, 'subs':'pros']
coes, subs = parse_reactantStr(subsStr)
for sub, coe in zip(subs, coes): S.loc[sub, enzyme] = -coe
coes, pros = parse_reactantStr(prosStr)
for pro, coe in zip(pros, coes): S.loc[pro, enzyme] = coe
S = S.replace(np.nan, 0)
# get S4Bal and S4Opt
balMetabs = [metab for metab in S.index if metab not in exBalMetabs]
S4Bal = S.loc[balMetabs, :]
optMetabs = [metab for metab in S.index if metab not in exOptMetabs]
S4Opt = S.loc[optMetabs, :]
# get enzymeInfo
enzymeInfo = pd.DataFrame(index = inputs.index, columns = ['rev', 'Keq', 'subsKm', 'prosKm', 'kcat', 'MW'], dtype = object)
for enzyme in enzymeInfo.index:
rev, deltaGm, subsStr, prosStr, subsKmStr, prosKmStr, kcatStr, MW = inputs.loc[enzyme, ['rev', 'deltaGm', 'subs', 'pros', 'subsKm', 'prosKm', 'kcat', 'MW']]
enzymeInfo.loc[enzyme, 'rev'] = float(rev)
Keq = np.exp(-float(deltaGm) / R / T)
enzymeInfo.loc[enzyme:enzyme, 'Keq'] = [[Keq, Keq * deftKeqRelBnds[0], Keq * deftKeqRelBnds[1]]]
subs = parse_reactantStr(subsStr)[1]
subKmInfos = parse_kineticStr(subsKmStr, deftKm, deftKmRelBnds)
subKms = pd.Series(index = S.index, dtype = object)
subKms.loc[subs] = subKmInfos
enzymeInfo.loc[enzyme:enzyme, 'subsKm'] = [subKms]
pros = parse_reactantStr(prosStr)[1]
proKmInfos = parse_kineticStr(prosKmStr, deftKm, deftKmRelBnds)
proKms = pd.Series(index = S.index, dtype = object)
proKms.loc[pros] = proKmInfos
enzymeInfo.loc[enzyme:enzyme, 'prosKm'] = [proKms]
kcatInfos = parse_kineticStr(kcatStr, deftKcat, deftKcatRelBnds)
enzymeInfo.loc[enzyme:enzyme, 'kcat'] = kcatInfos
enzymeInfo.loc[enzyme, 'MW'] = float(MW) if MW else deftMW
# get metabInfo
iniSubs = S.loc[np.all(S <= 0, axis = 1), :].index
finPros = S.loc[np.all(S >= 0, axis = 1), :].index
innerMetabs = set(S.index) - set(iniSubs) - set(finPros)
metabInfo = pd.Series(dict(list(dict.fromkeys(iniSubs, -1).items()) +
list(dict.fromkeys(finPros, 1).items()) +
list(dict.fromkeys(innerMetabs, 0).items()) +
list(dict.fromkeys(iniMetabs, -1).items()) +
list(dict.fromkeys(finMetabs, 1).items())))
return S4Bal, S4Opt, enzymeInfo, metabInfo
def get_full_stoichiometric_matrix(S, metabInfo):
'''
Parameters
S: df, stoichiometric matrix, metabolite in rows, reaction in columns. negative for substrates, positive for products
metabInfo: ser, metabolite in index, values are 0 (inner metabolite) or -1 (initial substrate) or 1 (final product)
Returns
SFull: same with S, including input and output reactions of the pathway
NOTE input and output reactions all in the form X -> X
'''
iniSubs = [metab for metab in S.index if metabInfo.loc[metab] == -1]
finPros = [metab for metab in S.index if metabInfo.loc[metab] == 1]
# make new S to include input and output reactions of the pathway
SFull = S.copy(deep = 'all')
for iniSub in iniSubs:
SFull.loc[iniSub, iniSub+'_in'] = 1.0
for finPro in finPros:
SFull.loc[finPro, finPro+'_out'] = -1.0
SFull = SFull.replace(np.nan, 0)
return SFull
def get_steady_state_net_fluxes(S, enzymeInfo, metabInfo, speEnz = None, speFlux = None):
'''
Parameters
S: df, stoichiometric matrix, metabolite in rows, reaction in columns. negative for substrates, positive for products
enzymeInfo: df, reaction in rows
metabInfo: ser, metabolite in index, values are 0 (inner metabolite) or -1 (initial substrate) or 1 (final product)
speEnz: str, enzyme specified as initial enzyme
speFlux: float, flux specified as initial flux
Returns
Vss: ser, net fluxes in steady state (including in and out fluxes)
'''
import re
from scipy.optimize import lsq_linear
# make new S to solve all net fluxes
Snew = S.copy(deep = 'all')
if speEnz:
Snew.loc[speEnz, speEnz] = 1.0
else:
iniSubs = [metab for metab in S.index if metabInfo.loc[metab] == -1]
Snew.loc[iniSubs[0] + '_in', iniSubs[0] + '_in'] = 1.0
Snew = Snew.replace(np.nan, 0)
# solve all net fluxes
beq = np.zeros(Snew.shape[0])
if speEnz:
beq[-1] = speFlux
else:
beq[-1] = 1.0
lbs = [0 if (r not in enzymeInfo.index or enzymeInfo.loc[r, 'rev'] == 0) else -np.inf for r in Snew.columns]
bnds = (lbs, np.inf)
fluxes = lsq_linear(Snew.values, beq, bounds = bnds).x
Vss = pd.Series(fluxes.reshape(-1), index = Snew.columns)
return Vss
def read_concentrations(concFile):
'''
Parameters
concFile: str, metabolite concentration file
Returns
Css: ser, metabolite concentration in steady state
'''
Concs = pd.read_csv(concFile, sep = '\t', squeeze = True, header = None, index_col = 0, comment = '#')
return Concs