-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathidentifiability.py
324 lines (284 loc) · 11.3 KB
/
identifiability.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
#!/usr/bin/env python
"""
Custom functions for identifiability analysis to calculate
and plot confidence intervals based on a profile-likelihood analysis. Adapted
from lmfit, with custom functions to select the range for parameter scanning and
for plotting the profile likelihood.
"""
from collections import OrderedDict
from lmfit.minimizer import Minimizer, MinimizerResult, MinimizerException
from lmfit.model import ModelResult
import numpy as np
import scipy as sp
import math
from matplotlib import pyplot as plt
try:
from multiprocessing_on_dill import Pool
except ModuleNotFoundError:
print(
"""Info: module 'multiprocessing_on_dill' is not installed.
Parameter estimation of kinetic models with PySCeS using the CVODE solver won't work!
"""
)
from multiprocessing import Pool
__version__ = '0.4'
CONF_ERR_GEN = 'Cannot determine Confidence Intervals'
CONF_ERR_NVARS = '%s with < 2 variables' % CONF_ERR_GEN
class ConfidenceInterval:
"""Class used to calculate the confidence interval."""
def __init__(self, minimizer, result, p_names=None, log=False):
assert isinstance(minimizer, Minimizer) or isinstance(
minimizer, ModelResult
), 'minimizer must be instance of `lmfit.minimizer.Minimizer` or `lmfit.model.ModelResult`'
assert isinstance(result, MinimizerResult) or isinstance(
result, ModelResult
), 'result must be instance of `lmfit.minimizer.MinimizerResult` or `lmfit.model.ModelResult`'
self.minimizer = minimizer
self.result = result
self.params = result.params.copy()
self.org = {}
for para_key in self.params:
self.org[para_key] = (
self.params[para_key].value,
self.params[para_key].stderr,
)
self.best_chi = result.chisqr
if not p_names:
p_names = [i for i in self.params if self.params[i].vary]
self.p_names = p_names
self.fit_params = [self.params[p] for p in self.p_names]
self.log = log
self._traces_calculated = False
self._k = 2 # degree of smoothing spline
# check that there are at least 2 true variables!
nvars = len([p for p in self.params.values() if p.vary])
if nvars < 2:
raise MinimizerException(CONF_ERR_NVARS)
self.trace_dict = {i: {} for i in self.p_names}
def calc_all_ci(self, limits=0.5, points=11, prob=0.95, method='leastsq', mp=True):
"""Calculate all confidence intervals."""
assert (
(type(prob) == float) & (prob > 0) & (prob < 1)
), 'Please provide a probability value between 0 and 1.'
self.prob = prob
self.method = method
self.ci_values = OrderedDict()
self.threshold = self._calc_threshold()
if not self._traces_calculated:
self._populate_traces(limits, points, mp)
for p in self.p_names:
self.ci_values[p] = self._process_ci(p)
return self.ci_values
def _populate_traces(self, limits, points, mp):
if mp:
proc_pool = Pool()
arl = []
results = []
for para in self.p_names:
if isinstance(para, str):
para = self.params[para]
if self.log:
para_vals = np.logspace(
np.log10(para.value * limits), np.log10(para.value / limits), points,
)
else:
para_vals = np.linspace(limits * para.value, (2 - limits) * para.value, points)
para.vary = False
self.trace_dict[para.name]['value'] = []
self.trace_dict[para.name]['dchi'] = []
self.trace_dict[para.name]['results'] = []
for val in para_vals:
self.trace_dict[para.name]['value'].append(val)
if mp:
arl.append(proc_pool.apply_async(self._calc_dchi, args=(self, para, val)))
else:
results.append(self.calc_dchi(para, val))
para.vary = True
self._reset_vals()
if mp:
arl[-1].wait()
for ar in arl:
results.append(ar.get())
proc_pool.close()
for (para, dchi, opt_res) in results:
self.trace_dict[para.name]['dchi'].append(dchi)
self.trace_dict[para.name]['results'].append(opt_res)
self._traces_calculated = True
def _process_ci(self, p_name):
xx = self.trace_dict[p_name]['value']
yy = self.trace_dict[p_name]['dchi']
t = self.threshold
spl = sp.interpolate.UnivariateSpline(xx, yy, k=self._k, s=0)
if self.log:
allx = np.logspace(np.log10(xx[0]), np.log10(xx[-1]), 20000)
else:
allx = np.linspace(xx[0], xx[-1], 20000)
lo = allx[spl(allx) <= t][0]
hi = allx[spl(allx) <= t][-1]
# catch non-identifiable cases
if lo == xx[0]:
lo = np.nan
if hi == xx[-1]:
hi = np.nan
return lo, hi
def _reset_vals(self):
"""Reset parameter values to best-fit values."""
for para_key in self.params:
(self.params[para_key].value, self.params[para_key].stderr,) = self.org[
para_key
]
@staticmethod
def _calc_dchi(ci_instance, para, val):
"""
Static method to calculate the normalised delta chi-squared
using multiprocessing.
"""
para.vary = False
para.value = val
save_para = ci_instance.params[para.name]
ci_instance.params[para.name] = para
ci_instance.minimizer.prepare_fit(ci_instance.params)
out = ci_instance.minimizer.minimize(method=ci_instance.method)
dchi = ci_instance._dchi(ci_instance.result, out)
ci_instance.params[para.name] = save_para
para.vary = True
return para, dchi, out
def calc_dchi(self, para, val, restore=False):
"""
Calculate the normalised delta chi-squared for
a given parameter value.
"""
if restore:
self._reset_vals()
para.value = val
save_para = self.params[para.name]
self.params[para.name] = para
self.minimizer.prepare_fit(self.params)
out = self.minimizer.minimize(method=self.method)
dchi = self._dchi(self.result, out)
self.params[para.name] = save_para
return para, dchi, out
def _dchi(self, best_fit, new_fit):
"""
Return the normalised delta chi-squared between the best fit
and the new fit.
"""
dchi = new_fit.chisqr / best_fit.chisqr - 1.0
return dchi
def _calc_threshold(self):
"""
Return the threshold of the normalised chi-squared for
the given probability.
"""
nfree = self.result.nfree
nfix = 1
threshold_scaled = sp.stats.chi2.ppf(self.prob, nfix)
threshold = threshold_scaled * nfix / nfree
return threshold
def plot_ci(self, para, ax=None):
assert para in self.p_names, 'para must be one of ' + str(self.p_names)
if not ax:
f, ax = plt.subplots()
xx = self.trace_dict[para]['value']
yy = self.trace_dict[para]['dchi']
t = self.threshold
spl = sp.interpolate.UnivariateSpline(xx, yy, k=self._k, s=0)
allx = np.linspace(xx[0], xx[-1], 20000)
ax.plot(xx, yy, '+')
ax.plot(allx, spl(allx), '-', lw=1)
ax.axhline(t, color='k', ls='--', lw=0.5)
ax.axvline(self.params[para].value, color='k', ls='-', lw=0.5)
lo, hi = self.ci_values[para]
if np.isnan(lo):
lo = ax.get_xlim()[0]
if np.isnan(hi):
hi = ax.get_xlim()[1]
ax.axvspan(lo, hi, alpha=0.1, color='b')
if self.log:
ax.semilogx()
ax.set_xlabel('Parameter value')
ax.set_ylabel(r'$\chi^2\left/\chi^2_0\right. - 1$')
ax.set_title(para)
def plot_all_ci(self):
num = len(self.p_names)
numcols = 3
numrows = math.ceil(num / numcols)
f, ax = plt.subplots(nrows=numrows, ncols=numcols, figsize=(9, 2.5 * numrows))
for i in range(num):
if num <= numcols:
theax = ax[i]
else:
theax = ax[i // numcols, i % numcols]
self.plot_ci(self.p_names[i], ax=theax)
# remove empty axes
if num % numcols != 0:
empty = numcols - num % numcols
for i in range(-empty, 0):
if num <= numcols:
ax[i].set_visible(False)
else:
ax[num // numcols, i].set_visible(False)
f.tight_layout()
def conf_interval(
minimizer,
result,
p_names=None,
prob=0.95,
limits=0.5,
log=False,
points=11,
method='leastsq',
return_CIclass=False,
mp=True,
):
"""
Calculate the confidence interval (CI) for parameters.
The parameter for which the CI is calculated will be varied, while the
remaining parameters are re-optimized to minimize the chi-square. The
resulting chi-square is used to calculate the probability with a given
statistic, i.e. chi-squared test.
Parameters
----------
minimizer : Minimizer or ModelResult
The minimizer to use, holding objective function.
result : MinimizerResult or ModelResult
The result of running Minimizer.minimize() or Model.fit().
p_names : list, optional
Names of the parameters for which the CI is calculated. If None
(default), the CI is calculated for every parameter.
prob : float, optional
The probability for the confidence interval (<1). If None,
the default is 0.95 (95 % confidence interval).
limits : float, optional
The limits (as a fraction of the original parameter value) within which
to vary the parameters for identifiability analysis (default is 0.5).
If ``log=False``, the parameter is varied from p*limits to p*(2 - limits),
where p is the original value.
If ``log=True``, the parameter is varied from p*limits to p/limits.
log : bool, optional
Whether to vary the parameter in a log (True) or a linear (False,
default) scale.
points : int, optional
The number of points for which to calculate the profile likelihood over
the given parameter range.
method : str, optional
The lmfit mimimize() method to use (default='leastsq')
return_CIclass : bool, optional
When true, return the instantiated ``ConfidenceInterval`` class to
access its methods directly (default=False).
mp : bool, optional
Run the optimization in parallel using ``multiprocessing`` (default=True)
Returns
-------
output : dict
A dictionary containing a list of ``(lower, upper)``-tuples containing
the confidence bounds for each parameter.
ci : ``ConfidenceInterval`` instance, optional
Instantiated ``ConfidenceInterval`` class to access the attached methods.
"""
assert (limits > 0) & (limits < 1), 'Please select a limits value between 0 and 1.'
ci = ConfidenceInterval(minimizer, result, p_names, log)
output = ci.calc_all_ci(limits, points, prob, method=method, mp=mp)
if return_CIclass:
return output, ci
return output