forked from clopezx/earm
-
Notifications
You must be signed in to change notification settings - Fork 5
/
estimate_m1a.py
225 lines (187 loc) · 8.74 KB
/
estimate_m1a.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
import pysb.integrate
import pysb.util
import numpy as np
import scipy.optimize
import scipy.interpolate
import matplotlib.pyplot as plt
import os
import inspect
from earm.lopez_embedded import model
# List of model observables and corresponding data file columns for
# point-by-point fitting
obs_names = ['mBid', 'cPARP']
data_names = ['norm_ICRP', 'norm_ECRP']
var_names = ['nrm_var_ICRP', 'nrm_var_ECRP']
# Total starting amounts of proteins in obs_names, for normalizing simulations
obs_totals = [model.parameters['Bid_0'].value,
model.parameters['PARP_0'].value]
# Load experimental data file
earm_path = os.path.dirname(__file__)
data_path = os.path.join(earm_path, 'xpdata', 'forfits',
'EC-RP_IMS-RP_IC-RP_data_for_models.csv')
exp_data = np.genfromtxt(data_path, delimiter=',', names=True)
# Model observable corresponding to the IMS-RP reporter (MOMP timing)
momp_obs = 'aSmac'
# Mean and variance of Td (delay time) and Ts (switching time) of MOMP, and
# yfinal (the last value of the IMS-RP trajectory)
momp_obs_total = model.parameters['Smac_0'].value
momp_data = np.array([9810.0, 180.0, momp_obs_total])
momp_var = np.array([7245000.0, 3600.0, 1e4])
# Build time points for the integrator, using the same time scale as the
# experimental data but with greater resolution to help the integrator converge.
ntimes = len(exp_data['Time'])
# Factor by which to increase time resolution
tmul = 10
# Do the sampling such that the original experimental timepoints can be
# extracted with a slice expression instead of requiring interpolation.
tspan = np.linspace(exp_data['Time'][0], exp_data['Time'][-1],
(ntimes-1) * tmul + 1)
# Initialize solver object
solver = pysb.integrate.Solver(model, tspan, rtol=1e-5, atol=1e-5)
# Get parameters for rates only
rate_params = model.parameters_rules()
# Build a boolean mask for those params against the entire param list
rate_mask = np.array([p in rate_params for p in model.parameters])
# Build vector of nominal parameter values from the model
nominal_values = np.array([p.value for p in model.parameters])
# Set the radius of a hypercube bounding the search space
bounds_radius = 2
def objective_func(x, rate_mask, lb, ub):
# Print out an indication of how the annealing is progressing
print_anneal_status()
# Apply hard bounds
if np.any((x < lb) | (x > ub)):
print "bounds-check failed"
return np.inf
# Simulate model with rates taken from x (which is log transformed)
param_values = np.array([p.value for p in model.parameters])
param_values[rate_mask] = 10 ** x
solver.run(param_values)
# Calculate error for point-by-point trajectory comparisons
e1 = 0
for obs_name, data_name, var_name, obs_total in \
zip(obs_names, data_names, var_names, obs_totals):
# Get model observable trajectory (this is the slice expression
# mentioned above in the comment for tspan)
ysim = solver.yobs[obs_name][::tmul]
# Normalize it to 0-1
ysim_norm = ysim / obs_total
# Get experimental measurement and variance
ydata = exp_data[data_name]
yvar = exp_data[var_name]
# Compute error between simulation and experiment (chi-squared)
e1 += np.sum((ydata - ysim_norm) ** 2 / (2 * yvar)) / len(ydata)
# Calculate error for Td, Ts, and final value for IMS-RP reporter
# =====
# Normalize trajectory
ysim_momp = solver.yobs[momp_obs]
ysim_momp_norm = ysim_momp / np.nanmax(ysim_momp)
# Build a spline to interpolate it
st, sc, sk = scipy.interpolate.splrep(solver.tspan, ysim_momp_norm)
# Use root-finding to find the point where trajectory reaches 10% and 90%
t10 = scipy.interpolate.sproot((st, sc-0.10, sk))[0]
t90 = scipy.interpolate.sproot((st, sc-0.90, sk))[0]
# Calculate Td as the mean of these times
td = (t10 + t90) / 2
# Calculate Ts as their difference
ts = t90 - t10
# Get yfinal, the last element from the trajectory
yfinal = ysim_momp[-1]
# Build a vector of the 3 variables to fit
momp_sim = [td, ts, yfinal]
# Perform chi-squared calculation against mean and variance vectors
e2 = np.sum((momp_data - momp_sim) ** 2 / (2 * momp_var)) / 3
error = e1 + e2
return error
def print_anneal_status():
"""Print annealing progress report"""
# scipy.optimize.anneal doesn't offer any introspection, so we have to dig
# into its stack frames directly to find what we need. This function will
# only work properly when called directly from the objective function. It
# could be modified to walk the stack until the proper frame is found, but
# right now it just explicitly looks two levels up.
caller_frame, _, _, caller_func, _, _ = inspect.stack()[2]
# Only report when called from anneal, not from getstart_temp
if caller_func == 'anneal':
caller_locals = caller_frame.f_locals
# Only report on the first iteration of the 'dwell' loop
if caller_locals['n'] == 1:
print 'best fit:', caller_locals['best_state'].cost, 'current fit:', caller_locals['current_state'].cost
def estimate(start_values=None):
"""Estimate parameter values by fitting to data.
Parameters
==========
parameter_values : numpy array of floats, optional
Starting parameter values. Taken from model's nominal parameter values
if not specified.
Returns
=======
numpy array of floats, containing fitted parameter values.
"""
# Set starting position to nominal parameter values if not specified
if start_values is None:
start_values = nominal_values
else:
assert start_values.shape == nominal_values.shape
# Log-transform the starting position
x0 = np.log10(start_values[rate_mask])
# Displacement size for annealing moves
dx = .02
# The default 'fast' annealing schedule uses the 'lower' and 'upper'
# arguments in a somewhat counterintuitive way. See
# http://projects.scipy.org/scipy/ticket/1126 for more information. This is
# how to get the search to start at x0 and use a displacement on the order
# of dx (note that this will affect the T0 estimation which *does* expect
# lower and upper to be the absolute expected bounds on x).
lower = x0 - dx / 2
upper = x0 + dx / 2
# Log-transform the rate parameter values
xnominal = np.log10(nominal_values[rate_mask])
# Hard lower and upper bounds on x
lb = xnominal - bounds_radius
ub = xnominal + bounds_radius
# Perform the annealing
args = [rate_mask, lb, ub]
(xmin, Jmin, Tfinal, feval, iters, accept, retval) = \
scipy.optimize.anneal(objective_func, x0, full_output=True,
maxiter=4000, quench=0.5,
lower=lower, upper=upper,
args=args)
# Construct vector with resulting parameter values (un-log-transformed)
params_estimated = start_values.copy()
params_estimated[rate_mask] = 10 ** xmin
# Display annealing results
for v in ('xmin', 'Jmin', 'Tfinal', 'feval', 'iters', 'accept', 'retval'):
print "%s: %s" % (v, locals()[v])
return params_estimated
def display(params_estimated):
# Construct matrix of experimental data and variance columns of interest
exp_obs_norm = exp_data[data_names].view(float).reshape(len(exp_data), -1).T
var_norm = exp_data[var_names].view(float).reshape(len(exp_data), -1).T
std_norm = var_norm ** 0.5
# Simulate model with new parameters and construct a matrix of the
# trajectories of the observables of interest, normalized to 0-1.
solver.run(params_estimated)
obs_names_disp = obs_names + ['aSmac']
sim_obs = solver.yobs[obs_names_disp].view(float).reshape(len(solver.yobs), -1)
totals = obs_totals + [momp_obs_total]
sim_obs_norm = (sim_obs / totals).T
# Plot experimental data and simulation on the same axes
colors = ('r', 'b')
for exp, exp_err, sim, c in zip(exp_obs_norm, std_norm, sim_obs_norm, colors):
plt.plot(exp_data['Time'], exp, color=c, marker='.', linestyle=':')
plt.errorbar(exp_data['Time'], exp, yerr=exp_err, ecolor=c,
elinewidth=0.5, capsize=0, fmt=None)
plt.plot(solver.tspan, sim, color=c)
plt.plot(solver.tspan, sim_obs_norm[2], color='g')
plt.vlines(momp_data[0], -0.05, 1.05, color='g', linestyle=':')
plt.show()
if __name__ == '__main__':
print 'Estimating rates for Lopez embedded model (M1a)'
np.random.seed(1)
params_estimated = estimate()
# Write parameter values to a file
fit_filename = os.path.join(earm_path, 'EARM_2_0_M1a_fitted_params.txt')
print 'Saving parameter values to file:', fit_filename
pysb.util.write_params(model, params_estimated, fit_filename)
display(params_estimated)