-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlenski_sim.hh
368 lines (282 loc) · 11.7 KB
/
lenski_sim.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
#ifndef LENSKI_SIM_HH
#define LENSKI_SIM_HH
#include <cstdio>
#include <cstdlib>
#include <math.h>
#include <vector>
#include <map>
#include <unordered_set>
#include <unordered_map>
#include <algorithm>
#include <numeric>
#include <utility>
#include <iterator>
#include <random>
#include <time.h>
#include <gsl/gsl_rng.h>
#include <boost/container_hash/hash.hpp>
using boost::hash_range;
using std::vector;
using std::map;
using std::unordered_set;
using std::unordered_map;
using std::find_if;
using std::find;
using std::partial_sum;
using std::back_inserter;
using std::string;
/* Hash for vectors, allowing us to store the sequence of mutations as
* a vector<int> and use that as a key to unordered_map.
* Taken from https://stackoverflow.com/questions/10405030/c- \
* unordered-map-fail-when-used-with-a-vector-as-key. */
struct vector_hash {
std::size_t operator()(vector<int> const& v) const {
return hash_range(v.cbegin(), v.cend());
}
};
/* Master class for simulation of Lenski's LTEE. */
class lenski_sim {
public:
/* Class constructor with sensible defaults provided by Yipei. */
lenski_sim(
const int _L = 4.6e8,
const int _N_0 = 5e6,
const int _N_f = 5e8,
const double _p = 8.9e-11,
const double _rho = .05,
const double _sigh = 0,
const double _muJ = 0,
const double _sigJ = 0,
string _base_folder = "lenski_data",
bool _interact = true,
int _output_interval = 1,
int _init_rank = -1,
int _rank_interval = 1,
const int _nbins = 250,
const double _min_select = -.125,
const double _max_select = .125,
uint32_t _seed = 0,
const bool _hoc=true,
const int _replicate_number = 0,
const bool _output_sim_info = true,
const int _reset_fac = 15);
/* Copy over what is needed to vary the strength of epistasis. */
void scale_disorder(
vector<double> unit_his,
vector<double> unit_Jijs);
/* Update the initial state. */
void copy_disorder(
vector<double> _his,
vector<double> _Jijs,
vector<double> _alpha0s,
vector<double> _Jalpha0);
/* Set up the simulation environment. */
void allocate_space(
uint32_t seed,
bool output_sim_info = true);
/* Draw the quenched disorder. */
void setup_disorder();
/* Draws a Poisson random number via inversion
* by sequential search. */
int draw_poisson(double lambda);
/* Steps the simulation forward by dt seconds
* in the 'small' setting. */
void step_forward(double dt);
/* Steps the simulation forward by dt seconds in
* the house of cards setting. */
void step_forward_hoc(double dt);
/* Perform the dilution step with gsl random number generation. */
void dilute_gsl();
/* Run the whole simulation for n_days days with timestep dt,
* using curr_exp to label the output folder. */
void simulate_experiment(
int n_days,
double dt);
/* Re-compute the reference strain for faster fitness computation. */
void update_reference_strain();
/* Computes the fitness value for a new bacterial strain,
* given the parent and the identification index for the mutation.
* Does so for real-valued spins. Does not require the
* initial strain to be all ones. */
inline double compute_fitness_hoc(
int parent_index,
int mutation_index,
double Delta_ck,
unordered_map<int, double> &parent_muts);
/* Computes the fitness value for a new bacterial strain,
* given the parent and the identification index for the mutation.
* Does so for +-1 valued spins. Does not require the initial strain
* to be all ones. */
inline double compute_fitness(
int parent_index,
int mutation_index,
unordered_set<int> &parent_muts,
double alphak_p);
/* Computes the fitness using the full definition, for testing
* other fitness computation methods. Used for +-1-valued spins. */
double compute_fitness_slow(int strain_index);
/* Compute the fitness using the local field formulation. */
double compute_fitness_lf(int parent_index, int mutation_index);
/* Computes the fitness using the full definition, for testing
* other fitness computation methods. Used for real-valued spins. */
double compute_fitness_slow_hoc(int strain_index);
/* Compute rank of the inital strain. */
int compute_rank(
int strain_ind,
vector<int> &beneficial_muts,
double &avg_fit_inc,
bool store_incs);
/* Update rank of the initial strain. */
void update_rank();
/* Outputs bacterial information. */
void output_bac_data_bin(string file_name);
/* Outputs mutation and rank information. */
void output_mut_data_bin(string file_name);
/* Outputs the h_i values. */
void output_his_bin();
/* Outputs the J_{ij} values. */
void output_Jijs_bin();
/* Outputs the initial alpha_i values. */
void output_alpha0s();
/* Outputs the J*\alpha information . */
void output_Jalpha_bin();
/* Outputs selection coefficient binning info. */
void output_bin_edges();
void output_bin_counts();
/* Wrapper function that outputs all relevant
* information each frame. */
void output_frame_info(
double &total_time,
time_t &frame_start,
time_t &frame_end,
double &rank_time,
double dilute_time,
double step_time,
int curr_frame,
int n_frames);
/* Checks for a strain with 0 bacteria. */
void check_nbac_wtf();
/* Checks the Jalpha0 vector. */
void check_Jalpha0();
/* Updates the Jalpha0 vector. */
void update_Jalpha0();
/* Fills bin_edges with the correct values based on nbins,
* min_select, and max_select. */
void setup_bins();
/* Finds the bin index for the given selection coefficient. */
int find_bin_ind(double select);
/* Compute n_inits initializations for use when varying the mutation rate. */
vector<vector<double>> compute_alpha0s_and_Jalpha0s(int n_inits);
vector<double> get_alpha0s() { return alpha0s; }
vector<double> get_Jalpha0() { return Jalpha0; }
vector<double> get_his() { return his; }
vector<double> get_Jijs() { return Jijs; }
private:
/* Size of the E. Coli genome. */
const int L;
/* Number of bacteria at the start of every day. */
const int N_0;
/* Number of bacteria at the end of each day. */
const int N_f;
/* Point mutation rate. */
const double p;
/* Interaction parameter. */
const double rho;
/* Standard deviation of the h_i distribution. */
const double sigh;
/* Standard deviation of the J distribution. */
const double sigJ;
/* Offset value for the small simulations. */
double Foff;
/* Current simulation day. */
int curr_day;
/* Current number of mutations that have occurred. */
int n_mutants_so_far;
/* Where to output the data. */
string base_folder;
string output_folder;
/* Total current number of bacteria. */
double nbac_tot;
/* Total current number of strains. */
int n_strains;
/* Boolean indicating small simulations. */
int sim_case;
/* Boolean indicating whether or not the J_{ij}
* terms are turned on. */
bool interact;
/* Maps mutation indices to deviations from initial sequence.
* Indexed such that mutations[ii] contains all the active
* mutations for strain ii. Used for the HOC simulation. */
vector<unordered_map<int, double>> mutations_hoc;
/* Stores deviations from the initial sequence. */
vector<unordered_set<int>> mutations;
/* Stores the orders of mutations. */
vector<vector<int>> mut_order;
/* Stores the fitness effect for each mutation for each strain.
* Goes in line with mutations - i.e.,
* mutations[strain_ind][mutation_ind] has fitness effect
* given by fit_effects[strain_ind][mutation_ind].*/
vector<vector<double>> fit_effects;
/* Stores the set {\alpha_i} for the original strain.
* The vector mutations defined above is then
* relative to this vector. */
vector<double> alpha0s;
/* Stores the rank of each strain. */
unordered_map<int, int> ranks;
/* Stores the distribution of beneficial fitness increments
* for the dominant strain. */
vector<double> beneficial_incs;
/* Stores the average fitness increments over the available
* beneficial mutations for each strain. */
unordered_map<int, double> avg_incs;
/* Maps a sequence of mutations to its index in other data structures.
* Used for checking if a given strain should merge with another. */
unordered_map<vector<int>, int, vector_hash> current_strains;
/* Stores the number of bacteria per species.
* Indexed such that n_bac[ii] is the number of
* mutations for strain ii. */
vector<double> n_bac;
/* Store the fitness value for each strain. Note this only
* needs to be updated on the fly.
* Indexed such that fits[ii] is the fitness for strain ii. */
vector<double> fits;
/* Stores the values of h_i.
* Indexed such that his[i] = h_i. */
vector<double> his;
/* Stores the components of J*alpha0 */
vector<double> Jalpha0;
/* J_{ij} values mapping i+L*j -> J_{ij}. */
//unordered_map<int, double> Jijs;
vector<double> Jijs;
/* Random number generator */
gsl_rng *gsl_gen;
/* How many days to wait before dumping data. */
int output_interval;
/* What the rank of the initial strain should be. */
int init_rank;
/* Number of days before we output the rank. */
int rank_interval;
/* Stores the edges of the bins for counting
* selection coefficients. */
double *bin_edges;
/* Stores the number of selection coefficients in each bin. */
int *bin_counters;
/* Number of bins for storing selection coefficient counts. */
int nbins;
/* Upper and lower bounds for the bins. */
double min_select;
double max_select;
/* Whether we do discrete simulation or house of cards simulation. */
bool hoc;
/* Which simulation this corresponds to. */
int replicate_number;
/* Store the time it takes to compute the fitness. */
double fit_time;
double hash_time;
double rng_time;
/* How many times we have reset the dominant strain. */
int reset_index;
/* How many mutations before we perform the reset. */
int reset_fac;
};
#endif