From 69f6eda6ca00876ac8bfa3f03db2f1094f5c901c Mon Sep 17 00:00:00 2001 From: Carl Suster Date: Fri, 9 Jun 2023 12:02:10 +1000 Subject: [PATCH] Make genome length a const generic --- examples/binned.rs | 2 +- examples/combined.rs | 10 ++++++-- examples/simple.rs | 2 +- src/genome/simple.rs | 49 +++++++++++++++++++++++----------------- src/lib.rs | 4 ++-- src/simulate/binned.rs | 2 +- src/simulate/mod.rs | 2 +- src/simulate/outbreak.rs | 17 +++++++++++--- 8 files changed, 56 insertions(+), 32 deletions(-) diff --git a/examples/binned.rs b/examples/binned.rs index 8c19c07..63fafa7 100644 --- a/examples/binned.rs +++ b/examples/binned.rs @@ -32,7 +32,7 @@ fn main() -> Result<()> { // use this instead for reproducible simulation: // let rng = Xoshiro256PlusPlus::seed_from_u64(32189661_u64); - let genome = SimpleGenome::default(); + let genome = SimpleGenome::<64>::default(); let ob = binned_outbreaks(genome, &disease_model, mutation_rate, &binned_cfg, rng)?; let stdout = std::io::stdout(); diff --git a/examples/combined.rs b/examples/combined.rs index c62f0fc..60c0818 100644 --- a/examples/combined.rs +++ b/examples/combined.rs @@ -27,8 +27,14 @@ fn main() -> Result<()> { // let mut rng = Xoshiro256PlusPlus::seed_from_u64(32189661_u64); // simulate an initial outbreak - let index1 = SimpleGenome::default(); - let mut ob = simulate_outbreak(index1, &disease_model, mutation_rate, max_cases, &mut rng)?; + let index1 = SimpleGenome::<64>::default(); + let mut ob = simulate_outbreak( + index1.clone(), + &disease_model, + mutation_rate, + max_cases, + &mut rng, + )?; // create a second outbreak from a new introduction of the disease 30 time steps later with 10 // SNPs separating it from the original index genome diff --git a/examples/simple.rs b/examples/simple.rs index 466c618..f15daf1 100644 --- a/examples/simple.rs +++ b/examples/simple.rs @@ -25,7 +25,7 @@ fn main() -> Result<()> { // use this instead for reproducible simulation: // let mut rng = Xoshiro256PlusPlus::seed_from_u64(9948901_u64); - let genome = SimpleGenome::default(); + let genome = SimpleGenome::<256>::default(); let ob = simulate_outbreak(genome, &disease_model, mutation_rate, max_cases, &mut rng)?; let stdout = std::io::stdout(); diff --git a/src/genome/simple.rs b/src/genome/simple.rs index 9669576..4c89be9 100644 --- a/src/genome/simple.rs +++ b/src/genome/simple.rs @@ -6,33 +6,37 @@ use std::convert::TryInto; use std::fmt; use std::io; -/// number of distinct sites on the genome that can be represented. -pub const GENOME_LENGTH: usize = 64; -pub type GenomeStorage = BitArr!(for GENOME_LENGTH, in u64); +type GenomeStorage = BitBox; /// Representation of a genome. /// -/// Internally this is represented as a vector of binary bits. Mutation is modelled by +/// Internally this is represented as an array of binary bits. Mutation is modelled by /// selecting bits uniformly at random and flipping them. /// /// This model enables efficient operations and compact storage. -#[derive(PartialEq, Eq, Clone, Copy, Default)] -pub struct SimpleGenome(GenomeStorage); +#[derive(PartialEq, Eq, Clone)] +pub struct SimpleGenome(GenomeStorage); -impl Genome for SimpleGenome { +impl Default for SimpleGenome { + fn default() -> Self { + SimpleGenome(bitbox![usize, Lsb0; 0; BP]) + } +} + +impl Genome for SimpleGenome { /// Flip exactly `n_mutations` bits chosen at random. /// /// Panics if the requested number of mutations is greater than the fixed width of the /// representation. fn mutate(&self, n_mutations: usize, mut rng: R) -> Self { - let mut new_genome = self.0; assert!( - n_mutations <= GENOME_LENGTH, + n_mutations <= BP, "Requested number of mutations ({}) exceeds width of genome representation ({})", n_mutations, - GENOME_LENGTH + BP ); - for pos in index::sample(&mut rng, GENOME_LENGTH, n_mutations) { + let mut new_genome = self.0.clone(); + for pos in index::sample(&mut rng, BP, n_mutations) { let val = !new_genome.get(pos).unwrap(); new_genome.set(pos, val); } @@ -41,23 +45,26 @@ impl Genome for SimpleGenome { /// Counts the bitwise differences between the genome representations. fn snps(&self, other: &Self) -> u32 { - (self.0 ^ other.0).count_ones().try_into().unwrap() + (self.0.clone() ^ other.0.clone()) + .count_ones() + .try_into() + .unwrap() } /// Relabels 1 and 0 as A and C respectively. fn write_nucleotides(&self, mut writer: W) -> io::Result<()> { - for base in self.0 { - write!(writer, "{}", if base { 'A' } else { 'C' })?; + for base in self.0.as_bitslice() { + write!(writer, "{}", if *base { 'A' } else { 'C' })?; } Ok(()) } } -impl fmt::Debug for SimpleGenome { +impl fmt::Debug for SimpleGenome { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "SimpleGenome(")?; - for base in self.0 { - write!(f, "{}", if base { 'A' } else { 'C' })?; + for base in self.0.as_bitslice() { + write!(f, "{}", if *base { 'A' } else { 'C' })?; } write!(f, ")") } @@ -71,22 +78,22 @@ mod tests { #[test] fn test_empty() { - let genome1 = SimpleGenome::default(); - let genome2 = SimpleGenome::default(); + let genome1 = SimpleGenome::<64>::default(); + let genome2 = SimpleGenome::<64>::default(); assert_eq!(genome1.snps(&genome2), 0); } #[test] fn test_mutation() { let mut rng = Xoshiro256PlusPlus::seed_from_u64(89324 as u64); - let genome = SimpleGenome::default(); + let genome = SimpleGenome::<64>::default(); assert_ne!(genome, genome.mutate(4, &mut rng)); } #[test] fn test_distance() { let mut rng = Xoshiro256PlusPlus::seed_from_u64(89324 as u64); - let genome = SimpleGenome::default(); + let genome = SimpleGenome::<64>::default(); let child = genome.mutate(5, &mut rng); assert_eq!(genome.snps(&child), 5); assert_eq!(child.snps(&genome), 5); diff --git a/src/lib.rs b/src/lib.rs index 96f9906..1727e86 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,7 +28,7 @@ //! // stop the simulation at the end of a time step where the case count exceeds 100 //! let max_cases = 100; //! -//! let genome = SimpleGenome::default(); +//! let genome = SimpleGenome::<64>::default(); //! let ob = simulate_outbreak(genome, &disease_model, mutation_rate, max_cases, &mut rng); //! assert!(ob.is_ok()); //! @@ -60,7 +60,7 @@ pub mod simple { //! and can be configured to match details of a pathogen of interest. pub use crate::disease::simple::SimpleDisease; - pub use crate::genome::simple::{GenomeStorage, SimpleGenome, GENOME_LENGTH}; + pub use crate::genome::simple::SimpleGenome; } pub use disease::covid; diff --git a/src/simulate/binned.rs b/src/simulate/binned.rs index 8b5d83e..e066a0a 100644 --- a/src/simulate/binned.rs +++ b/src/simulate/binned.rs @@ -272,7 +272,7 @@ mod tests { bad_simulation_cap: 2000, }; let mut rng = Xoshiro256PlusPlus::seed_from_u64(89324 as u64); - let genome = SimpleGenome::default(); + let genome = SimpleGenome::<64>::default(); let outbreaks = binned_outbreaks(genome, &dm, mutation_rate, &sim_cfg, &mut rng).unwrap(); assert_eq!( diff --git a/src/simulate/mod.rs b/src/simulate/mod.rs index eba3748..eef3ab9 100644 --- a/src/simulate/mod.rs +++ b/src/simulate/mod.rs @@ -140,7 +140,7 @@ mod tests { infectiousness: vec![0.34, 0.33, 0.33], }; let mutation_rate = 2e-4 / 365.; - let genome = SimpleGenome::default(); + let genome = SimpleGenome::<64>::default(); let outbreak = simulate_outbreak(genome, &dm, mutation_rate, 100, &mut rng); assert!(outbreak.is_ok()); diff --git a/src/simulate/outbreak.rs b/src/simulate/outbreak.rs index a014008..eba0a07 100644 --- a/src/simulate/outbreak.rs +++ b/src/simulate/outbreak.rs @@ -3,6 +3,8 @@ use crate::genome::Genome; use crate::{Count, Time}; use std::io; +const FASTA_N_COLS: usize = 70; + /// A simulated outbreak containing a number of cases. #[derive(Debug)] pub struct Outbreak { @@ -49,20 +51,29 @@ impl Outbreak { pub fn write_fasta(&self, mut writer: W) -> io::Result<()> { let sources = self.outbreaks(); + let mut sequence = Vec::::new(); for (i, genome) in self.genome.iter().enumerate() { writeln!( writer, ">case{:06} day_infected={} day_reported={} outbreak={} parent={}", i, self.history[i].infected, - self.history[i].infected, + self.history[i] + .reported + .map(|x| x.to_string()) + .unwrap_or("".to_string()), sources[i], self.source[i] .map(|x| format!("case{:06}", x)) .unwrap_or_else(String::new) )?; - genome.write_nucleotides(&mut writer)?; - writeln!(writer)?; + + genome.write_nucleotides(&mut sequence)?; + for chunk in sequence.chunks(FASTA_N_COLS) { + writer.write_all(chunk)?; + writeln!(writer)?; + } + sequence.clear(); } Ok(()) }