Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/bulk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ fn perform_inference_and_write_output(
// if the user requested bootstrap replicates,
// compute and write those out now.
if args.num_bootstraps > 0 {
let breps = em::bootstrap(&emi, args.num_bootstraps, args.threads);
let breps = em::bootstrap(&emi, args.num_bootstraps, args.threads, args.seed);

let mut new_arrays = vec![];
let mut bs_fields = vec![];
Expand Down
26 changes: 21 additions & 5 deletions src/em.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use crate::util::oarfish_types::{AlnInfo, EMInfo, TranscriptInfo};
use atomic_float::AtomicF64;
use itertools::izip;
use num_format::{Locale, ToFormattedString};
use rand::rng as trng;
use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator};
use tracing::{info, span, trace};

Expand Down Expand Up @@ -270,8 +269,20 @@ pub fn em(em_info: &EMInfo, _nthreads: usize) -> Vec<f64> {
do_em(em_info, make_iter, true)
}

pub fn do_bootstrap(em_info: &EMInfo) -> Vec<f64> {
let mut rng = trng();
pub fn do_bootstrap(em_info: &EMInfo, seed: Option<u64>, rep_idx: u64) -> Vec<f64> {
use rand::SeedableRng;
use rand::rngs::StdRng;

let mut rng: StdRng = match seed {
Some(s) => {
// Derive a deterministic per-replicate seed so that parallel execution
// yields the same results regardless of scheduling.
let derived = s.wrapping_add(0x9E37_79B9_7F4A_7C15u64.wrapping_mul(rep_idx + 1));
StdRng::seed_from_u64(derived)
}
None => StdRng::from_os_rng(),
};

let n = em_info.eq_map.len();
let inds = bootstrap::get_sample_inds(n, &mut rng);

Expand All @@ -289,7 +300,12 @@ pub fn do_bootstrap(em_info: &EMInfo) -> Vec<f64> {
do_em(em_info, make_iter, false)
}

pub fn bootstrap(em_info: &EMInfo, num_boot: u32, nthreads: usize) -> Vec<Vec<f64>> {
pub fn bootstrap(
em_info: &EMInfo,
num_boot: u32,
nthreads: usize,
seed: Option<u64>,
) -> Vec<Vec<f64>> {
let span = span!(tracing::Level::INFO, "bootstrap");
let _guard = span.enter();

Expand All @@ -307,7 +323,7 @@ pub fn bootstrap(em_info: &EMInfo, num_boot: u32, nthreads: usize) -> Vec<Vec<f6
let span = span!(tracing::Level::INFO, "bootstrap");
let _guard = span.enter();
info!("evaluating bootstrap replicate {}", i);
do_bootstrap(em_info)
do_bootstrap(em_info, seed, i as u64)
})
.collect()
})
Expand Down
6 changes: 6 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,12 @@ fn main() -> anyhow::Result<()> {

let mut args = Args::parse();

// If a seed is provided force single-threaded execution
if let Some(seed) = args.seed {
info!("reproducibility: using seed {seed}; forcing single-threaded execution");
args.threads = 1;
}

// change the logging filter if the user specified quiet or
// verbose.
if args.quiet {
Expand Down
5 changes: 5 additions & 0 deletions src/prog_opts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,4 +418,9 @@ pub struct Args {
/// use a KDE model of the observed fragment length distribution
#[arg(short, long, hide = true)]
pub use_kde: bool,

/// RNG seed to make stochastic components (e.g. bootstrapping) deterministic.
/// If provided, oarfish will also force the EM implementation to be single-threaded
#[arg(long, help_heading = "reproducibility")]
pub seed: Option<u64>,
}