Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
35d0e97
refactor: reorganize reconsensus
ivan-aksamentov Jul 25, 2025
7c6a0a3
refactor: return struct instead of tuple
ivan-aksamentov Jul 25, 2025
1893ba3
perf: add inline hints to small utility methods
ivan-aksamentov Jul 25, 2025
bdd3f52
refactor: decompose update_alignment_for_mutation into smaller functions
ivan-aksamentov Jul 25, 2025
9ec5e92
refactor: move update_alignment_for_mutation to Edit method
ivan-aksamentov Jul 25, 2025
ab428de
refactor: extract substitutions_at_position method
ivan-aksamentov Jul 25, 2025
a52114f
refactor: rename update_alignment_for_mutation
ivan-aksamentov Jul 25, 2025
c333bc4
perf: avoid unnecessary vector allocation in substitution reconciliation
ivan-aksamentov Jul 25, 2025
6b4144f
refactor: rename substitutions_at_position to subs_at_position
ivan-aksamentov Jul 25, 2025
e0616a4
refactor: simplify var names
ivan-aksamentov Jul 25, 2025
0ce37bd
feat: add extra error context
ivan-aksamentov Jul 25, 2025
dd970fc
fix: handle .unwrap()
ivan-aksamentov Jul 25, 2025
aa1e4b1
test: add unit tests for majority edits in PangraphBlock
mmolari Jul 31, 2025
76f834b
refactor: update BlockAnalysis to avoid recomputing edits
mmolari Aug 1, 2025
b610bad
feat: add block method to change consensus nucleotide and update alig…
mmolari Aug 1, 2025
0f8496d
test: add unit tests for is_position_deleted
mmolari Aug 2, 2025
dd9abae
fix: correct apply_full_reconsensus function logic and change names
mmolari Aug 2, 2025
c96826b
feat: move consensus update to block method edit_consensus_and_realign
mmolari Aug 3, 2025
267f5b7
docs: improve reconsensus.rs inline comments
mmolari Aug 3, 2025
493a7fe
docs: inline comments in pangraph_block.rs
mmolari Aug 3, 2025
9473045
test: add unit test for reverse_complement method in PangraphBlock
mmolari Aug 3, 2025
8064456
feat: sort reverse complement results for subs, dels, and inss in Edi…
mmolari Aug 3, 2025
86ad1bc
test: add unit tests for `reconcile_substitution_with_consensus`
mmolari Aug 3, 2025
ca15a1d
test: add unit test for `edit_consensus_and_realign`
mmolari Aug 3, 2025
fbe9c0d
test: reconsensus unit tests update
mmolari Aug 3, 2025
1087faa
test: reconsensus unit test update
mmolari Aug 3, 2025
0b170e2
refactor: update change_consensus_nucleotide_at_pos to accept Sub
mmolari Aug 4, 2025
dcf6c32
Merge pull request #165 from neherlab/review/reconsensus-updated
ivan-aksamentov Aug 4, 2025
b09c759
docs: update changelog
mmolari Aug 4, 2025
243b226
refactor: reconcile substitution method
mmolari Aug 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 156 additions & 0 deletions packages/pangraph/src/pangraph/edits.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::io::seq::{complement, reverse_complement};
use crate::make_error;
use crate::representation::seq::Seq;
use crate::representation::seq_char::AsciiChar;
use crate::utils::interval::Interval;
Expand Down Expand Up @@ -127,6 +128,93 @@ impl Edit {
self.subs.is_empty() && self.dels.is_empty() && self.inss.is_empty()
}

/// Returns true if this edit contains any insertions or deletions (indels)
#[inline]
pub fn has_indels(&self) -> bool {
self.has_dels() || self.has_inss()
}

/// Returns true if this edit contains any deletions
#[inline]
pub fn has_dels(&self) -> bool {
!self.dels.is_empty()
}

/// Returns true if this edit contains any insertions
#[inline]
pub fn has_inss(&self) -> bool {
!self.inss.is_empty()
}

/// Returns true if this edit contains any substitutions
#[inline]
pub fn has_subs(&self) -> bool {
!self.subs.is_empty()
}

/// Checks if a position is deleted in this edit
#[inline]
pub fn is_position_deleted(&self, pos: usize) -> bool {
self.dels.iter().any(|d| d.contains(pos))
}

/// Adds reversion mutation when no substitutions exist at a position
pub fn add_reversion_if_not_deleted(&mut self, pos: usize, original: AsciiChar) {
if !self.is_position_deleted(pos) {
self.subs.push(Sub::new(pos, original));
self.subs.sort_by_key(|s| s.pos);
}
}

/// Removes substitution if it matches the new consensus character
pub fn remove_matching_substitution(&mut self, substitution: &Sub) {
if let Some(existing_sub) = self.subs.iter().find(|s| s.pos == substitution.pos) {
if existing_sub.alt == substitution.alt {
self
.subs
.retain(|sub| !(sub.pos == substitution.pos && sub.alt == substitution.alt));
}
}
}

/// Returns all substitutions at a specific position
fn subs_at_position(&self, pos: usize) -> Vec<Sub> {
self.subs.iter().filter(|s| s.pos == pos).cloned().collect()
}

/// Reconciles genome alignment when consensus changes due to a substitution.
///
/// During reconsensus, when a position in the consensus sequence is updated with a new
/// character, this method adjusts the genome's edit to maintain correct alignment.
pub fn reconcile_substitution_with_consensus(
&mut self,
substitution: &Sub,
original: AsciiChar,
) -> Result<(), Report> {
let subs_count = self.subs.iter().filter(|s| s.pos == substitution.pos).count();

match subs_count {
// If genome has no mutation at this position: adds reversion to original character
0 => self.add_reversion_if_not_deleted(substitution.pos, original),
// If genome has matching mutation: removes it (now matches new consensus)
1 => self.remove_matching_substitution(substitution),
// If genome has conflicting mutations: returns error for inconsistent state
_ => {
let subs_at_pos = self.subs_at_position(substitution.pos);
return make_error!(
"At position {}: sequence states disagree: {:}",
substitution.pos,
subs_at_pos
.iter()
.map(|sub| sub.alt.to_string())
.collect_vec()
.join(", ")
);
},
}
Ok(())
}

/// Construct edit which consists of a deletion of length `len`
pub fn deleted(len: usize) -> Self {
Self {
Expand Down Expand Up @@ -1045,4 +1133,72 @@ mod tests {
assert_eq!(mean_shift, 3);
assert_eq!(bandwidth, Some(4));
}

#[test]
fn test_has_indels() {
// Edit with no indels (only substitutions)
let edit_no_indels = Edit::new(vec![], vec![], vec![Sub::new(1, 'A')]);
assert!(!edit_no_indels.has_indels());

// Edit with deletions
let edit_with_dels = Edit::new(vec![], vec![Del::new(5, 2)], vec![]);
assert!(edit_with_dels.has_indels());

// Edit with insertions
let edit_with_inss = Edit::new(vec![Ins::new(10, "ATG")], vec![], vec![]);
assert!(edit_with_inss.has_indels());

// Edit with both insertions and deletions
let edit_with_both = Edit::new(vec![Ins::new(10, "ATG")], vec![Del::new(5, 2)], vec![Sub::new(1, 'A')]);
assert!(edit_with_both.has_indels());

// Empty edit
let edit_empty = Edit::empty();
assert!(!edit_empty.has_indels());
}

#[test]
fn test_has_dels() {
// Edit with no deletions
let edit_no_dels = Edit::new(vec![Ins::new(10, "ATG")], vec![], vec![Sub::new(1, 'A')]);
assert!(!edit_no_dels.has_dels());

// Edit with deletions
let edit_with_dels = Edit::new(vec![], vec![Del::new(5, 2)], vec![]);
assert!(edit_with_dels.has_dels());

// Empty edit
let edit_empty = Edit::empty();
assert!(!edit_empty.has_dels());
}

#[test]
fn test_has_inss() {
// Edit with no insertions
let edit_no_inss = Edit::new(vec![], vec![Del::new(5, 2)], vec![Sub::new(1, 'A')]);
assert!(!edit_no_inss.has_inss());

// Edit with insertions
let edit_with_inss = Edit::new(vec![Ins::new(10, "ATG")], vec![], vec![]);
assert!(edit_with_inss.has_inss());

// Empty edit
let edit_empty = Edit::empty();
assert!(!edit_empty.has_inss());
}

#[test]
fn test_has_subs() {
// Edit with no substitutions
let edit_no_subs = Edit::new(vec![Ins::new(10, "ATG")], vec![Del::new(5, 2)], vec![]);
assert!(!edit_no_subs.has_subs());

// Edit with substitutions
let edit_with_subs = Edit::new(vec![], vec![], vec![Sub::new(1, 'A')]);
assert!(edit_with_subs.has_subs());

// Empty edit
let edit_empty = Edit::empty();
assert!(!edit_empty.has_subs());
}
}
2 changes: 1 addition & 1 deletion packages/pangraph/src/pangraph/graph_merging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ pub fn self_merge(graph: Pangraph, args: &PangraphBuildArgs) -> Result<(Pangraph

// update consensus and alignment of merged blocks.
let merge_block_ids = new_blocks_dict.keys().copied().collect_vec();
reconsensus_graph(&mut graph, merge_block_ids, args).wrap_err("During reconsensus")?;
reconsensus_graph(&mut graph, &merge_block_ids, args).wrap_err("During reconsensus")?;

Ok((graph, true))
}
Expand Down
71 changes: 70 additions & 1 deletion packages/pangraph/src/pangraph/pangraph_block.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
use crate::io::fasta::FastaRecord;
use crate::io::json::{JsonPretty, json_write_str};
use crate::io::seq::reverse_complement;
use crate::pangraph::edits::Edit;
use crate::pangraph::edits::{Del, Edit, Ins, Sub};
use crate::pangraph::pangraph::Pangraph;
use crate::pangraph::pangraph_node::NodeId;
use crate::pangraph::pangraph_path::PathId;
use crate::representation::seq::Seq;
use crate::representation::seq_char::AsciiChar;
use crate::utils::collections::has_duplicates;
use crate::utils::interval::positions_to_intervals;
use derive_more::{Display, From};
use eyre::{Report, WrapErr};
use getset::{CopyGetters, Getters};
use itertools::Itertools;
use maplit::btreemap;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -195,6 +197,73 @@ impl PangraphBlock {
})
})
}

/// Finds all majority edits (substitutions, deletions, insertions) in this block
pub fn find_majority_edits(&self) -> Edit {
Edit::new(
self.find_majority_insertions(),
self.find_majority_deletions(),
self.find_majority_substitutions(),
)
}

/// Helper method to check if a count represents a majority
#[inline]
pub fn is_majority(&self, count: usize) -> bool {
count > self.depth() / 2
}

/// Finds majority substitutions in this block
pub fn find_majority_substitutions(&self) -> Vec<Sub> {
let mut substitutions: Vec<_> = self
.alignments()
.values()
.flat_map(|edit| &edit.subs)
.map(|sub| (sub.pos, sub.alt))
.into_group_map()
.into_iter()
.filter_map(|(pos, alts)| {
let (alt, count) = alts.into_iter().counts().into_iter().max_by_key(|(_, count)| *count)?;
self.is_majority(count).then_some(Sub::new(pos, alt))
})
.collect();

substitutions.sort_by_key(|sub| sub.pos);
substitutions
}

/// Finds majority deletions in this block
pub fn find_majority_deletions(&self) -> Vec<Del> {
let majority_positions: Vec<usize> = self
.alignments()
.values()
.flat_map(|edit| edit.dels.iter().flat_map(|del| del.range()))
.counts()
.into_iter()
.filter_map(|(pos, count)| self.is_majority(count).then_some(pos))
.collect();

positions_to_intervals(&majority_positions)
.into_iter()
.map(|interval| Del::new(interval.start, interval.len()))
.collect()
}

/// Finds majority insertions in this block
pub fn find_majority_insertions(&self) -> Vec<Ins> {
let mut insertions: Vec<_> = self
.alignments()
.values()
.flat_map(|edit| &edit.inss)
.map(|insertion| (insertion.pos, insertion.seq.clone()))
.counts()
.into_iter()
.filter_map(|((pos, seq), count)| self.is_majority(count).then_some(Ins::new(pos, seq)))
.collect();

insertions.sort_by_key(|ins| ins.pos);
insertions
}
}

#[derive(Copy, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
Expand Down
Loading
Loading