Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions packages/pangraph/src/pangraph/edits.rs
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,26 @@ impl Edit {
self.subs.is_empty() && self.dels.is_empty() && self.inss.is_empty()
}

/// Reports whether this edit carries at least one indel, i.e. whether its
/// deletion list or its insertion list is non-empty. Substitutions are ignored.
pub fn has_indels(&self) -> bool {
  // De Morgan form of "has deletions OR has insertions".
  !(self.dels.is_empty() && self.inss.is_empty())
}

/// Reports whether the deletion list of this edit is non-empty.
/// Insertions and substitutions do not affect the result.
pub fn has_dels(&self) -> bool {
  let no_dels = self.dels.is_empty();
  !no_dels
}

/// Reports whether the insertion list of this edit is non-empty.
/// Deletions and substitutions do not affect the result.
pub fn has_inss(&self) -> bool {
  let no_inss = self.inss.is_empty();
  !no_inss
}

/// Reports whether the substitution list of this edit is non-empty.
/// Insertions and deletions do not affect the result.
pub fn has_subs(&self) -> bool {
  let no_subs = self.subs.is_empty();
  !no_subs
}

/// Construct edit which consists of a deletion of length `len`
pub fn deleted(len: usize) -> Self {
Self {
Expand Down Expand Up @@ -1045,4 +1065,72 @@ mod tests {
assert_eq!(mean_shift, 3);
assert_eq!(bandwidth, Some(4));
}

#[test]
fn test_has_indels() {
  // Each case: (description, edit under test, expected `has_indels()` result).
  let cases = [
    (
      "substitutions only",
      Edit::new(vec![], vec![], vec![Sub::new(1, 'A')]),
      false,
    ),
    (
      "deletion only",
      Edit::new(vec![], vec![Del::new(5, 2)], vec![]),
      true,
    ),
    (
      "insertion only",
      Edit::new(vec![Ins::new(10, "ATG")], vec![], vec![]),
      true,
    ),
    (
      "insertion, deletion and substitution",
      Edit::new(vec![Ins::new(10, "ATG")], vec![Del::new(5, 2)], vec![Sub::new(1, 'A')]),
      true,
    ),
    ("empty edit", Edit::empty(), false),
  ];

  for (description, edit, expected) in &cases {
    assert_eq!(edit.has_indels(), *expected, "{description}");
  }
}

#[test]
fn test_has_dels() {
  // Each case: (description, edit under test, expected `has_dels()` result).
  let cases = [
    (
      "insertion and substitution, no deletion",
      Edit::new(vec![Ins::new(10, "ATG")], vec![], vec![Sub::new(1, 'A')]),
      false,
    ),
    (
      "deletion only",
      Edit::new(vec![], vec![Del::new(5, 2)], vec![]),
      true,
    ),
    ("empty edit", Edit::empty(), false),
  ];

  for (description, edit, expected) in &cases {
    assert_eq!(edit.has_dels(), *expected, "{description}");
  }
}

#[test]
fn test_has_inss() {
  // Each case: (description, edit under test, expected `has_inss()` result).
  let cases = [
    (
      "deletion and substitution, no insertion",
      Edit::new(vec![], vec![Del::new(5, 2)], vec![Sub::new(1, 'A')]),
      false,
    ),
    (
      "insertion only",
      Edit::new(vec![Ins::new(10, "ATG")], vec![], vec![]),
      true,
    ),
    ("empty edit", Edit::empty(), false),
  ];

  for (description, edit, expected) in &cases {
    assert_eq!(edit.has_inss(), *expected, "{description}");
  }
}

#[test]
fn test_has_subs() {
  // Each case: (description, edit under test, expected `has_subs()` result).
  let cases = [
    (
      "insertion and deletion, no substitution",
      Edit::new(vec![Ins::new(10, "ATG")], vec![Del::new(5, 2)], vec![]),
      false,
    ),
    (
      "substitution only",
      Edit::new(vec![], vec![], vec![Sub::new(1, 'A')]),
      true,
    ),
    ("empty edit", Edit::empty(), false),
  ];

  for (description, edit, expected) in &cases {
    assert_eq!(edit.has_subs(), *expected, "{description}");
  }
}
}
2 changes: 1 addition & 1 deletion packages/pangraph/src/pangraph/graph_merging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ pub fn self_merge(graph: Pangraph, args: &PangraphBuildArgs) -> Result<(Pangraph

// update consensus and alignment of merged blocks.
let merge_block_ids = new_blocks_dict.keys().copied().collect_vec();
reconsensus_graph(&mut graph, merge_block_ids, args).wrap_err("During reconsensus")?;
reconsensus_graph(&mut graph, &merge_block_ids, args).wrap_err("During reconsensus")?;

Ok((graph, true))
}
Expand Down
70 changes: 69 additions & 1 deletion packages/pangraph/src/pangraph/pangraph_block.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
use crate::io::fasta::FastaRecord;
use crate::io::json::{JsonPretty, json_write_str};
use crate::io::seq::reverse_complement;
use crate::pangraph::edits::Edit;
use crate::pangraph::edits::{Del, Edit, Ins, Sub};
use crate::pangraph::pangraph::Pangraph;
use crate::pangraph::pangraph_node::NodeId;
use crate::pangraph::pangraph_path::PathId;
use crate::representation::seq::Seq;
use crate::representation::seq_char::AsciiChar;
use crate::utils::collections::has_duplicates;
use crate::utils::interval::positions_to_intervals;
use derive_more::{Display, From};
use eyre::{Report, WrapErr};
use getset::{CopyGetters, Getters};
use itertools::Itertools;
use maplit::btreemap;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -195,6 +197,72 @@ impl PangraphBlock {
})
})
}

/// Collects the majority insertions, deletions and substitutions of this block
/// into a single [`Edit`].
///
/// Each component is computed by the corresponding `find_majority_*` method.
pub fn find_majority_edits(&self) -> Edit {
  let inss = self.find_majority_insertions();
  let dels = self.find_majority_deletions();
  let subs = self.find_majority_substitutions();
  Edit::new(inss, dels, subs)
}

/// Returns `true` when `count` is strictly greater than `self.depth() / 2`.
///
/// The division is an integer division, so an exact half does not count as a
/// majority (e.g. a depth of 4 or 5 both require a count of at least 3).
fn is_majority(&self, count: usize) -> bool {
  let half_depth = self.depth() / 2;
  count > half_depth
}

/// Finds the substitutions shared by a majority of this block's alignments.
///
/// For every substituted position, the most frequent alternative character is
/// kept if its occurrence count passes `is_majority`. The result is sorted by
/// position.
pub fn find_majority_substitutions(&self) -> Vec<Sub> {
  // Group every observed alternative character by the position it replaces.
  let alts_by_pos = self
    .alignments()
    .values()
    .flat_map(|edit| &edit.subs)
    .map(|sub| (sub.pos, sub.alt))
    .into_group_map();

  let mut majority_subs = Vec::new();
  for (pos, alts) in alts_by_pos {
    // Most frequent alternative at this position, together with its count.
    let most_common = alts.into_iter().counts().into_iter().max_by_key(|(_, n)| *n);
    match most_common {
      Some((alt, n)) if self.is_majority(n) => majority_subs.push(Sub::new(pos, alt)),
      _ => {},
    }
  }

  // The grouping map has no defined iteration order, so order by position.
  majority_subs.sort_by_key(|sub| sub.pos);
  majority_subs
}

/// Finds the deletions shared by a majority of this block's alignments.
///
/// Every position covered by a deletion is tallied across all alignments.
/// Positions whose tally passes `is_majority` are kept, sorted in ascending
/// order, and handed to `positions_to_intervals`; each resulting interval
/// becomes one [`Del`] with the interval's start and length.
pub fn find_majority_deletions(&self) -> Vec<Del> {
  let mut majority_positions: Vec<usize> = self
    .alignments()
    .values()
    .flat_map(|edit| edit.dels.iter().flat_map(|del| del.range()))
    .counts()
    .into_iter()
    .filter_map(|(pos, count)| self.is_majority(count).then_some(pos))
    .collect();

  // `counts()` produces a `HashMap`, whose iteration order is arbitrary and can
  // differ between runs. Sort so that consecutive positions are adjacent and the
  // resulting deletions are deterministic.
  // NOTE(review): `positions_to_intervals` is defined elsewhere; this assumes it
  // merges runs of consecutive positions and expects ascending input — confirm.
  majority_positions.sort_unstable();

  positions_to_intervals(&majority_positions)
    .into_iter()
    .map(|interval| Del::new(interval.start, interval.len()))
    .collect()
}

/// Finds the insertions shared by a majority of this block's alignments.
///
/// Two insertions are considered the same only if both their position and
/// their inserted sequence match. An insertion is kept when its occurrence
/// count passes `is_majority`. The result is sorted by position.
pub fn find_majority_insertions(&self) -> Vec<Ins> {
  // Tally identical (position, sequence) pairs across all alignments.
  let occurrences = self
    .alignments()
    .values()
    .flat_map(|edit| &edit.inss)
    .map(|ins| (ins.pos, ins.seq.clone()))
    .counts();

  let mut majority_inss = Vec::new();
  for ((pos, seq), n) in occurrences {
    if self.is_majority(n) {
      majority_inss.push(Ins::new(pos, seq));
    }
  }

  // The tally map has no defined iteration order, so order by position.
  majority_inss.sort_by_key(|ins| ins.pos);
  majority_inss
}
}

#[derive(Copy, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
Expand Down
Loading
Loading