From 5ee7304afe83bc077fe7db4c314cd625a0ba956e Mon Sep 17 00:00:00 2001 From: Glenn Hickey Date: Thu, 11 May 2023 11:07:08 -0400 Subject: [PATCH] adapt rgfa tests to latest conventions --- src/gfa.cpp | 18 +++++------------- src/gfa.hpp | 5 ++--- test/t/11_vg_paths.t | 28 ++++++++++++++-------------- 3 files changed, 21 insertions(+), 30 deletions(-) diff --git a/src/gfa.cpp b/src/gfa.cpp index 2848e202a6b..45cf99ca1be 100644 --- a/src/gfa.cpp +++ b/src/gfa.cpp @@ -20,8 +20,8 @@ static bool should_write_as_w_line(const PathHandleGraph* graph, path_handle_t p static void write_w_line(const PathHandleGraph* graph, ostream& out, path_handle_t path_handle, unordered_map, size_t>& last_phase_block_end); void graph_to_gfa(const PathHandleGraph* graph, ostream& out, const set& rgfa_paths, - bool rgfa_pline, bool use_w_lines, const string& rgfa_sample_name) { - + bool rgfa_pline, bool use_w_lines) { + // TODO: Support sorting nodes, paths, and/or edges for canonical output // TODO: Use a NamedNodeBackTranslation (or forward translation?) to properly round-trip GFA that has had to be chopped. @@ -118,7 +118,6 @@ void graph_to_gfa(const PathHandleGraph* graph, ostream& out, const set& vector w_line_paths; - bool warned_about_tags_as_paths = false; // Paths as P-lines for (const path_handle_t& h : path_handles) { auto path_name = graph->get_path_name(h); @@ -126,9 +125,6 @@ void graph_to_gfa(const PathHandleGraph* graph, ostream& out, const set& if (!rgfa_paths.empty()) { // the path was put into tags, no reason to deal with it here continue; - } else if (!warned_about_tags_as_paths) { - cerr << "warning [gfa]: outputing rGFA cover (rank>=1) path(s) as a P-line(s) and not tags because no reference (rank==0) selected" << endl; - warned_about_tags_as_paths = true; } } if (rgfa_pline || !rgfa_paths.count(path_name)) { @@ -171,9 +167,6 @@ void graph_to_gfa(const PathHandleGraph* graph, ostream& out, const set& if (!rgfa_paths.empty()) { // the path was put into tags, no reason to deal with it here continue; - } else if (!warned_about_tags_as_paths) { - cerr << "warning [gfa]: outputing rGFA cover (rank>=1) path(s) as a W-line(s) and not tags because no reference (rank==0) selected" << endl; - warned_about_tags_as_paths = true; } } write_w_line(graph, out, h, last_phase_block_end); @@ -282,7 +275,7 @@ void write_w_line(const PathHandleGraph* graph, ostream& out, path_handle_t path out << "\n"; } -int get_rgfa_rank(const string& path_name, const string& rgfa_sample) { +int get_rgfa_rank(const string& path_name) { int rank = -1; PathSense path_sense; @@ -296,7 +289,6 @@ int get_rgfa_rank(const string& path_name, const string& rgfa_sample) { size_t pos = path_locus.rfind(":SR:i:"); if (pos != string::npos && path_locus.length() - pos >= 6) { - assert(path_sample == rgfa_sample); pos += 6; size_t pos2 = path_locus.find(":", pos); size_t len = pos2 == string::npos ? pos2 : pos2 - pos; @@ -399,10 +391,10 @@ void rgfa_graph_cover(MutablePathMutableHandleGraph* graph, size_t thread_count = get_thread_count(); vector>>> thread_covers(thread_count); - // we process top-level snarl_manager in parallel + // we process top-level snarls in parallel snarl_manager->for_each_top_level_snarl_parallel([&](const Snarl* snarl) { // per-thread output - // each fragment is a rank and vector of steps, the cove is a list of fragments + // each fragment is a rank and vector of steps, the cover is a list of fragments // TODO: we can store just a first step and count instead of every fragment vector>>& cover_fragments = thread_covers.at(omp_get_thread_num()); // we also index the fragments by their node ids for fast lookups of what's covered by what diff --git a/src/gfa.hpp b/src/gfa.hpp index 3603224cf1a..e66d91aeb72 100644 --- a/src/gfa.hpp +++ b/src/gfa.hpp @@ -26,8 +26,7 @@ using namespace std; void graph_to_gfa(const PathHandleGraph* graph, ostream& out, const set& rgfa_paths = {}, bool rgfa_pline = false, - bool use_w_lines = true, - const string& rgfa_sample_name = ""); + bool use_w_lines = true); /// Prototype code to tag paths as rGFA paths. Either needs to be completely scrapped @@ -48,7 +47,7 @@ void graph_to_gfa(const PathHandleGraph* graph, ostream& out, /// Returns the RGFA rank (SR) of a path. This will be 0 for the reference /// backbone, and higher the further number of (nested) bubbles away it is. /// If the path is not an RGFA path, then return -1 -int get_rgfa_rank(const string& path_name, const string& rgfa_sample="_rGFA_"); +int get_rgfa_rank(const string& path_name); /// Add the rgfa rank to a pathname, also setting its sample to the special rgfa sample and /// moving its old sample into the locus field diff --git a/test/t/11_vg_paths.t b/test/t/11_vg_paths.t index cd28a8fefa5..29d92ba36e4 100644 --- a/test/t/11_vg_paths.t +++ b/test/t/11_vg_paths.t @@ -59,46 +59,46 @@ is $? 0 "vg path coverage reports correct lengths in first column" rm -f q.vg q.cov.len q.len -vg paths -v rgfa/rgfa_tiny.gfa -R 1 -Q x | vg view - > rgfa_tiny.rgfa -printf "P y[33-34]:SR:i:1 10+ * -P y[38-39]:SR:i:1 13+ * -P y[8-10]:SR:i:1 2+,4+ *\n" > rgfa_tiny_expected_fragments.rgfa +vg paths -v rgfa/rgfa_tiny.gfa -R 1 -Q x -N c | vg convert - -fW > rgfa_tiny.rgfa +printf "P c#0#SN:Z:y:SR:i:1[33-34] 10+ * +P c#0#SN:Z:y:SR:i:1[38-39] 13+ * +P c#0#SN:Z:y:SR:i:1[8-10] 2+,4+ *\n" > rgfa_tiny_expected_fragments.rgfa grep ^P rgfa_tiny.rgfa | grep SR | sort > rgfa_tiny_fragments.rgfa diff rgfa_tiny_fragments.rgfa rgfa_tiny_expected_fragments.rgfa is $? 0 "Found the expected rgfa SNP cover of tiny graph" rm -f rgfa_tiny.rgfa rgfa_tiny_expected_fragments.rgfa rgfa_tiny_fragments.rgfa -vg paths -v rgfa/rgfa_ins.gfa -R 5 -Q x | vg view - > rgfa_ins.rgfa -printf "P z[8-17]:SR:i:1 2+,3+,4+ *\n" > rgfa_ins_expected_fragments.rgfa +vg paths -v rgfa/rgfa_ins.gfa -R 5 -Q x -N c | vg convert - -fW > rgfa_ins.rgfa +printf "P c#0#SN:Z:z:SR:i:1[8-17] 2+,3+,4+ *\n" > rgfa_ins_expected_fragments.rgfa grep ^P rgfa_ins.rgfa | grep SR | sort > rgfa_ins_fragments.rgfa diff rgfa_ins_fragments.rgfa rgfa_ins_expected_fragments.rgfa is $? 0 "Found the expected rgfa cover for simple nested insertion" rm -f rgfa_ins.rgfa rgfa_ins_expected_fragments.rgfa rgfa_ins_fragments.rgfa -vg paths -v rgfa/rgfa_ins2.gfa -R 3 -Q x | vg view - > rgfa_ins2.rgfa -printf "P y[8-24]:SR:i:1 2+,6+,4+ * -P z[11-14]:SR:i:2 3+ *\n" > rgfa_ins2_expected_fragments.rgfa +vg paths -v rgfa/rgfa_ins2.gfa -R 3 -Q x | vg convert - -fW > rgfa_ins2.rgfa +printf "P _rGFA_#0#SN:Z:y:SR:i:1[8-24] 2+,6+,4+ * +P _rGFA_#0#SN:Z:z:SR:i:2[11-14] 3+ *\n" > rgfa_ins2_expected_fragments.rgfa grep ^P rgfa_ins2.rgfa | grep SR | sort > rgfa_ins2_fragments.rgfa diff rgfa_ins2_fragments.rgfa rgfa_ins2_expected_fragments.rgfa is $? 0 "Found the expected rgfa cover for simple nested insertion that requires two fragments" rm -f rgfa_ins2.rgfa rgfa_ins2_expected_fragments.rgfa rgfa_ins2_fragments.rgfa -vg paths -v rgfa/rgfa_ins2.gfa -R 5 -Q x | vg view - > rgfa_ins2R5.rgfa -printf "P y[8-24]:SR:i:1 2+,6+,4+ *\n" > rgfa_ins2R5_expected_fragments.rgfa +vg paths -v rgfa/rgfa_ins2.gfa -R 5 -Q x -N c | vg convert - -fW > rgfa_ins2R5.rgfa +printf "P c#0#SN:Z:y:SR:i:1[8-24] 2+,6+,4+ *\n" > rgfa_ins2R5_expected_fragments.rgfa grep ^P rgfa_ins2R5.rgfa | grep SR | sort > rgfa_ins2R5_fragments.rgfa diff rgfa_ins2R5_fragments.rgfa rgfa_ins2R5_expected_fragments.rgfa is $? 0 "rgfa Minimum fragment length filters out small fragment" rm -f rgfa_ins2R5.rgfa rgfa_ins2R5_expected_fragments.rgfa rgfa_ins2R5_fragments.rgfa -vg paths -v rgfa/rgfa_ins3.gfa -R 3 -Q x | vg view - > rgfa_ins3.rgfa +vg paths -v rgfa/rgfa_ins3.gfa -R 3 -Q x -N c | vg convert - -fW > rgfa_ins3.rgfa printf "P x 1+,5+ * -P y[3-19]:SR:i:1 4+,6+,2+ * +P c#0#SN:Z:y:SR:i:1[3-19] 4+,6+,2+ * P y 5-,4+,6+,2+,1- * -P z[11-14]:SR:i:2 3+ * +P c#0#SN:Z:z:SR:i:2[11-14] 3+ * P z 1+,2-,3+,4-,5+ *\n" | sort > rgfa_ins3_expected_fragments.rgfa grep ^P rgfa_ins3.rgfa | sort > rgfa_ins3_fragments.rgfa diff rgfa_ins3_fragments.rgfa rgfa_ins3_expected_fragments.rgfa