Skip to content

Commit 7b1afc3

Browse files
author
Jeremiah Wala
committed
Updated support for BamStats
1 parent f50d16f commit 7b1afc3

File tree

10 files changed

+105
-31
lines changed

10 files changed

+105
-31
lines changed

src/AlignedContig.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,11 +236,18 @@ void AlignedContig::setMultiMapBreakPairs() {
236236

237237
bp.gr1 = SnowTools::GenomicRegion(it->m_align.ChrID(), it->gbreak2, it->gbreak2);
238238
bp.gr2 = SnowTools::GenomicRegion((it+1)->m_align.ChrID(), (it+1)->gbreak1, (it+1)->gbreak1);
239+
240+
//debug
241+
if (getContigName() == "c_19_15644356_15645003_48")
242+
std::cerr << "Frag 1: " << (*it) << " Frag 2: " << (*(it+1)) << std::endl;
243+
239244
//bp.gr1.strand = it->align.IsReverseStrand() ? '-' : '+';
240245
//bp.gr2.strand = (it+1)->align.IsReverseStrand() ? '+' : '-';
241-
bp.gr1.strand = !it->m_align.ReverseFlag() ? '+' : '-';
246+
247+
bp.gr1.strand = it->m_align.ReverseFlag() ? '-' : '+';
242248
bp.gr2.strand = (it+1)->m_align.ReverseFlag() ? '+' : '-';
243249

250+
244251
bp.cpos1 = it->break2; // take the right-most breakpoint as the first
245252
bp.cpos2 = (it+1)->break1; // take the left-most of the next one
246253

src/BamStats.cpp

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
namespace SnowTools {
77

88
BamReadGroup::BamReadGroup(const std::string& name) : reads(0), supp(0), unmap(0), qcfail(0),
9-
duplicate(0), m_name(name)
9+
duplicate(0), mate_unmap(0), m_name(name)
1010
{
1111

1212
mapq = Histogram(0,100,1);
@@ -18,8 +18,44 @@ BamReadGroup::BamReadGroup(const std::string& name) : reads(0), supp(0), unmap(0
1818

1919
}
2020

21+
std::ostream& operator<<(std::ostream& out, const BamStats& qc) {
22+
out << "ReadGroup\tReadCount\tSupplementary\tUnmapped\tMateUnmapped\tQCFailed\tDuplicate\tMappingQuality\tNM\tInsertSize\tClippedBases\tMeanPhredScore\tReadLength" << std::endl;
23+
for (auto& i : qc.m_group_map)
24+
out << i.second << std::endl;
25+
}
26+
27+
std::ostream& operator<<(std::ostream& out, const BamReadGroup& qc) {
28+
std::string sep = "\t";
29+
out << qc.m_name << sep << qc.reads << sep <<
30+
qc.supp << sep <<
31+
qc.unmap << sep <<
32+
qc.mate_unmap << sep <<
33+
qc.qcfail << sep <<
34+
qc.duplicate << sep <<
35+
qc.mapq.toFileString() << sep <<
36+
qc.nm.toFileString() << sep <<
37+
qc.isize.toFileString() << sep <<
38+
qc.clip.toFileString() << sep <<
39+
qc.phred.toFileString() << sep <<
40+
qc.len.toFileString();
41+
return out;
42+
}
43+
2144
void BamReadGroup::addRead(BamRead &r)
2245
{
46+
47+
++reads;
48+
if (r.SecondaryFlag())
49+
++supp;
50+
if (r.QCFailFlag())
51+
++qcfail;
52+
if (r.DuplicateFlag())
53+
++duplicate;
54+
if (!r.MappedFlag())
55+
++unmap;
56+
if (!r.MateMappedFlag())
57+
++mate_unmap;
58+
2359
int mapqr = r.MapQuality();
2460
if (mapqr >=0 && mapqr <= 100)
2561
mapq.addElem(mapqr);

src/BamWalker.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,10 @@ void BamWalker::WriteAlignment(BamRead &r)
291291
for (auto& i : m_tag_list)
292292
r.RemoveTag(i.c_str());
293293

294-
sam_write1(fop, br.get(), r.raw());
294+
if (!fop)
295+
std::cerr << "BamWalker ERROR in writeAlignment. Did you forget to open the Bam for writing (OpenWriteBam)? Skipping write" << std::endl;
296+
else
297+
sam_write1(fop, br.get(), r.raw());
295298
}
296299

297300
std::ostream& SnowTools::operator<<(std::ostream& out, const BamWalker& b)
@@ -307,6 +310,9 @@ std::ostream& SnowTools::operator<<(std::ostream& out, const BamWalker& b)
307310
out << "CRAM" << std::endl;
308311
else if (b.fop->format.format == text_format)
309312
out << "SAM" << std::endl;
313+
else if (b.fop == 0)
314+
out << "NONE" << std::endl;
315+
310316

311317
out << b.m_mr << std::endl;
312318
if (b.m_region.size()) {

src/BreakPoint.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -251,8 +251,8 @@ namespace SnowTools {
251251
}
252252

253253
// TODO convert chr to string with treader
254-
ss << gr1.chr+1 << sep << gr1.pos1 << sep << (gr1.strand ? '+' : '-') << sep
255-
<< gr2.chr+1 << sep << gr2.pos1 << sep << (gr2.strand ? '+' : '-') << sep
254+
ss << gr1.chr+1 << sep << gr1.pos1 << sep << gr1.strand << sep
255+
<< gr2.chr+1 << sep << gr2.pos1 << sep << gr2.strand << sep
256256
<< getSpan() << sep
257257
<< mapq1 << sep << mapq2 << sep
258258
<< nsplit << sep << tsplit << sep
@@ -279,9 +279,9 @@ namespace SnowTools {
279279
num_align = 0;
280280
dc = tdc;
281281

282-
gr1.pos1 = (tdc.m_reg1.strand) ? tdc.m_reg1.pos2 : tdc.m_reg1.pos1;
282+
gr1.pos1 = (tdc.m_reg1.strand == '+') ? tdc.m_reg1.pos2 : tdc.m_reg1.pos1;
283283
gr1.pos2 = gr1.pos1;
284-
gr2.pos1 = (tdc.m_reg2.strand) ? tdc.m_reg2.pos2 : tdc.m_reg2.pos1;
284+
gr2.pos1 = (tdc.m_reg2.strand == '+') ? tdc.m_reg2.pos2 : tdc.m_reg2.pos1;
285285
gr2.pos2 = gr2.pos1;
286286
gr1.chr = tdc.m_reg1.chr;
287287
gr2.chr = tdc.m_reg2.chr;
@@ -454,10 +454,10 @@ namespace SnowTools {
454454
switch(++count) {
455455
case 1: gr1.chr = stoi(val) - 1; break;
456456
case 2: gr1.pos1 = stoi(val); gr1.pos2 = gr1.pos1; break;
457-
case 3: gr1.strand = val.at(0)=='+'; break;
457+
case 3: gr1.strand = val.at(0); break;
458458
case 4: gr2.chr = stoi(val) - 1; break;
459459
case 5: gr2.pos1 = stoi(val); gr2.pos2 = gr2.pos1; break;
460-
case 6: gr2.strand = val.at(0)=='+'; break;
460+
case 6: gr2.strand = val.at(0); break;
461461
//case 7: span = stoi(val); break;
462462
case 8: mapq1 = stoi(val); break;
463463
case 9: mapq2 = stoi(val); break;
@@ -757,7 +757,6 @@ namespace SnowTools {
757757
GenomicRegion bp2 = gr2;
758758
bp1.pad(PAD);
759759
bp2.pad(PAD);
760-
761760

762761
for (auto& d : dmap)
763762
{
@@ -770,9 +769,10 @@ namespace SnowTools {
770769

771770
bool pass = bp1reg1 && bp2reg2;
772771

773-
//debug
774-
if (cname=="c_1_6524299_6527299_3")
775-
std::cerr << " HERE " << d.first << " " << d.second << " pass " << pass << std::endl;
772+
if (cname=="c_19_15644356_15645003_48")
773+
std::cerr << " HERE " << d.first << " " << d.second << " pass " << pass << std::endl <<
774+
"BREAKPOINT " << (*this) << " d.second.m_reg1 " << d.second.m_reg1 << " d.second.m_reg2 " << d.second.m_reg2 <<
775+
" bp1 " << bp1 << " bp2 " << bp2 << std::endl;
776776

777777
if (pass)
778778
{

src/DiscordantCluster.cpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -180,20 +180,6 @@ namespace SnowTools {
180180
return mean;
181181
}
182182

183-
double DiscordantCluster::getMeanMapq() const
184-
{
185-
double mean = 0;
186-
std::vector<int> tmapq;
187-
for (auto& i : mates)
188-
tmapq.push_back(i.second.MapQuality());
189-
for (auto& i : reads)
190-
tmapq.push_back(i.second.MapQuality());
191-
192-
if (tmapq.size() > 0)
193-
mean = accumulate(tmapq.begin(), tmapq.end(), 0.0) / tmapq.size();
194-
return mean;
195-
}
196-
197183
std::string DiscordantCluster::toRegionString() const
198184
{
199185
int pos1 = (m_reg1.strand == '+') ? m_reg1.pos2 : m_reg1.pos1;

src/Histogram.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,11 @@ v * 7. MISCELLANEOUS
4848
*/
4949

5050
#include "SnowTools/Histogram.h"
51+
#include "SnowTools/SnowUtils.h"
5152
#include <fstream>
5253
#include <cmath>
5354
#include <algorithm>
55+
#include <sstream>
5456

5557
#define BINARY_SEARCH 1
5658

@@ -107,6 +109,15 @@ void Histogram::addElem(const int32_t& elem) {
107109
++m_bins[retrieveBinID(elem)];
108110
}
109111

112+
std::string Histogram::toFileString() const {
113+
std::stringstream ss;
114+
for (auto& i : m_bins)
115+
if (i.m_count)
116+
ss << i.bounds.first << "_" << i.bounds.second << "_" << i.m_count << ",";
117+
return(cutLastChar(ss.str())); // trim off last comma
118+
119+
}
120+
110121
size_t Histogram::retrieveBinID(const int32_t& elem) const {
111122

112123
if (elem < m_bins[0].bounds.first)

src/SnowTools/AlignedContig.h

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include "SnowTools/BreakPoint.h"
88
#include "SnowTools/DiscordantCluster.h"
99
#include "SnowTools/BWAWrapper.h"
10+
#include "SnowTools/BamWalker.h"
1011

1112
namespace SnowTools {
1213

@@ -55,8 +56,15 @@ namespace SnowTools {
5556
*/
5657
bool checkLocal(const GenomicRegion& window);
5758

58-
const BPVec& getIndelBreaks() const { return m_indel_breaks; }
59+
const BPVec& getIndelBreaks() const { return m_indel_breaks; }
5960

61+
/*! Write the alignment record to a BAM file
62+
*/
63+
void writeToBAM(BamWalker& bw) {
64+
bw.WriteAlignment(m_align);
65+
}
66+
67+
6068
private:
6169

6270
BPVec m_indel_breaks; /**< indel variants on this alignment */
@@ -210,6 +218,25 @@ namespace SnowTools {
210218
*/
211219
bool hasVariant() const;
212220

221+
/*! Write all of the alignment records to a BAM file
222+
* @param bw BamWalker opened with OpenWriteBam
223+
*/
224+
void writeToBAM(BamWalker& bw) {
225+
for (auto& i : m_frag_v) {
226+
std::cerr << "write to bam " << i << std::endl;
227+
i.writeToBAM(bw);
228+
}
229+
}
230+
231+
/*! Write all of the sequencing reads as aligned to contig to a BAM file
232+
* @param bw BamWalker opened with OpenWriteBam
233+
*/
234+
void writeAlignedReadsToBAM(BamWalker& bw) {
235+
for (auto& i : m_bamreads)
236+
bw.WriteAlignment(i);
237+
}
238+
239+
213240
bool hasLocal() const { for (auto& i : m_frag_v) if (i.local) return true; return false; }
214241

215242
/*! @function retrieves all of the breakpoints by combining indels with global mutli-map break

src/SnowTools/BamStats.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class BamReadGroup {
4343
size_t unmap;
4444
size_t qcfail;
4545
size_t duplicate;
46+
size_t mate_unmap;
4647

4748
Histogram mapq;
4849
Histogram nm;

src/SnowTools/DiscordantCluster.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,7 @@ namespace SnowTools
4444
void addMateReads(const BamReadVector& bav);
4545

4646
// return the mean mapping quality for this cluster
47-
double getMeanMapq(bool mate) const;
48-
49-
double getMeanMapq() const;
47+
double getMeanMapq(bool mate = false) const;
5048

5149
/** Return the discordant cluster as a string with just coordinates */
5250
std::string toRegionString() const;

src/SnowTools/Histogram.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ class Histogram {
104104
*/
105105
Histogram(const int32_t& start, const int32_t& end, const uint32_t& width);
106106

107+
std::string toFileString() const;
108+
107109
friend std::ostream& operator<<(std::ostream &out, const Histogram &h) {
108110
for (auto& i : h.m_bins)
109111
out << i << std::endl;

0 commit comments

Comments
 (0)