Skip to content

Commit a548945

Browse files
smithlab_os.cpp: adding code from former QualityScore.chpp files
1 parent 01eef2e commit a548945

File tree

1 file changed

+45
-13
lines changed

1 file changed

+45
-13
lines changed

smithlab_os.cpp

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,23 @@
1717
*/
1818

1919
#include "smithlab_os.hpp"
20-
#include "QualityScore.hpp"
2120
#include "smithlab_utils.hpp"
2221

22+
#include <dirent.h>
23+
#include <sys/stat.h>
24+
#include <unistd.h>
25+
2326
#include <algorithm>
2427
#include <cassert>
2528
#include <cerrno>
29+
#include <cmath>
2630
#include <cstring>
2731
#include <filesystem>
2832
#include <fstream>
2933
#include <iterator>
3034
#include <stdexcept>
3135
#include <unordered_map>
3236

33-
#include <dirent.h>
34-
#include <sys/stat.h>
35-
#include <unistd.h>
36-
3737
using std::begin;
3838
using std::ios_base;
3939
using std::runtime_error;
@@ -198,11 +198,41 @@ inline bool is_fastq_score_line(size_t line_count) {
198198
return ((line_count & 3ul) == 3ul);
199199
}
200200

201+
static const double neg_ten_over_log_ten = -4.342944819032517501;
202+
203+
//// CONVERT _TO_ ERROR PROBABILITIES
204+
inline double phred_to_error_probability(const double r) {
205+
const double h = r / neg_ten_over_log_ten;
206+
return std::exp(h);
207+
}
208+
inline double solexa_to_error_probability(const double r) {
209+
const double s = r / neg_ten_over_log_ten;
210+
return std::exp(s) / (1.0 + std::exp(s));
211+
}
212+
213+
//// CONVERT _FROM_ QUALITY CHARACTERS (I.E. THE CHARACTERS IN FASTQ FILES)
214+
inline char quality_character_to_phred(const char c) { return char(c - 33); }
215+
inline char quality_character_to_solexa(const char c) { return char(c - 64); }
216+
217+
//// CONVERT _TO_ QUALITY CHARACTERS (I.E. THE CHARACTERS IN FASTQ FILES)
218+
inline char phred_to_quality_character(const double h) {
219+
return char(std::min(60.0, h) + 33);
220+
}
221+
inline char solexa_to_quality_character(const double s) {
222+
return char(std::min(40.0, s) + 64);
223+
}
224+
225+
//// CHECK FOR VALIDITY OF A FASTQ SCORE CHARACTER
226+
inline bool valid_phred_score(const char c) { return (c >= 33 && c <= 93); }
227+
inline bool valid_solexa_score(const char c) {
228+
// return (c >= 64 && c <= 104);
229+
return (c >= 59 && c <= 104); // to allow for old Solexa format
230+
}
231+
201232
void read_fastq_file(const char *filename, vector<string> &names,
202233
vector<string> &sequences,
203234
vector<vector<double>> &scores) {
204-
205-
static const size_t INPUT_BUFFER_SIZE = 1000000;
235+
static constexpr auto INPUT_BUFFER_SIZE = 1000000ul;
206236

207237
std::ifstream in(filename);
208238
if (!in)
@@ -274,13 +304,15 @@ void read_fastq_file(const char *filename, vector<string> &names,
274304
bool phred_scores = true, solexa_scores = true;
275305
for (size_t i = 0; i < scrs.size() && phred_scores && solexa_scores; ++i) {
276306
phred_scores =
277-
phred_scores && (find_if(begin(scrs[i]), end(scrs[i]), [](char c) {
278-
return !valid_phred_score(c);
279-
}) == end(scrs[i]));
307+
phred_scores && (std::find_if(std::cbegin(scrs[i]), std::cend(scrs[i]),
308+
[](const char c) {
309+
return !valid_phred_score(c);
310+
}) == std::cend(scrs[i]));
280311
solexa_scores =
281-
solexa_scores && (find_if(begin(scrs[i]), end(scrs[i]), [](char c) {
282-
return !valid_solexa_score(c);
283-
}) == end(scrs[i]));
312+
solexa_scores && (std::find_if(std::cbegin(scrs[i]), std::cend(scrs[i]),
313+
[](const char c) {
314+
return !valid_solexa_score(c);
315+
}) == std::cend(scrs[i]));
284316
}
285317

286318
if (!phred_scores && !solexa_scores)

0 commit comments

Comments
 (0)