Skip to content

Commit

Permalink
Merge pull request #119 from MitraDarja/increase_cov
Browse files Browse the repository at this point in the history
Increase cov
  • Loading branch information
MitraDarja authored Sep 24, 2021
2 parents ded419a + a8f7a28 commit d8adf45
Show file tree
Hide file tree
Showing 10 changed files with 560 additions and 38 deletions.
5 changes: 0 additions & 5 deletions src/estimate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include <seqan3/alphabet/container/concatenated_sequences.hpp>
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/concept/cereal.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/io/sequence_file/all.hpp>

#include "estimate.h"
Expand Down Expand Up @@ -73,10 +72,6 @@ void check_ibf(min_arguments const & args, IBFType const & ibf, std::vector<uint
}
else
{
// Prevent division by 0
if (counter[j] == 0)
counter[j]++;

// Actually calculate estimation, in the else case k stands for the prev_expression
if constexpr (multiple_expressions)
estimations_i[j] = std::max(expressions[k][j] * 1.0, expressions[k+1][j] - ((abs(minimiser_pos - prev_counts[j])/(counter[j] * 1.0)) * (expressions[k+1][j]-expressions[k][j])));
Expand Down
20 changes: 8 additions & 12 deletions src/ibf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
#include <seqan3/alphabet/container/concatenated_sequences.hpp>
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/concept/cereal.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/io/sequence_file/all.hpp>
#include <seqan3/io/stream/detail/fast_istreambuf_iterator.hpp>
#include <seqan3/utility/container/dynamic_bitset.hpp>
Expand Down Expand Up @@ -317,15 +316,15 @@ void check_fpr(uint8_t const number_expression_thresholds, std::vector<double> &
{
throw std::invalid_argument{"Error. Please give a false positive rate for the IBFs."};
}
// If only one ibf size is given, set it for all levels.
// If only one ibf size is given, set it for all thresholds.
if (fprs.size() == 1)
{
fprs.assign(number_expression_thresholds, fprs[0]);
}
else if (fprs.size() != number_expression_thresholds)
{
throw std::invalid_argument{"Error. Length of false positive rates for IBFs is not equal to length of expression "
"levels."};
"thresholds."};
}
}

Expand Down Expand Up @@ -414,8 +413,6 @@ void get_filsize_per_expression_level(std::filesystem::path filename, uint8_t co
auto p = std::upper_bound(expression_thresholds.begin(), expression_thresholds.end(), minimiser_count);
if(p != expression_thresholds.begin())
sizes[(p-expression_thresholds.begin())-1]++;
else if (minimiser_count>=expression_thresholds[number_expression_thresholds-1])
sizes[number_expression_thresholds-1]++;
}
}

Expand Down Expand Up @@ -539,10 +536,11 @@ void ibf_helper(std::vector<std::filesystem::path> const & minimiser_files,

if (size < 1)
{
seqan3::debug_stream << "[Error]. The choosen expression threshold is not well picked. If you use the automatic "
<< "expression threshold determination, please decrease the number of levels. If you use "
<< "your own expression thresholds, decrease the thresholds from level "
<< ibf_args.expression_thresholds[j] << " on.\n";
throw std::invalid_argument{std::string("[Error]. The choosen expression threshold is not well picked. If you use the automatic ") +
std::string("expression threshold determination, please decrease the number of levels. If you use ") +
std::string("your own expression thresholds, decrease the thresholds from level ") +
std::to_string(ibf_args.expression_thresholds[j]) +
std::string(" on.\n")};
}
// m = -hn/ln(1-p^(1/h))
size = static_cast<uint64_t>((-1.0*num_hash*((1.0*size)/num_files))/(std::log(1.0-std::pow(fprs[j], 1.0/num_hash))));
Expand Down Expand Up @@ -801,9 +799,7 @@ void minimiser(std::vector<std::filesystem::path> const & sequence_files, min_ar

omp_set_num_threads(args.threads);

size_t const chunk_size = std::clamp<size_t>(std::bit_ceil(minimiser_args.samples.size() / args.threads),
1u,
64u);
size_t const chunk_size = std::clamp<size_t>(std::bit_ceil(minimiser_args.samples.size() / args.threads), 1u, 64u);

// Add minimisers to ibf
#pragma omp parallel for schedule(dynamic, chunk_size)
Expand Down
23 changes: 3 additions & 20 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,8 @@ int run_needle_count(seqan3::argument_parser & parser)
seqan3::debug_stream << "Error. Incorrect command line input for count. " << ext.what() << "\n";
return -1;
}
try
{
count(args, sequence_files, genome_file, exclude_file, paired);
}
catch (const std::invalid_argument & e)
{
std::cerr << e.what() << std::endl;
return -1;
}

count(args, sequence_files, genome_file, exclude_file, paired);

return 0;
}
Expand Down Expand Up @@ -147,15 +140,7 @@ int run_needle_estimate(seqan3::argument_parser & parser)
return -1;
}

try
{
call_estimate(args, estimate_args);
}
catch (const std::invalid_argument & e)
{
std::cerr << e.what() << std::endl;
return -1;
}
call_estimate(args, estimate_args);

return 0;
}
Expand Down Expand Up @@ -313,6 +298,4 @@ int main(int argc, char const ** argv)
run_needle_ibf_min(sub_parser);
else if (sub_parser.info.app_name == std::string_view{"needle-minimiser"})
run_needle_minimiser(sub_parser);
else
throw std::logic_error{"The used sub parser is not known: " + sub_parser.info.app_name};
}
22 changes: 22 additions & 0 deletions test/api/count_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,25 @@ TEST(count, small_example_paired)
}
std::filesystem::remove(tmp_dir/"Count_Test_mini_example.count.out");
}

TEST(count, small_example_exclude)
{
estimate_ibf_arguments args{};
initialization_args(args);

count(args, {std::string(DATA_INPUT_DIR) + "mini_example.fasta"}, std::string(DATA_INPUT_DIR) + "mini_gen.fasta",
std::string(DATA_INPUT_DIR) + "mini_gen2.fasta", false);

std::ifstream output_file(tmp_dir/"mini_example.count.out");
std::string line;
std::string expected{"gen1\t3"};
if (output_file.is_open())
{
while ( std::getline (output_file,line) )
{
EXPECT_EQ(expected,line);
}
output_file.close();
}
std::filesystem::remove(tmp_dir/"Count_Test_mini_example.count.out");
}
100 changes: 99 additions & 1 deletion test/api/estimate_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ TEST(estimate, small_example)
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_2");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_4");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Data");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_FPRs.fprs");
std::filesystem::remove(tmp_dir/"expression.out");
}

Expand Down Expand Up @@ -87,9 +88,11 @@ TEST(estimate, small_example_uncompressed)
}
output_file.close();
}
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_1");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_2");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_4");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Data");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_FPRs.fprs");
std::filesystem::remove(tmp_dir/"expression.out");
}

Expand Down Expand Up @@ -125,9 +128,53 @@ TEST(estimate, small_example_gene_not_found)
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_2");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_4");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Data");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_FPRs.fprs");
std::filesystem::remove(tmp_dir/"expression.out");
}

TEST(estimate, small_example_different_expressions_per_level)
{
std::filesystem::path tmp_dir = std::filesystem::temp_directory_path(); // get the temp directory
estimate_ibf_arguments ibf_args{};
minimiser_arguments minimiser_args{};
estimate_arguments estimate_args{};
initialization_args(ibf_args);
ibf_args.path_out = tmp_dir/"Estimate_Test0_";
ibf_args.number_expression_thresholds = 2;
std::vector<double> fpr = {0.05};
std::vector<std::filesystem::path> sequence_files = {std::string(DATA_INPUT_DIR) + "mini_example.fasta"};

minimiser(sequence_files, ibf_args, minimiser_args);
std::vector<std::filesystem::path> minimiser_files{tmp_dir/"Estimate_Test0_mini_example.minimiser"};
ibf_args.expression_thresholds= {};
ibf(minimiser_files, ibf_args, fpr);

ibf_args.expression_thresholds = {0, 1, 2};
estimate_args.search_file = std::string(DATA_INPUT_DIR) + "mini_gen.fasta";
estimate_args.path_in = ibf_args.path_out;
ibf_args.path_out = tmp_dir/"Test0_expression.out";
call_estimate(ibf_args, estimate_args);

std::ifstream output_file(tmp_dir/"Test0_expression.out");
std::string line;
std::string expected{"gen1\t3\t"};
if (output_file.is_open())
{
while ( std::getline (output_file,line) )
{
EXPECT_EQ(expected,line);
}
output_file.close();
}
std::filesystem::remove(tmp_dir/"Estimate_Test0_IBF_Level_0");
std::filesystem::remove(tmp_dir/"Estimate_Test0_IBF_Level_1");
std::filesystem::remove(tmp_dir/"Estimate_Test0_IBF_Levels.levels");
std::filesystem::remove(tmp_dir/"Estimate_Test0_IBF_FPRs.fprs");
std::filesystem::remove(tmp_dir/"Estimate_Test0_IBF_Data");
std::filesystem::remove(tmp_dir/"Test0_expression.out");
std::filesystem::remove(tmp_dir/"Estimate_Test0_mini_example.minimiser");
}

TEST(estimate, small_example_different_expressions_per_level_normalization_1)
{
std::filesystem::path tmp_dir = std::filesystem::temp_directory_path(); // get the temp directory
Expand Down Expand Up @@ -166,10 +213,56 @@ TEST(estimate, small_example_different_expressions_per_level_normalization_1)
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Level_0");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Level_1");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Levels.levels");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_FPRs.fprs");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Data");
std::filesystem::remove(tmp_dir/"expression.out");
}

TEST(estimate, small_example_different_expressions_per_level_normalization_1_uncompressed)
{
std::filesystem::path tmp_dir = std::filesystem::temp_directory_path(); // get the temp directory
estimate_ibf_arguments ibf_args{};
minimiser_arguments minimiser_args{};
estimate_arguments estimate_args{};
estimate_args.normalization_method = 1;
initialization_args(ibf_args);
ibf_args.path_out = tmp_dir/"Estimate_Test2_";
ibf_args.number_expression_thresholds = 2;
ibf_args.compressed = false;
std::vector<double> fpr = {0.05};
std::vector<std::filesystem::path> sequence_files = {std::string(DATA_INPUT_DIR) + "mini_example.fasta"};

minimiser(sequence_files, ibf_args, minimiser_args);
std::vector<std::filesystem::path> minimiser_files{tmp_dir/"Estimate_Test2_mini_example.minimiser"};
ibf_args.expression_thresholds= {};
ibf(minimiser_files, ibf_args, fpr);

ibf_args.expression_thresholds = {0, 1, 2};
estimate_args.search_file = std::string(DATA_INPUT_DIR) + "mini_gen.fasta";
estimate_args.path_in = ibf_args.path_out;
ibf_args.path_out = tmp_dir/"Test2_expression.out";
call_estimate(ibf_args, estimate_args);

std::ifstream output_file(tmp_dir/"Test2_expression.out");
std::string line;
std::string expected{"gen1\t1\t"};
if (output_file.is_open())
{
while ( std::getline (output_file,line) )
{
EXPECT_EQ(expected,line);
}
output_file.close();
}
std::filesystem::remove(tmp_dir/"Estimate_Test2_IBF_Level_0");
std::filesystem::remove(tmp_dir/"Estimate_Test2_IBF_Level_1");
std::filesystem::remove(tmp_dir/"Estimate_Test2_IBF_Levels.levels");
std::filesystem::remove(tmp_dir/"Estimate_Test2_IBF_FPRs.fprs");
std::filesystem::remove(tmp_dir/"Estimate_Test2_IBF_Data");
std::filesystem::remove(tmp_dir/"Test2_expression.out");
std::filesystem::remove(tmp_dir/"Estimate_Test2_mini_example.minimiser");
}

TEST(estimate, example)
{
std::filesystem::path tmp_dir = std::filesystem::temp_directory_path(); // get the temp directory
Expand Down Expand Up @@ -203,8 +296,9 @@ TEST(estimate, example)
output_file.close();
}
std::filesystem::remove(tmp_dir/"Estimate_Test_Single_IBF_4");
std::filesystem::remove(tmp_dir/"Estimate_Test_Single__IBF_32");
std::filesystem::remove(tmp_dir/"Estimate_Test_Single_IBF_32");
std::filesystem::remove(tmp_dir/"Estimate_Test_Single_IBF_Data");
std::filesystem::remove(tmp_dir/"Estimate_Test_Single_IBF_FPRs.fprs");
std::filesystem::remove(tmp_dir/"Single_expression.out");
}

Expand Down Expand Up @@ -244,6 +338,7 @@ TEST(estimate, example_multiple_threads)
std::filesystem::remove(tmp_dir/"Estimate_Test_Multiple_IBF_32");
std::filesystem::remove(tmp_dir/"Estimate_Test_Multiple_IBF_4");
std::filesystem::remove(tmp_dir/"Estimate_Test_Multiple_IBF_Data");
std::filesystem::remove(tmp_dir/"Estimate_Test_Multiple_IBF_FPRs.fprs");
std::filesystem::remove(tmp_dir/"Multiple_expression.out");
}

Expand Down Expand Up @@ -288,6 +383,7 @@ TEST(estimate, example_different_expressions_per_level)
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Level_1");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Level_2");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Levels.levels");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_FPRs.fprs");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Data");
std::filesystem::remove(tmp_dir/"expression.out");
}
Expand Down Expand Up @@ -336,7 +432,9 @@ TEST(estimate, example_different_expressions_per_level_multiple_threads)
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Level_0");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Level_1");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Level_2");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Level_3");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Levels.levels");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_FPRs.fprs");
std::filesystem::remove(tmp_dir/"Estimate_Test_IBF_Data");
std::filesystem::remove(tmp_dir/"expression.out");
}
Loading

0 comments on commit d8adf45

Please sign in to comment.