Skip to content

Module: IO Alignments

Hannes Hauswedell edited this page Mar 6, 2017 · 7 revisions

Files

Draft

align_file_out.hpp:

#pragma once

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <fstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <variant>
#include <vector>

namespace seqan3
{

// ==================================================================
// align_file_out_traits
// ==================================================================

// Requirements on a traits type that configures align_file_out.
//
// A type `t` satisfies the concept when it provides
//   * a member type   `t::stream_type`,
//   * a member type   `t::valid_formats`,
//   * a static member `t::valid_compression_formats` (any valid expression).
//
// Member types must be introduced with `typename` inside a requires-expression;
// a bare `t::stream_type;` would be parsed as an expression naming a value.
template <typename t>
concept bool align_file_out_traits_concept = requires (t v)
{
    typename t::stream_type;
    typename t::valid_formats;

    t::valid_compression_formats;
};

// Default traits for align_file_out.
//
// With BOOST defined, output goes through a Boost.Iostreams filtering stream,
// SAM and BAM are selectable formats, and ".gz" / ".bz2" map to the matching
// compressor. Without BOOST, a plain std::ofstream is used, only SAM is
// selectable, and the list of compression formats is empty.
struct align_file_out_traits_default
{
#ifdef BOOST
    using stream_type = boost::iostreams::filtering_ostream;
    using valid_formats = std::variant<align_file_out_format_sam, align_file_out_format_bam>;

    using _compressor_variant = std::variant<boost::iostreams::gzip_compressor,
                                             boost::iostreams::bzip2_compressor>;

    // std::vector is not a literal type in C++17, so the table is an inline
    // constant rather than constexpr. Each entry holds a default-constructed
    // compressor of the alternative selected by index.
    static inline std::vector<std::pair<std::string, _compressor_variant>> const valid_compression_formats
    {
        { ".gz",  _compressor_variant{std::in_place_index<0>} },
        { ".bz2", _compressor_variant{std::in_place_index<1>} }
    };
#else
    using stream_type = std::ofstream;
    using valid_formats = std::variant<align_file_out_format_sam>;

    // No compressors are available; std::monostate keeps the pair type
    // well-formed (std::pair<std::string, void> cannot be instantiated).
    using _compressor_variant = std::variant<std::monostate>;
    static inline std::vector<std::pair<std::string, _compressor_variant>> const valid_compression_formats{};
#endif
};

// ==================================================================
// align_file_out
// ==================================================================

// Output file for alignments.
//
// The traits parameter supplies the stream type, the variant of selectable
// format handlers and the table of recognised compression extensions.
// Objects are move-only.
template <typename align_file_out_traits = align_file_out_traits_default>
    requires align_file_out_traits_concept<align_file_out_traits>
class align_file_out
{
public:
    // The class does not derive from the traits type, so its members are
    // re-exported through aliases / a reference instead of using-declarations.
    using stream_type   = typename align_file_out_traits::stream_type;   // e.g. std::ostream concept
    using valid_formats = typename align_file_out_traits::valid_formats; // = std::variant<align_file_out_format_sam, ...>
    // = std::vector<std::pair<std::string, t>>
    static constexpr auto const & valid_compression_formats = align_file_out_traits::valid_compression_formats;

    // constructor with arg: opens the stream for _file_name and selects the
    // format handler from the file extension
    align_file_out(std::string const & _file_name);

    // copy construction and assignment are explicitly deleted
    // because we don't want multiple access to file
    align_file_out(align_file_out const &) = delete;
    align_file_out & operator=(align_file_out const &) = delete;

    // move construction and assignment are defaulted
    align_file_out(align_file_out &&) = default;
    align_file_out & operator=(align_file_out &&) = default;

    // forwards to write_record() of the currently selected format handler
    void write_record();
    // only tag-dispatch once for multiple writes
    void write_records();

protected:
    // NOTE(review): a protected destructor means objects of this type cannot
    // be created as automatic variables outside of derived classes -- confirm
    // that align_file_out is meant to be used only as a base class.
    ~align_file_out() = default;

private:
    /* file format */
    std::string file_name;
    stream_type stream;
    valid_formats format;

    /* private functions */
    // pushes the compressor registered for compress_ext onto the stream
    void select_decompression(std::string const & compress_ext);
    // tries the format alternatives starting at `index` until one accepts ext
    template <std::size_t index>
    void select_format(std::string const & ext);

    /* context? */
    // NOTE(review): query_seqs_type / query_ids_type are not declared in this
    // header -- presumably they come from the traits or a template parameter.
    query_seqs_type * query_seqs{nullptr};
    query_ids_type * query_ids{nullptr};

    std::string program_name;
    std::string program_version;
    // NOTE(review): initialised to false only to avoid an indeterminate value;
    // confirm the intended default.
    bool write_sequence{false};
};

// ------------------------------------------------------------------
// public API
// ------------------------------------------------------------------

align_file_out::align_file_out(std::string const & _file_name) :
        file_name(_file_name)
{
    // open stream
#ifdef BOOST
    std::string compress_ext{get_compressed_file_extension(file_name, valid_compression_formats)};

    if (!compress_ext.empty())
    {
        select_decompression(stream, compress_ext);
        file_name = file_name.substr(file_name.size() - compress_ext.size(), file_name.size());
    }

    stream.push(file_sink(_file_name));
#else
    stream.open(_file_name, std::ios::binary);
#endif

    // initialize format handler
    std::string ext{get_file_extension(file_name)};
    select_format<0>(format, ext);
}

inline void align_file_out::write_record()
{
    assert(!format.valueless_by_exception);
    std::visit([] (align_file_out_format_concept & f) { f->write_record(); }, format);
}

inline void align_file_out::write_records()
{
    assert(!format.valueless_by_exception);
    std::visit([] (align_file_out_format_concept & f) { f->write_records(); }, format);
}

// ------------------------------------------------------------------
// private functions
// ------------------------------------------------------------------

inline void
align_file_out::select_decompression(std::string const & compress_ext)
{
    for (auto const & pair : valid_compression_formats)
    {
        if (compress_ext == std::get<0>(pair))
        {
            std::visit([&stream] (auto & compressor) { stream.push(compressor); }, std::get<1>(pair));
            break;
        }
    }
}

template <size_t index>
inline void align_file_out::select_format(std::string const & ext)
{
    if (index == variant_size_v<valid_formats>)
        throw std::runtime_error("No valid format found for this extension");
    else if (variant_alternative_t<index, valid_formats>::file_extensions().contains(ext))
        format = variant_alternative_t<index, valid_formats>{};
    else
        select_format<index+1>(format, ext);
}

} // namespace seqan3

align_file_out_format.hpp:

#pragma once

namespace seqan3
{

// Requirements on a format handler usable by align_file_out.
//
// For a handler type `format_type` and an object `handler` of that type, the
// following expressions must be valid:
//   * format_type::file_extensions()
//   * handler.write_record()
//   * handler.write_records()
template <typename format_type>
concept bool align_file_out_format_concept = requires (format_type handler)
{
    // static access to the extensions handled by this format
    format_type::file_extensions();

    // record output
    handler.write_record();
    handler.write_records();
};

} // namespace seqan3

align_file_out_format_sam.hpp:

#pragma once

#include <vector>
#include <string>

namespace seqan3
{

// Format handler for SAM output.
class align_file_out_format_sam
{
public:
    // Returns the file extensions (without a leading dot) handled by this
    // format.
    //
    // This is a static function rather than a `static constexpr` data member
    // because std::vector<std::string> is not a literal type in C++17; the
    // list is built once on first use and shared by all callers.
    static std::vector<std::string> const & file_extensions()
    {
        static std::vector<std::string> const extensions{"sam"};
        return extensions;
    }

    // Writes a single record (declared only; no definition in this header yet).
    void write_record();
    // Writes multiple records (declared only; no definition in this header yet).
    void write_records();
};

// Compile-time check that align_file_out_format_sam satisfies
// align_file_out_format_concept; the build fails with the message below
// if it does not.
static_assert(align_file_out_format_concept<align_file_out_format_sam>,
              "align_file_out_format_sam does not satisfy align_file_out_format_concept");

/** implementations **/


} // namespace seqan3

Clone this wiki locally