diff --git a/Cargo.toml b/Cargo.toml
index 2e95e3f26..acd6f945f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,6 +23,7 @@ lazy_static = "0.2"
 bitflags = "0.9"
 serde = { version = "^1", optional = true }
 regex = "0.2"
+linear-map = "1.2.0"
 
 [features]
 default = []
diff --git a/src/bam/record_serde.rs b/src/bam/record_serde.rs
index 346b7fa3c..d3a0b0757 100644
--- a/src/bam/record_serde.rs
+++ b/src/bam/record_serde.rs
@@ -289,9 +289,9 @@ impl<'de> Deserialize<'de> for Record {
 
 #[cfg(test)]
 mod tests {
-    use bam::record::Record;
     use bam::Read;
     use bam::Reader;
+    use bam::record::Record;
 
     use std::path::Path;
 
diff --git a/src/bcf/header.rs b/src/bcf/header.rs
index b0e675b1f..fa4ad8080 100644
--- a/src/bcf/header.rs
+++ b/src/bcf/header.rs
@@ -9,6 +9,8 @@ use std::str;
 
 use htslib;
 
+use linear_map::LinearMap;
+
 pub type SampleSubset = Vec<i32>;
 
 custom_derive! {
@@ -191,6 +193,41 @@ impl Drop for Header {
     }
 }
 
+/// A single record (line) of a VCF/BCF header.
+///
+/// Every variant except `Generic` stores the record's `key=value` attributes in
+/// `values`; `key` holds the record's own key (e.g. `FILTER`).
+#[derive(Debug)]
+pub enum HeaderRecord {
+    /// A `FILTER` header record.
+    Filter {
+        key: String,
+        values: LinearMap<String, String>,
+    },
+    /// An `INFO` header record.
+    Info {
+        key: String,
+        values: LinearMap<String, String>,
+    },
+    /// A `FORMAT` header record.
+    Format {
+        key: String,
+        values: LinearMap<String, String>,
+    },
+    /// A `contig` header record.
+    Contig {
+        key: String,
+        values: LinearMap<String, String>,
+    },
+    /// A structured header record.
+    Structured {
+        key: String,
+        values: LinearMap<String, String>,
+    },
+    /// A generic, unstructured header record.
+    Generic { key: String, value: String },
+}
+
 #[derive(Debug)]
 pub struct HeaderView {
     pub inner: *mut htslib::bcf_hdr_t,
@@ -340,6 +377,79 @@ impl HeaderView {
         };
         key.to_bytes().to_vec()
     }
+
+    /// Return structured `HeaderRecord`s.
+    ///
+    /// The htslib header record at index 0 is not part of the result.
+    /// NULL strings are returned as empty strings and invalid UTF-8 is replaced
+    /// with U+FFFD.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a record has a type code other than 0 through 5.
+    pub fn header_records(&self) -> Vec<HeaderRecord> {
+        /// Copy a C string into an owned `String`.
+        fn owned_string(ptr: *const ::std::os::raw::c_char) -> String {
+            if ptr.is_null() {
+                // `CStr::from_ptr` must never be handed a NULL pointer.
+                return String::new();
+            }
+            // SAFETY: `ptr` is non-NULL; assumes htslib NUL-terminates its strings.
+            let c_str = unsafe { ffi::CStr::from_ptr(ptr) };
+            c_str.to_string_lossy().into_owned()
+        }
+
+        /// Collect the `key=value` attributes of one header record.
+        fn parse_kv(rec: &htslib::bcf_hrec_t) -> LinearMap<String, String> {
+            let mut attributes = LinearMap::new();
+            for i in 0..rec.nkeys as isize {
+                // SAFETY: assumes `keys` and `vals` both hold `nkeys` entries.
+                let (key, value) = unsafe { (*rec.keys.offset(i), *rec.vals.offset(i)) };
+                attributes.insert(owned_string(key), owned_string(value));
+            }
+            attributes
+        }
+
+        // SAFETY: assumes `self.inner` points to a live htslib header.
+        let n_records = unsafe { (*self.inner).nhrec } as isize;
+        // Index 0 is skipped on purpose: `test_header_records` expects the first
+        // returned record to be the `FILTER` record.
+        (1..n_records)
+            .map(|i| {
+                // SAFETY: assumes `hrec` holds `nhrec` valid record pointers.
+                let rec = unsafe { &**(*self.inner).hrec.offset(i) };
+                let key = owned_string(rec.key);
+                // The codes mirror htslib's `BCF_HL_FLT` (0) .. `BCF_HL_GEN` (5).
+                match rec.type_ {
+                    0 => HeaderRecord::Filter {
+                        key,
+                        values: parse_kv(rec),
+                    },
+                    1 => HeaderRecord::Info {
+                        key,
+                        values: parse_kv(rec),
+                    },
+                    2 => HeaderRecord::Format {
+                        key,
+                        values: parse_kv(rec),
+                    },
+                    3 => HeaderRecord::Contig {
+                        key,
+                        values: parse_kv(rec),
+                    },
+                    4 => HeaderRecord::Structured {
+                        key,
+                        values: parse_kv(rec),
+                    },
+                    5 => HeaderRecord::Generic {
+                        key,
+                        value: owned_string(rec.value),
+                    },
+                    _ => panic!("Unknown type: {}", rec.type_),
+                }
+            })
+            .collect()
+    }
 }
 
 impl Clone for HeaderView {
diff --git a/src/bcf/mod.rs b/src/bcf/mod.rs
index 564138393..69fb3403f 100644
--- a/src/bcf/mod.rs
+++ b/src/bcf/mod.rs
@@ -24,8 +24,7 @@ pub mod record;
 use bcf::header::{HeaderView, SampleSubset};
 use htslib;
 
-pub use bcf::buffer::RecordBuffer;
-pub use bcf::header::Header;
+pub use bcf::header::{Header, HeaderRecord};
 pub use bcf::record::Record;
 
 /// Redefinition of corresponding `#define` in `vcf.h.`.
@@ -839,6 +838,27 @@ mod tests {
         assert!(header.sample_to_id(b"three").is_err());
     }
 
+    #[test]
+    fn test_header_records() {
+        let vcf = Reader::from_path(&"test/test_string.vcf")
+            .ok()
+            .expect("Error opening file.");
+        let records = vcf.header().header_records();
+        assert_eq!(records.len(), 9);
+
+        match records[0] {
+            HeaderRecord::Filter {
+                ref key,
+                ref values,
+            } => {
+                assert_eq!(key, "FILTER");
+                assert_eq!(values["ID"], "PASS");
+            }
+            // Report what was actually found instead of a bare `assert!(false)`.
+            ref other => panic!("expected a FILTER record, got {:?}", other),
+        }
+    }
+
     // Helper function reading full file into string.
     fn read_all<P: AsRef<Path>>(path: P) -> String {
         let mut file = File::open(path.as_ref())
diff --git a/src/lib.rs b/src/lib.rs
index c0a11bb91..3e63a7241 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -83,6 +83,8 @@ extern crate quick_error;
 extern crate regex;
 extern crate url;
 
+extern crate linear_map;
+
 #[cfg(feature = "serde")]
 extern crate serde;
 
diff --git a/src/sam/mod.rs b/src/sam/mod.rs
index 0484e083f..5568c581c 100644
--- a/src/sam/mod.rs
+++ b/src/sam/mod.rs
@@ -10,9 +10,9 @@ use std::path::Path;
 
 use htslib;
 
+use bam::HeaderView;
 use bam::header;
 use bam::record;
-use bam::HeaderView;
 
 /// SAM writer.
 #[derive(Debug)]
@@ -111,10 +111,10 @@ quick_error! {
 
 #[cfg(test)]
 mod tests {
-    use bam::header;
-    use bam::record;
     use bam::Read;
     use bam::Reader;
+    use bam::header;
+    use bam::record;
     use sam::Writer;
 
     #[test]